<a href="https://colab.research.google.com/github/AdityaDhaulakhandi/cv_tasks/blob/main/task1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

License Plate Segmentation

In [None]:
# Create virtual env
# Installs the system package that supplies Python's `venv` module;
# `-y` answers apt's confirmation prompt automatically.
!apt-get install -y python3-venv

In [None]:
# Create a virtual environment named `task1` in the current working directory.
!python3 -m venv task1
# NOTE(review): every `!` line runs in its own subshell, so nothing executed here
# can switch the interpreter used by the notebook kernel. Also, a venv created on
# Linux normally provides `bin/activate` (not `activate.bat`) - presumably this
# line fails or is a no-op; confirm whether the venv is needed at all.
!task1/bin/activate.bat

In [None]:
# Extract the dataset archive from Google Drive into the current working directory.
# NOTE(review): assumes Drive is already mounted at /content/drive - no mount cell
# is visible in this notebook; confirm before running on a fresh runtime.
!unzip /content/drive/MyDrive/Task_1/licenseplate.zip

In [None]:
# Recursively delete the `task1` directory (the virtual environment created earlier).
!rm -r task1/

In [None]:
import torch
import torchvision
from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large,DeepLabV3_MobileNet_V3_Large_Weights
from torchvision.models.segmentation import deeplabv3_resnet50,DeepLabV3_ResNet50_Weights
# to load the dataset
from torch.utils.data import Dataset, DataLoader

In [None]:
# Load DeepLabV3 with a MobileNetV3-Large backbone and its default pretrained weights.
# `weights` is a keyword-only argument in the torchvision multi-weight API; passing it
# positionally only works through a deprecation shim and emits a warning, so it is
# passed by keyword here. (This replaces the legacy `pretrained=True` form.)
weights = DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT
model = deeplabv3_mobilenet_v3_large(weights=weights)

In [None]:
# --- Hyperparameters and paths ---
num_classes = 2  # plate and background
batch_size = 16
epochs = 100
data_dir = '/content/licenseplate'  # path to the dataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing preset that ships with the pretrained weights. resize_size=None
# turns off the preset's own resizing; images are resized by the dataset instead.
transform = weights.transforms(resize_size=None)

# Swap the final 1x1 convolution of the classifier head so it predicts
# `num_classes` channels instead of the pretrained label set. The input width is
# read from the layer being replaced rather than hard-coded.
head_in_channels = model.classifier[-1].in_channels
model.classifier[-1] = torch.nn.Conv2d(head_in_channels, num_classes, kernel_size=(1, 1))

loss_fn = torch.nn.CrossEntropyLoss()

# The optimizer must be created AFTER the head is replaced: it captures the
# parameter tensors that exist at construction time, so building it earlier would
# leave the new Conv2d out of the update step (the new head would never train).
# NOTE: original author note - consider lowering the learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Show the module tree (bare last expression -> repr) to inspect the replaced head.
model

In [None]:
from PIL import Image
import os

class LicensePlateDataset(Dataset):
  """Image / segmentation-mask pairs for license-plate segmentation.

  ``root_dir`` must contain an ``Images`` folder and a ``SegmentationClass``
  folder; each mask is looked up under the same file name as its image.

  Args:
    root_dir: Dataset root directory.
    transformer: Optional callable applied to the resized PIL image. When it is
      given, the mask is also converted to a tensor with ``ToTensor``; when it
      is ``None`` both items are returned as resized PIL images.
    image_size: ``(height, width)`` every image and mask is resized to.
  """

  def __init__(self, root_dir, transformer=None, image_size=(220, 400)):
    self.root_dir = root_dir
    # Build the paths from the constructor argument, not from a notebook global,
    # so the class works for any dataset root.
    self.image_dir = os.path.join(root_dir, 'Images')
    self.mask_dir = os.path.join(root_dir, 'SegmentationClass')
    # os.listdir order is arbitrary; sorting makes indices reproducible.
    self.image_filenames = sorted(os.listdir(self.image_dir))
    self.transformer = transformer
    self.image_size = tuple(image_size)

  def __len__(self):
    """Number of images found in the ``Images`` folder."""
    return len(self.image_filenames)

  def __getitem__(self, index):
    """Return the ``(image, mask)`` pair at ``index``, both resized to ``image_size``."""
    filename = self.image_filenames[index]

    # convert() loads the pixel data, so the file handles can be closed right away.
    with Image.open(os.path.join(self.image_dir, filename)) as raw_image:
      image = raw_image.convert('RGB')
    with Image.open(os.path.join(self.mask_dir, filename)) as raw_mask:
      target = raw_mask.convert('RGB')

    resize_image = torchvision.transforms.Resize(self.image_size)
    # Masks hold label colours: nearest-neighbour keeps them exact, whereas the
    # default bilinear filter would blend colours along the plate boundary.
    resize_mask = torchvision.transforms.Resize(
        self.image_size,
        interpolation=torchvision.transforms.InterpolationMode.NEAREST)
    resized_image, resized_target = resize_image(image), resize_mask(target)

    # Apply any preprocessing
    if self.transformer is not None:
      resized_image = self.transformer(resized_image)
      # The mask stays a 3-channel tensor; the training loop reduces it to
      # class indices.
      resized_target = torchvision.transforms.ToTensor()(resized_target)

    return resized_image, resized_target

# Training data: the dataset applies the pretrained-weights preprocessing to each
# image, and the loader serves it in shuffled mini-batches.
train_dataset = LicensePlateDataset(root_dir=data_dir, transformer=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
import copy

# Training: fine-tune the model on `train_loader` and keep the weights from the
# epoch with the lowest mean training loss. There is no validation split in this
# notebook, so model selection is based on the training loss only.
model.to(device)

best_model_wts = copy.deepcopy(model.state_dict())
best_loss = float('inf')  # any real epoch loss beats this, so a checkpoint is always taken

for epoch in range(epochs):
    # Always optimise in train mode. (Switching to eval() on some epochs would
    # freeze BatchNorm statistics and disable dropout while the optimizer still
    # updates the weights.)
    model.train()
    running_loss = 0.0

    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        pred = model(images)['out']

        # The masks arrive as 3-channel tensors; a channel-wise argmax collapses
        # them to one class index per pixel.
        # NOTE(review): assumes mask colours never peak in the third channel - a
        # pixel whose largest channel is index 2 would be out of range for
        # num_classes = 2; confirm against the dataset palette.
        loss = loss_fn(pred, torch.argmax(targets, dim=1))

        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean; weight it by the batch size so the epoch
        # average stays correct when the last batch is smaller.
        running_loss += loss.item() * images.size(0)

    # end of epoch
    epoch_loss = running_loss / len(train_loader.dataset)
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_model_wts = copy.deepcopy(model.state_dict())

# Restore the best checkpoint seen during training.
model.load_state_dict(best_model_wts)

<All keys matched successfully>

In [None]:
# Persist the trained model to Google Drive.
# NOTE(review): this pickles the whole module object, so loading it later needs the
# same class definitions to be importable; saving `model.state_dict()` is the more
# portable form. Left unchanged so any existing loader of this file keeps working.
torch.save(model,f='/content/drive/MyDrive/Task_1/model_90')

In [None]:
# Switch to inference mode (fixed BatchNorm statistics, dropout off) before predicting.
model.eval()

In [None]:
# Debug check: shape and value range of the logits, and the shape of the masks.
# NOTE(review): `pred` and `targets` are leftovers from the last batch of the
# training loop, so this cell only works after that cell has run.
print(pred.shape,pred.min().item(),pred.max().item())
# print( torch.argmax(targets, dim=0).shape,targets.min().item(),targets.max().item())
print( targets.shape)

torch.Size([10, 2, 400, 220]) -22.5350284576416 17.79749298095703
torch.Size([10, 3, 400, 220])


In [None]:
# Lowest mean per-sample training loss recorded by the training loop.
best_loss

0.02223743498325348

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

# Run the model on every image in the test folder, overlay the predicted mask on
# the image, and collect the preprocessed tensors into `test_batch` for the
# throughput benchmark.

# Prepare the test images folder
test_folder = '/content/licenseplate/test'
test_size = (220, 400)  # (height, width) - same size the training images are resized to

# Inference must run in eval mode: with a batch of one image, BatchNorm layers in
# train mode fail with "Expected more than 1 value per channel when training".
model.eval()

resize_to_train_size = torchvision.transforms.Resize(test_size)
test_tensors = []

# Iterate over the test images (sorted, because os.listdir order is arbitrary)
for i, image_file in enumerate(sorted(os.listdir(test_folder))):
    # Load and preprocess the image; resize so every tensor has the same shape
    # and the overlay below lines up with the predicted mask.
    image_path = os.path.join(test_folder, image_file)
    with Image.open(image_path) as raw_image:
        image = resize_to_train_size(raw_image.convert("RGB"))

    image_tensor = transform(image)
    test_tensors.append(image_tensor)
    image_tensor = image_tensor.unsqueeze(0).to(device)
    print(image_tensor.shape)

    with torch.no_grad():
        outputs = model(image_tensor)['out']
        # softmax is monotonic, so the argmax of the raw logits is the same class map
        predicted_masks = torch.argmax(outputs, dim=1)

    # Convert predicted masks to numpy arrays for visualization
    predicted_masks = predicted_masks.squeeze(0).cpu().numpy()

    plt.figure()
    plt.imshow(image)
    plt.imshow(predicted_masks, alpha=0.6)
    plt.title("pred")
    plt.axis('off')

    # To save the figure, call savefig BEFORE plt.show():
    # plt.savefig('/content/drive/MyDrive/Task_1/results/pred'+str(i)+'.png')
    plt.show()
    plt.close()  # release the figure so long test folders do not accumulate memory

# Stack into one (N, 3, H, W) batch. An empty folder falls back to the original
# all-zero batch of 10 so the benchmark cell can still run.
if test_tensors:
    test_batch = torch.stack(test_tensors)
else:
    test_batch = torch.zeros((10, 3, *test_size), dtype=torch.float32)

torch.Size([1, 3, 220, 400])


ValueError: ignored

In [None]:
import time

# Throughput benchmark: time `repetitions` forward passes over `test_batch` and
# report images per second and seconds per image.
test_batch = test_batch.to(device)
model.eval()  # benchmark the inference configuration (fixed BatchNorm statistics)

repetitions = 10
warmup_runs = 3  # untimed passes so one-off initialisation cost is not measured
use_cuda_timer = test_batch.is_cuda  # CUDA events are only available on a GPU

total_time = 0
with torch.no_grad():
    for warm in range(warmup_runs):
        model(test_batch)
    if use_cuda_timer:
        torch.cuda.synchronize()  # make sure warm-up work is finished before timing

    for rep in range(repetitions):
        if use_cuda_timer:
            starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
            starter.record()
            _ = model(test_batch)
            ender.record()
            torch.cuda.synchronize()  # wait until the GPU completes
            curr_time = starter.elapsed_time(ender) / 1000  # elapsed_time is in ms -> seconds
        else:
            # CPU fallback: wall-clock timing is accurate because the call is synchronous
            start = time.perf_counter()
            _ = model(test_batch)
            curr_time = time.perf_counter() - start
        total_time += curr_time

Throughput = (repetitions * test_batch.size()[0]) / total_time  # images per second
print('Final Throughput: ', Throughput)
# Average seconds per image (not per batch)
print('Inference Time: ', total_time / (repetitions * test_batch.size()[0]))

Final Throughput:  207.31332257535908
Inference Time:  0.004823616676330566


In [None]:
# Performance metric for test --- mAP