In [None]:
# Imports

import torch
import torch.utils.data
import torchvision
import pandas
import os
import typing
import random
import PIL.Image
import PIL.ImageDraw
import matplotlib.pyplot


In [None]:
# Constants
# Both paths are relative, so they resolve against the notebook's working directory.

# Directory holding the training images. The trailing slash matters: image file
# names are appended to this string directly when the images are opened.
IMAGE_DIRECTORY_PATH = "archive/license_plates_detection_train/"
# CSV file with the bounding-box annotations; read once when the dataset is built.
IMAGE_ANNOTATIONS_CSV_PATH = "archive/license_plates_detection_train.csv"

In [None]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset of ``(input, target)`` pairs with optional transforms.

    Args:
        data: Sequence of ``(input, target)`` pairs. It is stored as given and
            never modified by this class.
        input_transform: Optional callable applied to the input on every access.
        target_transforms: Optional callable applied to the target on every access.
    """

    def __init__(self, data : typing.Sequence[typing.Tuple[object, object]], input_transform = None, target_transforms=None):
        self.input_transform = input_transform
        self.target_transforms = target_transforms
        self.data = data

    def __len__(self):
        """Return the number of ``(input, target)`` pairs."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the pair at ``index`` with the configured transforms applied."""
        input, target = self.data[index]

        if self.input_transform is not None:
            input = self.input_transform(input)

        if self.target_transforms is not None:
            target = self.target_transforms(target)

        return input, target

    def split(self, ratio):
        """Split the dataset in two at ``ratio``.

        Args:
            ratio: Fraction of the pairs (expected in ``[0, 1]``) that goes
                into the first returned dataset.

        Returns:
            A ``(first, second)`` tuple of datasets sharing this dataset's
            transforms. ``first`` holds the leading ``int(ratio * len(self))``
            pairs and ``second`` the remainder, so
            ``train, validation = dataset.split(0.8)`` trains on 80% of the data.
        """
        split_index = int(ratio * len(self.data))
        return (Dataset(self.data[:split_index], self.input_transform, self.target_transforms),
                Dataset(self.data[split_index:], self.input_transform, self.target_transforms))

    def shuffle(self, seed = None):
        """Return a new dataset with the same pairs in random order.

        Args:
            seed: Optional seed for a reproducible permutation. When ``None``
                the module-level ``random`` generator is used.

        Returns:
            A new ``Dataset``; this dataset and its underlying sequence are
            left untouched.
        """
        # Copy first: shuffling ``self.data`` itself would reorder this dataset
        # (and the caller's list) and would fail for immutable sequences.
        shuffled_data = list(self.data)
        # A private generator keeps a seeded shuffle from reseeding the global
        # ``random`` state as a side effect.
        generator = random if seed is None else random.Random(seed)
        generator.shuffle(shuffled_data)
        return Dataset(shuffled_data, self.input_transform, self.target_transforms)

In [None]:
annotations: typing.Sequence[object]
data: typing.Sequence[typing.Tuple[object, object]]

# Fixed seed so the train/validation split is the same after a kernel restart.
SHUFFLE_SEED = 42

with open(IMAGE_ANNOTATIONS_CSV_PATH, "r") as file:
    # Split every non-blank row on commas; the first row holds the column names.
    lines = [line.split(",") for line in file.read().splitlines() if line.strip()]

# One dict per row keyed by column name; digit-only strings are converted to int.
annotations = [
    {key: int(value) if value.isdigit() else value for key, value in zip(lines[0], row)}
    for row in lines[1:]
]

# PIL opens files lazily and keeps the file handle until the pixels are read.
# Decode each image inside a ``with`` block so no handle stays open, and convert
# to RGB because the normalisation below assumes three channels.
inputs = []
for annotation in annotations:
    with PIL.Image.open(f"{IMAGE_DIRECTORY_PATH}{annotation['img_id']}") as image:
        inputs.append(image.convert("RGB"))

# NOTE(review): the boxes are taken from the CSV unchanged (presumably pixel
# coordinates of the original image), while the inputs are resized/cropped to
# 224x224 and the detector head ends in a Sigmoid. The targets probably need to
# be rescaled to match the network's output range -- TODO confirm and fix.
targets = [
    torch.Tensor([annotation['xmin'], annotation['ymin'], annotation['xmax'], annotation['ymax']])
    for annotation in annotations
]
data = list(zip(inputs, targets))

# Per-channel statistics used to normalise the input tensors (the values
# commonly used with torchvision's ImageNet-pretrained models).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

input_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(MEAN, STD),
    ]
)

# Placeholder: no target transforms are defined yet, so it is not passed on.
target_transform = torchvision.transforms.Compose([])

dataset = Dataset(data, input_transform)

# Shuffle once with a fixed seed, then split into the two subsets.
training_dataset, validation_dataset = dataset.shuffle(SHUFFLE_SEED).split(0.8)

training_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=1, shuffle=True)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=1)


In [None]:
class LicensePlateBBOXDetector(torch.nn.Module):
    """Pretrained ResNet-50 backbone followed by an MLP head with four outputs.

    The backbone's classification layer is replaced by an identity so that it
    emits its pooled feature vector; ``net`` maps that vector to four values
    squashed into ``(0, 1)`` by a Sigmoid.
    """

    def __init__(self):
        super().__init__()

        self.base_model = torchvision.models.resnet50(pretrained=True)
        # Put the backbone in eval mode. Note that its parameters are not
        # frozen, and a later ``model.train()`` call switches it back.
        self.base_model.eval()

        # Build the head before ``fc`` is replaced below: the input width is
        # read from the original classification layer.
        self.net = torch.nn.Sequential(
            torch.nn.Linear(self.base_model.fc.in_features, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 4),
            torch.nn.Sigmoid()
        )

        # Drop the classifier so the backbone returns features instead of logits.
        self.base_model.fc = torch.nn.Identity()

    def forward(self, input):
        """Return the head's four outputs for each element of the batch.

        Args:
            input: Batch of image tensors accepted by the ResNet backbone.

        Returns:
            Tensor with four values per batch element.
        """
        features = self.base_model(input)
        # The head must be applied here; returning ``features`` directly yields
        # the backbone's feature vector, which cannot be compared with a
        # four-value target.
        return self.net(features)

In [None]:
# Build the detector and move it to the GPU (when one is available) before the
# optimizer is created: PyTorch documents that a module which will be optimised
# on the GPU should be moved there before its optimizer is constructed.
model = LicensePlateBBOXDetector()
if torch.cuda.is_available():
    model = model.cuda()

criterion = torch.nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

def train(num_epochs, dataloader, model, criterion, optimizer):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Each batch is moved to the device of the model's parameters, run through
    the model in a single forward pass, and followed by one optimizer step.

    Args:
        num_epochs: Number of full passes over ``dataloader``.
        dataloader: Sized iterable yielding ``(inputs, targets)`` tensor batches.
        model: Module to optimise; its train/eval mode is left unchanged.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
    """
    # Batches must live on the same device as the model, otherwise the forward
    # pass fails as soon as the model has been moved to the GPU.
    first_parameter = next(model.parameters(), None)
    device = None if first_parameter is None else first_parameter.device

    for _ in range(num_epochs):
        for index, (inputs, targets) in enumerate(dataloader):
            if index % 100 == 0:
                print(f"Processing batch: {index}/{len(dataloader)}")

            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)

            # The dataloader already stacks samples into a batch, so the whole
            # batch is processed at once instead of one sample at a time.
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()

def validate(dataloader, model, criterion):
    """Print the summed per-sample loss of ``model`` over ``dataloader``.

    Gradients are disabled during evaluation. Every sample of every batch
    contributes one ``criterion`` value to the total.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        model: Module to evaluate; its train/eval mode is left unchanged.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss.
    """
    # Batches must live on the same device as the model's parameters.
    first_parameter = next(model.parameters(), None)
    device = None if first_parameter is None else first_parameter.device

    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets in dataloader:
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)

            # One forward pass per batch, then one loss per sample so that no
            # sample is skipped whatever the batch size is.
            outputs = model(inputs)
            for output, target in zip(outputs, targets):
                # ``.item()`` keeps the running total a plain Python float.
                total_loss += criterion(output.unsqueeze(0), target.unsqueeze(0)).item()

    print(f"Total loss: {total_loss:.3f}")


# Report the summed validation loss of the current model.
validate(
    dataloader=validation_dataloader,
    model=model,
    criterion=torch.nn.L1Loss(),
)
# Training is disabled for now; to train for one epoch, uncomment:
# train(1, training_dataloader, model, torch.nn.L1Loss(), optimizer)


  return F.l1_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (2048) must match the size of tensor b (4) at non-singleton dimension 1