In [1]:
# imports
from torch.utils.data import Dataset

from torch import nn

from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.nn import CrossEntropyLoss
from torch.nn import MSELoss
from torch.optim import Adam
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split
#from imutils import paths
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle
import torch
import time
import cv2
import os
import json
from typing import Dict, List

In [None]:
class ObjectDetector(nn.Module):
    """Two-headed detector: one shared backbone, a box regressor and a classifier.

    The backbone's final fully connected layer (``base_model.fc``) is replaced
    by ``nn.Identity`` so that calling the backbone returns the feature vector
    that used to be fed into that layer. Both heads consume this vector.

    Args:
        base_model: a module exposing an ``fc`` attribute with ``in_features``
            (e.g. a torchvision ResNet). It is modified in place: its ``fc``
            layer is replaced by ``nn.Identity``.
        num_classes: number of output logits produced by the classifier head.
    """

    def __init__(self, base_model, num_classes) -> None:
        super().__init__()
        self.base_model = base_model
        self.num_classes = num_classes

        # `fc.in_features` is the length of the feature vector the backbone
        # passes to its own last linear layer. Read it BEFORE `fc` is swapped
        # for Identity below, since Identity has no `in_features`.
        feature_dim = base_model.fc.in_features

        # Bounding-box head: 4 values squashed into (0, 1) by the sigmoid.
        # NOTE(review): presumably targets are box coordinates normalised to
        # [0, 1] -- confirm against the training loop.
        self.regressor = nn.Sequential(
            nn.Linear(feature_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 4),
            nn.Sigmoid(),
        )

        # Classification head: raw logits, one per class (no softmax here).
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, num_classes),
        )

        # Make the backbone return its pre-`fc` features instead of its own
        # class scores.
        self.base_model.fc = nn.Identity()

    def forward(self, x):
        """Run the backbone once and feed its features to both heads.

        Args:
            x: input batch accepted by ``base_model``.

        Returns:
            Tuple ``(bboxes, class_logits)`` with last dimensions ``4`` and
            ``num_classes`` respectively.
        """
        features = self.base_model(x)
        bboxes = self.regressor(features)
        class_logits = self.classifier(features)
        return (bboxes, class_logits)



In [2]:

class ImageDataset(Dataset):
    """In-memory dataset pairing each image with its annotated bounding boxes.

    Every entry of ``annotations_path`` is parsed as JSON. The first element of
    the parsed list must be a dict with an ``"image"`` entry (file name
    relative to ``images_path``) and an ``"annotations"`` list whose items each
    carry a ``"coordinates"`` entry.

    Args:
        annotations_path: directory containing the annotation JSON files.
        images_path: directory containing the images named in the annotations.
        transforms: optional callable applied to the image tensor on access.

    Raises:
        FileNotFoundError: if an image named in an annotation cannot be read.
    """

    def __init__(self, annotations_path, images_path, transforms=None) -> None:
        self.transforms = transforms
        self.images: List[torch.Tensor] = []
        self.image_bboxes: List[Dict] = []

        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same sample across runs and machines.
        for json_file_name in sorted(os.listdir(annotations_path)):
            file_path = os.path.join(annotations_path, json_file_name)
            # Context manager guarantees the handle is closed after parsing.
            with open(file_path, "r") as f:
                # The file holds a one-element list wrapping the annotation dict.
                annotation: Dict = json.load(f)[0]

            coordinates: Dict = {
                "boxes": [bbox["coordinates"] for bbox in annotation["annotations"]]
            }

            image_path = os.path.join(images_path, annotation["image"])
            pixels = cv2.imread(image_path)
            # cv2.imread signals failure by returning None rather than raising.
            if pixels is None:
                raise FileNotFoundError(f"Could not read image: {image_path}")

            # cv2 yields (H, W, C) with channels in BGR order; swapping the
            # first two axes gives (W, H, C) so indexing follows the (x, y)
            # order of the bounding-box coordinates.
            # NOTE(review): channels stay BGR -- convert if a downstream model
            # expects RGB.
            image: torch.Tensor = torch.tensor(pixels).permute(1, 0, 2)

            self.image_bboxes.append(coordinates)
            self.images.append(image)

    def __getitem__(self, idx):
        """Return ``(image, boxes_dict)`` for ``idx``, transforming the image if configured."""
        image = self.images[idx]
        if self.transforms is not None:
            image = self.transforms(image)
        return (image, self.image_bboxes[idx])

    def __len__(self):
        """Return the number of loaded samples."""
        return len(self.images)



In [3]:
# Load every annotation/image pair into memory, then show the first
# (image, boxes) sample as this cell's output.
dataset = ImageDataset(
    annotations_path="../../datasets/annotations/",
    images_path="../../../frames/",
)
dataset[0]

(tensor([[[180, 145, 102],
          [255, 224, 181],
          [252, 218, 175],
          ...,
          [ 64,  30, 225],
          [ 68,  38, 221],
          [ 54,  26, 203]],
 
         [[122,  83,  44],
          [255, 233, 193],
          [255, 234, 194],
          ...,
          [ 64,  30, 225],
          [ 68,  38, 221],
          [ 54,  26, 203]],
 
         [[ 88,  43,   9],
          [190, 148, 113],
          [255, 225, 187],
          ...,
          [ 64,  30, 225],
          [ 68,  38, 221],
          [ 54,  26, 203]],
 
         ...,
 
         [[195, 155, 126],
          [248, 206, 177],
          [216, 172, 143],
          ...,
          [ 66,  32, 222],
          [ 68,  38, 217],
          [ 54,  26, 199]],
 
         [[205, 167, 137],
          [241, 201, 172],
          [176, 134, 105],
          ...,
          [ 65,  31, 221],
          [ 66,  36, 215],
          [ 53,  25, 198]],
 
         [[215, 179, 149],
          [239, 201, 171],
          [147, 107,  78],
   