In [8]:
# imports
from torch.utils.data import Dataset

from torch import nn

from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.nn import CrossEntropyLoss
from torch.nn import MSELoss
from torch.optim import Adam
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split
#from imutils import paths
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle
import torch
import time
import cv2
import os
import json
from typing import Dict, List
from torchvision.transforms import Normalize

In [2]:
class ObjectDetector(nn.Module):
    """Two-headed detector built on top of a classification backbone.

    The backbone's final ``fc`` layer is replaced by ``nn.Identity`` so that
    calling the backbone yields its pooled feature vector. Two independent
    heads consume that vector:

    * ``regressor``  -- MLP ending in ``Sigmoid`` that emits 4 values in [0, 1]
      (one bounding box per image).
    * ``classifier`` -- MLP with dropout that emits ``num_classes`` raw logits.

    Args:
        base_model: backbone module exposing an ``fc`` attribute with an
            ``in_features`` field (e.g. a torchvision ResNet). It is modified
            in place: its ``fc`` is swapped for ``nn.Identity``.
        num_classes: number of output logits of the classification head.
    """

    def __init__(self, base_model, num_classes) -> None:
        super().__init__()
        self.base_model = base_model
        self.num_classes = num_classes

        # Read the feature width before ``fc`` is replaced below.
        in_features = base_model.fc.in_features

        # Bounding-box head. Consecutive Linear layers must agree on width;
        # the 64 -> 32 stage is what lets the final Linear(32, 4) line up.
        self.regressor = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 4),
            nn.Sigmoid(),
        )

        # Class-label head (outputs unnormalised logits).
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, num_classes),
        )

        # Make the backbone return its features instead of its own
        # classification output.
        self.base_model.fc = nn.Identity()

    def forward(self, x):
        """Run the backbone once and feed its features to both heads.

        Args:
            x: input batch accepted by ``base_model``.

        Returns:
            Tuple ``(bboxes, class_logits)`` where ``bboxes`` has 4 values per
            sample in [0, 1] and ``class_logits`` has ``num_classes`` values
            per sample.
        """
        features = self.base_model(x)
        # Both heads operate on the backbone features, not on the raw input.
        bboxes = self.regressor(features)
        class_logits = self.classifier(features)
        return (bboxes, class_logits)



In [195]:

class ImageDataset(Dataset):
    """In-memory dataset of images and their bounding-box annotations.

    Every regular file in ``annotations_path`` is parsed as JSON holding a
    one-element list; that element is a dict with an ``"image"`` file name
    (resolved relative to ``images_path``) and an ``"annotations"`` list whose
    items each carry a ``"coordinates"`` entry.

    Attributes:
        images: one float64 tensor per annotation file, laid out as
            (width, height, channel) so the first two axes follow the (x, y)
            order of the box coordinates. Channels are in the order returned
            by ``cv2.imread`` (BGR).
        image_bboxes: one ``{"boxes": [...]}`` dict per image, same order as
            ``images``.
        means / stds: flat lists with one scalar tensor per (image, channel)
            pair, image-major (img0-ch0, img0-ch1, img0-ch2, img1-ch0, ...).

    Args:
        annotations_path: directory containing the JSON annotation files.
        images_path: directory containing the referenced images.
        transforms: optional callable applied to the image in ``__getitem__``.

    Raises:
        FileNotFoundError: if a referenced image cannot be read.
    """

    def __init__(self, annotations_path, images_path, transforms=None) -> None:
        self.transforms = transforms
        self.images: List[torch.Tensor] = []
        self.image_bboxes: List[Dict] = []

        # Sorted so that sample indices are reproducible across runs/machines
        # (os.listdir gives no ordering guarantee).
        for json_file_name in sorted(os.listdir(annotations_path)):
            file_path = os.path.join(annotations_path, json_file_name)
            if not os.path.isfile(file_path):
                # Skip sub-directories such as notebook checkpoint folders.
                continue

            # Context manager guarantees the handle is closed.
            with open(file_path, 'r') as f:
                annotation: Dict = json.load(f)[0]  # file holds a one-element list

            coordinates: Dict = {
                "boxes": [bbox["coordinates"] for bbox in annotation["annotations"]]
            }

            image_path = os.path.join(images_path, annotation["image"])
            image_array = cv2.imread(image_path)
            if image_array is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(
                    f"Could not read image '{image_path}' referenced by '{file_path}'"
                )

            # cv2 yields (H, W, C); swap the first two axes to (W, H, C) so the
            # tensor is indexed (x, y) like the bounding-box coordinates.
            image: torch.Tensor = torch.tensor(image_array, dtype=torch.float64).permute(1, 0, 2)

            self.image_bboxes.append(coordinates)
            self.images.append(image)

        # Per-image, per-channel statistics, stored flat (see class docstring).
        self.means = []
        self.stds = []
        for image_tensor in self.images:
            for channel in range(image_tensor.shape[2]):
                channel_values = image_tensor[:, :, channel].reshape(-1)
                self.means.append(channel_values.mean(dtype=torch.float64))
                self.stds.append(channel_values.std())

    def __getitem__(self, idx):
        """Return ``(image, boxes_dict)`` for ``idx``, applying ``transforms`` if set."""
        image = self.images[idx]
        if self.transforms is not None:
            image = self.transforms(image)
        return (image, self.image_bboxes[idx])

    def __len__(self):
        """Number of loaded images."""
        # ``images`` is a Python list, so use len() rather than Tensor.size().
        return len(self.images)



In [197]:
# Load all annotated frames into memory, then display the per-image,
# per-channel means gathered while the dataset was being built.
dataset = ImageDataset(annotations_path="../datasets/annotations/", images_path="../../frames/")
dataset.means

<class 'torch.Tensor'>


[tensor(160.4634, dtype=torch.float64),
 tensor(87.7588, dtype=torch.float64),
 tensor(103.3183, dtype=torch.float64),
 tensor(160.8827, dtype=torch.float64),
 tensor(87.8500, dtype=torch.float64),
 tensor(103.3913, dtype=torch.float64),
 tensor(160.8849, dtype=torch.float64),
 tensor(87.9408, dtype=torch.float64),
 tensor(103.4789, dtype=torch.float64),
 tensor(161.1748, dtype=torch.float64),
 tensor(88.1728, dtype=torch.float64),
 tensor(103.7497, dtype=torch.float64),
 tensor(160.7850, dtype=torch.float64),
 tensor(86.2934, dtype=torch.float64),
 tensor(106.5704, dtype=torch.float64),
 tensor(160.7273, dtype=torch.float64),
 tensor(86.2996, dtype=torch.float64),
 tensor(106.5258, dtype=torch.float64),
 tensor(160.7880, dtype=torch.float64),
 tensor(86.3078, dtype=torch.float64),
 tensor(106.5508, dtype=torch.float64),
 tensor(160.5984, dtype=torch.float64),
 tensor(86.3099, dtype=torch.float64),
 tensor(106.3697, dtype=torch.float64),
 tensor(160.9675, dtype=torch.float64),
 tensor(