In [None]:
import torch
import torchvision
from torchvision.transforms import v2 as T
from torchsummary import summary

# paths
import os
import sys

# set paths
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# my imports
from datasets.voc_subset import VOCSubset
from utils import bbox_utils
from models.models import SoSiDetectionModel

# the lifesaver
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [71]:
# torch setup
# Call torch.manual_seed rather than assigning to it: an assignment would replace
# the function with the int 42 on the torch module and leave the RNG unseeded.
torch.manual_seed(42)

# run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## ID calculation
Select backbone based on ID sum:

In [41]:
# the two ID numbers, kept as strings so they can be walked digit by digit
id1 = '308564293'
id2 = '040830861'

# first pass: add up every digit of both IDs
id_sum = sum(map(int, id1 + id2))
# second pass: add up the digits of that total
id_sum = sum(map(int, str(id_sum)))
print(f'ID digits sum is: {id_sum}, selecting MobilNet V3.')

ID digits sum is: 7, selecting MobilNet V3.


## P1 - Single Class, Single Instance
In this part we will train a detector for a single class, single instance per image.

### Model Construction
We begin by loading the single-object, single-instance (SoSi) detection model with the MobileNet V3 backbone.

### Dataset
Next, we load the `subset` dataset restricted to the single class `cat` and split it into training and validation sets. Since we have only ~1000 images, we do not hold out a third (test) split.

In [35]:
# class whose images make up the subset
selected_class_name = "cat"

# the per-class index file sits under ../datasets/indices (relative path)
indices_dir = os.path.join("..", "datasets", "indices")
indices_file = os.path.join(indices_dir, f"VOC_{selected_class_name}_selected_indices.pth")

# read the stored indices of the images that contain the selected class
indices_list = torch.load(indices_file)

#### Transforms
Before building the dataset we need to define the transformations applied to the data.

We will take the basic data transformations from the backbone of the model, and add a few augmentations later.

In [None]:
# TODO finish
def get_transform(train):
    """Compose the v2 transform pipeline for one dataset split.

    Args:
        train: when truthy, a random horizontal flip (p=0.5) is placed
            at the front of the pipeline as augmentation.

    Returns:
        A ``T.Compose`` that applies the optional flip, then converts to
        ``torch.float`` with value scaling, then ``T.ToPureTensor()``.
    """
    # augmentation is only added for the training split
    pipeline = [T.RandomHorizontalFlip(0.5)] if train else []
    # steps shared by both splits
    pipeline += [
        T.ToDtype(torch.float, scale=True),
        T.ToPureTensor(),
    ]
    return T.Compose(pipeline)

### Dataset and Dataloader
Build two datasets, each with the appropriate transformations, split them into training and validation subsets, and then build a data loader for each.

In [None]:
# arguments shared by the training and validation datasets
voc_kwargs = dict(
    indices_list=indices_list,
    selected_class=selected_class_name,
    single_object=True,
)

# two separate dataset objects over the same index list, each with its own transform instance
trainDataset = VOCSubset(transform=torchvision.transforms.ToTensor(), **voc_kwargs)
valDataset = VOCSubset(transform=torchvision.transforms.ToTensor(), **voc_kwargs)

# draw one random permutation of the sample positions and cut it at the train fraction
split_ratio = 0.7
idx = torch.randperm(len(trainDataset)).tolist()
train_size = int(split_ratio * len(idx))
train_idx, val_idx = idx[:train_size], idx[train_size:]

# the two subsets take disjoint slices of the same permutation
train_dataset = torch.utils.data.Subset(trainDataset, train_idx)
val_dataset = torch.utils.data.Subset(valDataset, val_idx)

Build data loaders:

In [None]:
# `from utils import bbox_utils` in the import cell binds only `bbox_utils`, so the
# package name itself must be imported before `utils.collate_fn` can be resolved;
# without this the cell raises NameError on a fresh kernel.
# NOTE(review): assumes the project's `utils` package exposes `collate_fn` — confirm.
import utils

# training loader: shuffled batches of two samples
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=utils.collate_fn
)

# validation loader: one sample at a time, in a fixed order
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=utils.collate_fn
)