In [None]:
# # Mount Google Drive (Colab only)

# from google.colab import drive

# drive.mount("/content/drive")

# %cd '/content/drive/MyDrive/ColabNotebooks/gt_omscs_ml/deep_learning/Convolutionalists/FoodforDeepThought'

# !pip install -q condacolab
# import condacolab
# condacolab.install()

In [None]:
# !conda install pip pytorch=2.5.1 torchvision=0.20.1 jupyter ipykernel torchmetrics

In [None]:
# !pip install openimages ultralytics==8.3.40 opencv-python matplotlib Pillow requests scipy tqdm pandas seaborn tensorboard torchmetrics[detection] transformers==4.46.3

In [1]:
# utils
from src.model_managers.standard_model_manager import (StandardModelManager,
                                                       FRCNNModelManager)
from tqdm import tqdm, tqdm_notebook
# `plt` must name the pyplot submodule; the top-level `matplotlib` package
# has no plot()/subplots()/show().
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os

# torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import (fasterrcnn_resnet50_fpn_v2,
                                         fasterrcnn_resnet50_fpn)
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torchvision import tv_tensors
import torch.optim as optim
import torch.nn as nn
import torchvision
import torch

# transformers
from transformers import BertTokenizer, BertForQuestionAnswering
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# load data
from src.dataset_loaders.download_openimages import (OpenImagesLoader,
                                                     ImageLoaderFRCNN)
from src.dataset_loaders.fruits360 import Fruits360Loader

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print(f"Device being used: {device}")

Device being used: cpu


In [2]:
def get_transform(train, size=100):
    """Build the image/target preprocessing pipeline for the detection datasets.

    The pipeline resizes the image to a ``size`` x ``size`` square, rescales
    ``target["boxes"]`` by the same factors, converts the image with
    ``v2.ToTensor()`` and, when ``train`` is true, appends a ``v2.Normalize``
    step.

    Args:
        train: When true, append the normalisation step.
        size: Side length in pixels of the square output image. Defaults to
            100, the value that was previously hard-coded.

    Returns:
        A ``v2.Compose`` whose first step expects ``(image, target)`` and
        returns the transformed ``(image, target)`` pair.
    """
    # Build the resize op once instead of once per sample.
    resize = v2.Resize((size, size))

    def resize_with_boxes(image, target):
        """Resize ``image`` and scale the box coordinates in ``target`` to match."""
        if isinstance(image, torch.Tensor):
            # Tensor images are laid out (..., H, W).
            orig_h, orig_w = image.shape[-2:]
        else:
            # PIL-style images report ``size`` as (width, height).
            orig_w, orig_h = image.size
        image = resize(image)
        if target is not None:
            scale_x, scale_y = size / orig_w, size / orig_h
            # Work on a copy so the caller's annotation tensor is never
            # rescaled in place (repeated access would otherwise shrink the
            # boxes again and again).
            boxes = target["boxes"].clone()
            if boxes.numel() > 0:
                # Columns 0 and 2 are scaled by the x factor, 1 and 3 by y.
                boxes[:, [0, 2]] *= scale_x
                boxes[:, [1, 3]] *= scale_y
            target = {**target, "boxes": boxes}
        return image, target

    # NOTE(review): v2.ToTensor is documented as deprecated in recent
    # torchvision releases; kept here to preserve the current output type.
    transf = [resize_with_boxes, v2.ToTensor()]

    if train:
        # NOTE(review): normalisation is applied only when train=True, so
        # validation images go through un-normalised. torchvision detection
        # models presumably also normalise internally - confirm this
        # asymmetry is intended.
        transf.append(v2.Normalize(mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225]))

    return v2.Compose(transf)

# Preprocessing pipelines for the training and validation splits.
ttform, vtform = get_transform(train=True), get_transform(train=False)

# Configure the OpenImages loader.
loader = OpenImagesLoader(
    random_seed=101,
    batch_size=2,
    perc_keep=1.0,
    num_images_per_class=500,
)

# Mirror the loader's settings into notebook-level names.
opim_dir, seed = loader.data_dir, loader.random_seed
batch_size, per_keep = loader.batch_size, loader.perc_keep
im_per_class = loader.num_images_per_class

ann_form, classes = loader.annotation_format, loader.classes
class2index = loader.class_2_index
train_direct, val_direct, test_direct = (
    loader.train_dir,
    loader.val_dir,
    loader.test_dir,
)

# Train and validation datasets each get their own transform pipeline.
train_dataset, val_dataset = (
    ImageLoaderFRCNN(root=split_root, classes=classes, tforms=split_tform)
    for split_root, split_tform in ((train_direct, ttform),
                                    (val_direct, vtform))
)
# NOTE(review): the test split is built without `tforms`, unlike train/val -
# confirm ImageLoaderFRCNN's default is what evaluation expects.
test_dataset = ImageLoaderFRCNN(root=test_direct, classes=classes)

len(train_dataset), len(val_dataset), len(test_dataset)

(12531, 1601, 1660)

In [3]:
# Number of samples drawn from the start of each split for this quick run.
N_TRAIN, N_VAL, N_TEST = 750, 100, 100

# Indices 0..N-1 of each split, clamped so a smaller dataset cannot produce
# out-of-range indices.
train_idx = list(range(min(N_TRAIN, len(train_dataset))))  # indices 0 to 749
val_idx = list(range(min(N_VAL, len(val_dataset))))        # indices 0 to 99
test_idx = list(range(min(N_TEST, len(test_dataset))))     # indices 0 to 99

# Each sampler gets its own generator seeded from the loader's seed so the
# visiting order is reproducible across kernel restarts.
tr_samp = SubsetRandomSampler(train_idx,
                              generator=torch.Generator().manual_seed(seed))
val_samp = SubsetRandomSampler(val_idx,
                               generator=torch.Generator().manual_seed(seed))
te_samp = SubsetRandomSampler(test_idx,
                              generator=torch.Generator().manual_seed(seed))

def collate(data):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    For example ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; no stacking is performed.
    """
    per_field = zip(*data)
    return tuple(per_field)

# Settings shared by all three loaders. `shuffle` stays False because the
# visiting order is delegated to the per-split sampler.
_loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    collate_fn=collate,
)

train_loader = torch.utils.data.DataLoader(
    train_dataset, sampler=tr_samp, **_loader_kwargs)
val_loader = torch.utils.data.DataLoader(
    val_dataset, sampler=val_samp, **_loader_kwargs)
test_loader = torch.utils.data.DataLoader(
    test_dataset, sampler=te_samp, **_loader_kwargs)

len(train_loader), len(val_loader), len(test_loader)

(375, 50, 50)

In [4]:
# create model
def get_model(num_classes):
    """Return a Faster R-CNN (ResNet-50 FPN v2) with a replaced box predictor.

    The detector is created with the "COCO_V1" weights and its
    ``roi_heads.box_predictor`` is swapped for a new ``FastRCNNPredictor``
    that takes the same input feature size.

    Args:
        num_classes: Number of classes passed to the new ``FastRCNNPredictor``.

    Returns:
        The modified detection model.
    """
    detector = fasterrcnn_resnet50_fpn_v2(weights="COCO_V1")

    # Reuse the input width of the existing classification layer for the new head.
    roi_heads = detector.roi_heads
    head_in_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_dim, num_classes)

    return detector

# NOTE(review): 138 is hard-coded; presumably it should track the number of
# entries in `classes` (plus background) - confirm against the loader.
NUM_CLASSES = 138
model = get_model(num_classes=NUM_CLASSES)

In [5]:
# Training setup: hyperparameters, evaluation metric, optimiser, and the
# manager object that ties them to the model and device.
lr, epochs = 0.001, 10
metric = MeanAveragePrecision()
optimizer = optim.AdamW(model.parameters(), lr=lr)
smmfr = FRCNNModelManager(
    model=model,
    metric=metric,
    optimizer=optimizer,
    device=device,
)

Before transf img size: (1024, 1024)
boxes size:
tensor([[147., 243., 252., 339.],
        [148., 336., 235., 427.],
        [198., 385., 318., 460.],
        [275., 230., 380., 324.],
        [291.,  51., 425., 144.],
        [316., 280., 519., 494.],
        [344.,  97., 536., 291.],
        [390., 702., 579., 903.],
        [577., 265., 644., 366.],
        [601., 356., 812., 556.],
        [604., 155., 720., 284.],
        [617., 934., 800., 971.],
        [659., 284., 740., 372.],
        [824., 307., 964., 494.],
        [865., 716., 992., 961.]])


After transf img size: torch.Size([3, 100, 100])
boxes size:
tensor([[14.3555, 23.7305, 24.6094, 33.1055],
        [14.4531, 32.8125, 22.9492, 41.6992],
        [19.3359, 37.5977, 31.0547, 44.9219],
        [26.8555, 22.4609, 37.1094, 31.6406],
        [28.4180,  4.9805, 41.5039, 14.0625],
        [30.8594, 27.3438, 50.6836, 48.2422],
        [33.5938,  9.4727, 52.3438, 28.4180],
        [38.0859, 68.5547, 56.5430, 88.1836],
        [56.3477, 25.8789, 62.8906, 35.7422],
        [58.6914, 34.7656, 79.2969, 54.2969],
        [58.9844, 15.1367, 70.3125, 27.7344],
        [60.2539, 91.2109, 78.1250, 94.8242],
        [64.3555, 27.7344, 72.2656, 36.3281],
        [80.4688, 29.9805, 94.1406, 48.2422],
        [84.4727, 69.9219, 96.8750, 93.8477]])

In [None]:
# Train for `epochs` epochs on the training loader, passing the validation
# loader to the manager.
smmfr.train(
    training_data_loader=train_loader,
    validation_data_loader=val_loader,
    epochs=epochs,
)