In [1]:
# utils
from src.model_managers.standard_model_manager import StandardModelManager
from src.model_managers.standard_model_manager import FRCNNModelManager
from tqdm import tqdm, tqdm_notebook
import matplotlib as plt  # NOTE(review): `plt` conventionally aliases matplotlib.pyplot; confirm intent
import numpy as np
import time
import os

# torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import torch.nn as nn
import torchvision
import torch

# transformers
from transformers import BertTokenizer, BertForQuestionAnswering
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# load data
from src.dataset_loaders.fruits360 import Fruits360Loader
from src.dataset_loaders.download_openimages import OpenImagesLoader

# Choose the compute backend by preference: CUDA first, then MPS, and plain
# CPU only when neither accelerator reports itself as available.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

print(f"Device being used: {device}")

Device being used: cpu


In [None]:
# Build the dataset loaders. Both sources are configured with the same seed,
# batch size and keep-fraction, so the settings are written down once.
loader_kwargs = dict(random_seed=101, batch_size=128, perc_keep=1.0)

# Fruits360: load_data() is unpacked into the train / validation / test splits.
fl = Fruits360Loader(**loader_kwargs)
train_fl, val_fl, test_fl = fl.load_data()

# OpenImages: only the loader object is created here. The two calls below are
# left disabled (presumably one-time setup steps - confirm before re-enabling).
oil = OpenImagesLoader(**loader_kwargs)
# oil.download_data()
# oil.split_data()

In [None]:
def get_datasets(self):
    """Build shuffled train/validation/test ``DataLoader``s from an on-disk image tree.

    Images are read with ``ImageFolder`` from the ``train``, ``val`` and
    ``test`` sub-directories of ``self.data_dir``. Nothing is downloaded here:
    the dataset must already be present in those directories.

    Reads ``self.data_dir``, ``self.transforms``, ``self.random_seed``,
    ``self.perc_keep`` and ``self.batch_size``. As a side effect the three
    directory paths are stored on ``self`` as ``train_dir``, ``val_dir`` and
    ``test_dir``; the loaders themselves are only returned, not stored.

    When ``self.perc_keep`` differs from 1.0, each split is reduced to
    ``int(len(split) * self.perc_keep)`` randomly chosen samples. The draw uses
    a generator seeded with ``self.random_seed``, so the same seed always keeps
    the same samples.

    Returns:
        tuple: ``(train_set, val_set, test_set)``, one ``DataLoader`` per
        split, each using ``self.batch_size`` and ``shuffle=True``.
    """
    # Directories in which each split of the dataset resides.
    self.train_dir = os.path.join(self.data_dir, "train")
    self.val_dir = os.path.join(self.data_dir, "val")
    self.test_dir = os.path.join(self.data_dir, "test")

    train_raw = ImageFolder(self.train_dir, transform=self.transforms)
    val_raw = ImageFolder(self.val_dir, transform=self.transforms)
    test_raw = ImageFolder(self.test_dir, transform=self.transforms)

    if self.perc_keep != 1.00:
        # Seeded generator that drives the sub-sampling below.
        generator = torch.Generator().manual_seed(self.random_seed)

        # Number of samples to keep from each split.
        train_size = int(len(train_raw) * self.perc_keep)
        val_size = int(len(val_raw) * self.perc_keep)
        test_size = int(len(test_raw) * self.perc_keep)

        # The generator has to be passed explicitly: without it random_split
        # falls back to the global RNG and the kept subset would change from
        # run to run regardless of random_seed.
        train_raw, _ = random_split(
            train_raw, [train_size, len(train_raw) - train_size], generator=generator
        )
        val_raw, _ = random_split(
            val_raw, [val_size, len(val_raw) - val_size], generator=generator
        )
        test_raw, _ = random_split(
            test_raw, [test_size, len(test_raw) - test_size], generator=generator
        )

    # Wrap every split in its own shuffling DataLoader.
    train_set = DataLoader(train_raw, batch_size=self.batch_size, shuffle=True)
    val_set = DataLoader(val_raw, batch_size=self.batch_size, shuffle=True)
    test_set = DataLoader(test_raw, batch_size=self.batch_size, shuffle=True)

    return train_set, val_set, test_set


In [None]:
# get/create model
def get_model(num_classes):
    """Create a Faster R-CNN detector whose box head scores ``num_classes`` classes.

    The detector is built with ``fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")``
    and its box predictor is then replaced by a new ``FastRCNNPredictor``
    sized for ``num_classes``.

    Builders noted as candidates for the backbone:
    fasterrcnn_resnet50_fpn, fasterrcnn_resnet50_fpn_v2,
    fasterrcnn_mobilenet_v3_large_fpn, fasterrcnn_mobilenet_v3_large_320_fpn.

    Args:
        num_classes: number of outputs for the replacement box predictor.

    Returns:
        The detector with its box predictor swapped out.
    """
    detector = fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")

    # The replacement head takes the same input width as the head it replaces.
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, num_classes)

    return detector


# NOTE(review): torchvision detection heads count the background as a class;
# confirm that 138 already includes it.
model = get_model(num_classes=138)

In [None]:
# train and evaluate model
# Hyperparameters for the run.
epochs = 10
lr = 0.001

# Loss and optimizer handed to the model manager; the optimizer covers every
# parameter of the model.
optimizer = optim.AdamW(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# The manager bundles model, loss, optimizer and target device.
smm = FRCNNModelManager(
    device=device,
    optimizer=optimizer,
    criterion=criterion,
    model=model,
)

In [None]:
# Sanity check: report the sizes of the first two training batches, then stop.
for idx, (data, target) in enumerate(train_fl):
    print(f"idx: {idx}\nlen data: {len(data)}\n len target: {len(target)}")
    if idx == 1:
        # Two batches are enough to confirm the loader yields (data, target) pairs.
        break

idx: 0
len data: 128
 len target: 128
idx: 1
len data: 128
 len target: 128


In [None]:
# Start training with the train and validation loaders for `epochs` epochs.
smm.train(
    epochs=epochs,
    training_data_loader=train_fl,
    validation_data_loader=val_fl,
)