In [1]:
# Colab bootstrap: mount Google Drive, switch into the project checkout on the
# drive, then install and initialise condacolab for this runtime.

from google.colab import drive

drive.mount("/content/drive")

# Work from the project root so the `src.*` imports in later cells resolve.
%cd '/content/drive/MyDrive/ColabNotebooks/gt_omscs_ml/deep_learning/Convolutionalists/FoodforDeepThought'

# NOTE(review): condacolab.install() presumably restarts the kernel the first
# time it runs in a fresh runtime — confirm, and re-run from this cell if so.
!pip install -q condacolab
import condacolab
condacolab.install()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/ColabNotebooks/gt_omscs_ml/deep_learning/Convolutionalists/FoodforDeepThought
✨🍰✨ Everything looks OK!


In [None]:
!conda install pip pytorch=2.5.1 torchvision=0.20.1 jupyter ipykernel torchmetrics

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ 

In [None]:
!pip install openimages ultralytics==8.3.40 opencv-python matplotlib Pillow requests scipy tqdm pandas seaborn tensorboard torchmetrics[detection] transformers==4.46.3

In [None]:
# utils
from src.model_managers.standard_model_manager import (StandardModelManager,
                                                       FRCNNModelManager)
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os

# torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import (fasterrcnn_resnet50_fpn_v2,
                                         fasterrcnn_resnet50_fpn)
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torchvision import tv_tensors
import torch.optim as optim
import torch.nn as nn
import torchvision
import torch

# transformers
from transformers import BertTokenizer, BertForQuestionAnswering
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# load data
from src.dataset_loaders.download_openimages import (OpenImagesLoader,
                                                     ImageLoaderFRCNN)
from src.dataset_loaders.fruits360 import Fruits360Loader

# Select the compute device: prefer CUDA, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print(f"Device being used: {device}")

In [None]:
# get data
# fl = Fruits360Loader(random_seed=101,
#                      batch_size=128,
#                      perc_keep=1.0)
# train_fl, val_fl, test_fl = fl.load_data()

# oil = OpenImagesLoader(random_seed=101,
#                         batch_size=128,
#                         perc_keep=1.0,
#                         num_images_per_class=500,
#                        annotation_format='pascal')

# oil.download_data()
# oil.split_data(keep_class_dirs=False)
#train, val, test = oil.get_datasets()


# csb = pd.read_csv("data/openimages_csv_dir/class-descriptions-boxable.csv",
#                   header=None,
#                   names=['LabelName',
#                            'Label'])
# trabb = pd.read_csv("data/openimages_csv_dir/train-annotations-bbox.csv",)
# vabb = pd.read_csv("data/openimages_csv_dir/validation-annotations-bbox.csv",)
# teabb = pd.read_csv("data/openimages_csv_dir/test-annotations-bbox.csv",)

# food_cats = ["Hot dog", "French fries", "Waffle", "Pancake", "Burrito", "Pretzel",
#             "Popcorn", "Cookie", "Muffin", "Ice cream", "Cake", "Candy",
#             "Guacamole", "Apple", "Grape", "Common fig", "Pear",
#             "Strawberry", "Tomato", "Lemon", "Banana", "Orange", "Peach", "Mango",
#             "Pineapple", "Grapefruit", "Pomegranate", "Watermelon", "Cantaloupe",
#             "Egg (Food)", "Bagel", "Bread", "Doughnut", "Croissant",
#             "Tart", "Mushroom", "Pasta", "Pizza", "Squid",
#             "Oyster", "Lobster", "Shrimp", "Crab", "Taco", "Cooking spray",
#             "Cucumber", "Radish", "Artichoke", "Potato", "Garden Asparagus",
#             "Pumpkin", "Zucchini", "Cabbage", "Carrot", "Salad",
#             "Broccoli", "Bell pepper", "Winter melon", "Honeycomb",
#             "Hamburger", "Submarine sandwich", "Cheese", "Milk", "Sushi"]

# csb = csb[csb['Label'].isin(food_cats)]
# csb

# print(len(trabb))
# trabb = trabb[trabb['LabelName'].isin(csb["LabelName"])]
# print(len(trabb))
# trabb_csb = pd.merge(csb, trabb, on='LabelName', how='inner')
# print(len(trabb_csb))
# trabb_csb

# print(len(vabb))
# vabb = vabb[vabb['LabelName'].isin(csb["LabelName"])]
# print(len(vabb))
# vabb_csb = pd.merge(csb, vabb, on='LabelName', how='inner')
# print(len(vabb_csb))
# vabb_csb

# print(len(teabb))
# teabb = teabb[teabb['LabelName'].isin(csb["LabelName"])]
# print(len(teabb))
# teabb_csb = pd.merge(csb, teabb, on='LabelName', how='inner')
# print(len(teabb_csb))
# teabb_csb

# train_target = [{"label": torch.Tensor(row["Label"]),
#                  "boxes": torch.tensor([row["XMin"], row["XMax"], row["YMin"], row["YMax"]])}
#                 for _, row in trabb_csb.iterrows()]
# train_target

def get_transform(train):
    """Build the image preprocessing pipeline for a dataset split.

    The same pipeline is returned for every split so that training and
    evaluation images reach the model with identical preprocessing.

    Args:
        train: Whether the pipeline is for the training split. Currently
            unused; kept for interface compatibility and as the hook for
            train-only augmentation.

    Returns:
        A ``v2.Compose`` that resizes the image and converts it to a plain
        float32 tensor scaled to the [0, 1] range.
    """
    # No v2.Normalize here, for two reasons:
    #  * it used to be applied to the training split only, so train and
    #    validation images were preprocessed differently;
    #  * torchvision's Faster R-CNN expects inputs in the 0-1 range and applies
    #    ImageNet mean/std normalisation inside the model, so normalising here
    #    would normalise twice.
    # NOTE(review): whether the dataset also passes the target (boxes) through
    # these transforms is not visible here — confirm boxes follow the resize.
    return v2.Compose([
        # An int size matches the shorter image edge to 100 px, keeping the
        # aspect ratio.
        v2.Resize(100),
        # ToImage + ToDtype(scale=True) is the documented replacement for the
        # deprecated v2.ToTensor; ToPureTensor keeps the output a plain
        # torch.Tensor as before.
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.ToPureTensor(),
    ])

# One transform pipeline for training and one shared by the held-out splits.
ttform = get_transform(train=True)
vtform = get_transform(train=False)

# The loader object supplies the configuration, class list and the per-split
# directories used below.
loader = OpenImagesLoader(random_seed=101,
                          batch_size=2,
                          perc_keep=1.0,
                          num_images_per_class=500)
opim_dir = loader.data_dir
seed = loader.random_seed
batch_size = loader.batch_size
per_keep = loader.perc_keep
im_per_class = loader.num_images_per_class

ann_form = loader.annotation_format
classes = loader.classes
class2index = loader.class_2_index
train_direct = loader.train_dir
val_direct = loader.val_dir
test_direct = loader.test_dir

train_dataset = ImageLoaderFRCNN(root=train_direct,
                                 classes=classes,
                                 tforms=ttform)
val_dataset = ImageLoaderFRCNN(root=val_direct,
                               classes=classes,
                               tforms=vtform)
# The test split uses the same evaluation transform as validation so both
# held-out splits are preprocessed identically (it previously received no
# transform at all).
test_dataset = ImageLoaderFRCNN(root=test_direct,
                                classes=classes,
                                tforms=vtform)

len(train_dataset), len(val_dataset), len(test_dataset)

In [None]:
# Work on a small prefix of each split to keep Colab runs short. Each size is
# clamped to the dataset length so a smaller download cannot produce
# out-of-range indices.
train_idx = list(range(min(1000, len(train_dataset))))  # indices 0..999 at most
val_idx = list(range(min(100, len(val_dataset))))  # indices 0..99 at most
test_idx = list(range(min(100, len(test_dataset))))  # indices 0..99 at most

# Each sampler gets its own generator seeded from the loader's random seed so
# the sampling order is reproducible after a kernel restart.
tr_samp = SubsetRandomSampler(train_idx,
                              generator=torch.Generator().manual_seed(seed))
val_samp = SubsetRandomSampler(val_idx,
                               generator=torch.Generator().manual_seed(seed))
te_samp = SubsetRandomSampler(test_idx,
                              generator=torch.Generator().manual_seed(seed))

def collate(data):
    """Transpose a batch of samples into one tuple per sample field.

    Args:
        data: An iterable of equally structured samples, e.g.
            ``[(image_0, target_0), (image_1, target_1)]``.

    Returns:
        A tuple of tuples grouped by field position, e.g.
        ``((image_0, image_1), (target_0, target_1))``. An empty batch yields
        an empty tuple.
    """
    per_field = zip(*data)
    return tuple(per_field)

# Without subset sampling the loader lengths were
# train_loader: 98, val_loader: 13, test_loader: 13.

# Settings shared by all three loaders; the splits differ only in their
# dataset and sampler.
_loader_kwargs = dict(batch_size=batch_size,
                      shuffle=False,
                      num_workers=4,
                      collate_fn=collate)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           sampler=tr_samp,
                                           **_loader_kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         sampler=val_samp,
                                         **_loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          sampler=te_samp,
                                          **_loader_kwargs)

len(train_loader), len(val_loader), len(test_loader)

In [None]:
# get/create model
def get_model(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN v2) detector with a new box head.

    The detector is loaded with its "DEFAULT" weights and its box predictor is
    replaced by a fresh ``FastRCNNPredictor`` sized for ``num_classes``.

    Other model builders that could be swapped in:
        fasterrcnn_resnet50_fpn,
        fasterrcnn_resnet50_fpn_v2,
        fasterrcnn_mobilenet_v3_large_fpn,
        fasterrcnn_mobilenet_v3_large_320_fpn

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The detector with the replaced box predictor.
    """
    detector = fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")

    # Reuse the input width of the existing classification layer so the new
    # head plugs into the same ROI features.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features,
                                                         num_classes)

    return detector

# NOTE(review): 138 is hard-coded; presumably it should track the dataset's
# class list (plus a background class) — confirm against `classes`.
NUM_CLASSES = 138
model = get_model(num_classes=NUM_CLASSES)

In [None]:
# train and evaluate model
# Hyper-parameters for the run.
epochs = 10
lr = 0.001

# Detection metric and optimiser handed to the model manager.
optimizer = optim.AdamW(model.parameters(), lr=lr)
metric = MeanAveragePrecision()

# The manager bundles the model with its optimiser, metric and target device.
smmfr = FRCNNModelManager(
    model=model,
    metric=metric,
    optimizer=optimizer,
    device=device,
)

In [None]:
# import xml.etree.ElementTree as ET
# from PIL import Image

# data_dir = os.path.join("data", "openimages")
# train_dir = os.path.join(data_dir, "train") # Directory in which train dataset resides
# imgs = list(sorted(os.listdir(os.path.join(train_dir, "images"))))
# annotations = list(sorted(os.listdir(os.path.join(train_dir, "annotations"))))
# for i, c in enumerate(classes):
#     img_path = os.path.join(train_dir, "images", imgs[i])
#     ann_path = os.path.join(train_dir, "annotations", annotations[i])

#     img = Image.open(img_path).convert("RGB")

#     # Parse the XML annotation file
#     tree = ET.parse(ann_path)
#     root = tree.getroot()

#     boxes = []
#     labels = []
#     for obj in root.findall('object'):
#         label = obj.find('name').text.capitalize()
#         if "food" in label:
#             label = label.replace("food", "Food")
#         print(label)


# fruits 360
# smm.train(training_data_loader=train_fl,
#           validation_data_loader=val,
#           epochs=epochs,
#           has_box=False)

# google colab
# reduce batch size
# reduce image size
# smaller subset of data

# Run the manager's training loop on the sampled train loader, passing the
# sampled validation loader and the configured number of epochs.
smmfr.train(
    training_data_loader=train_loader,
    validation_data_loader=val_loader,
    epochs=epochs,
)