In [None]:
import os
import time
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

# from src.model_managers.detr_model_manager import DETRModelManager
from src.dataset_loaders.download_openimages import OpenImagesLoader
# from src.dataset_loaders.detr_dataset_processor import DETRDatasetProcessor
from tqdm import tqdm, tqdm_notebook


from transformers import DetrForObjectDetection


In [None]:

# Device Configuration:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"Device being used: {device}")

In [None]:
import os
import torchvision.transforms.v2 as transforms
from openimages.download import download_dataset
import random
import shutil
import torch
from torch.utils.data import DataLoader
from PIL import Image as PILImage
from torchvision import tv_tensors
from torchvision.transforms.v2 import Resize
from torchvision.ops import box_convert


class DETRDatasetProcessor():

    def __init__(self, random_seed = 101, batch_size = 128, perc_keep = 1.0, num_images_per_class=500):
        self.data_dir = os.path.join("data", "openimages")  # Directory in which dataset resides
        self.random_seed = random_seed
        self.batch_size = batch_size
        self.perc_keep = perc_keep  # Percentage of dataset to be kept (number between 0 and 1)
        self.num_images_per_class = num_images_per_class

        self.transforms_all = transforms.Compose(
            [
                Resize((512, 512)),
            ]
        )

        self.transforms_img = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # ImageNet's normalization statistics
            ]
        )

        self.classes = [
            "Hot dog", "French fries", "Waffle", "Pancake", "Burrito", "Pretzel",
            "Popcorn", "Cookie", "Muffin", "Ice cream", "Cake", "Candy",
            "Guacamole", "Apple", "Grape", "Common fig", "Pear",
            "Strawberry", "Tomato", "Lemon", "Banana", "Orange", "Peach", "Mango",
            "Pineapple", "Grapefruit", "Pomegranate", "Watermelon", "Cantaloupe",
            "Egg (Food)", "Bagel", "Bread", "Doughnut", "Croissant",
            "Tart", "Mushroom", "Pasta", "Pizza", "Squid",
            "Oyster", "Lobster", "Shrimp", "Crab", "Taco", "Cooking spray",
            "Cucumber", "Radish", "Artichoke", "Potato", "Garden Asparagus",
            "Pumpkin", "Zucchini", "Cabbage", "Carrot", "Salad",
            "Broccoli", "Bell pepper", "Winter melon", "Honeycomb",
            "Hamburger", "Submarine sandwich", "Cheese", "Milk", "Sushi"
        ]

        # Creating a dictionary mapping each class name to an index:
        self.class_2_index = {}
        for i, class_name in enumerate(self.classes):
            self.class_2_index[class_name.lower()] = i

        # Creating a dictionary mapping each class index to its corresponding class name:
        self.index_2_class = {}
        for k, v in self.class_2_index.items():
            self.index_2_class[v] = k

        self.train_dir = os.path.join(self.data_dir, "train") # Directory in which train dataset resides
        self.val_dir = os.path.join(self.data_dir, "val") # Directory in which validation dataset resides
        self.test_dir = os.path.join(self.data_dir, "test") # Directory in which test dataset resides

        self.train_red_dir = os.path.join(self.data_dir, "train_reduced") # Directory in which reduced train dataset resides
        self.val_red_dir = os.path.join(self.data_dir, "val_reduced") # Directory in which reduced validation dataset resides
        self.test_red_dir = os.path.join(self.data_dir, "test_reduced") # Directory in which reduced test dataset resides


    def download_data(self, annotation_format='darknet'):
        # download_dataset(self.data_dir, self.classes, annotation_format=annotation_format, limit=5)
        
        for class_name in self.classes:
            print(f'Attempting to download {class_name} data')
            if not os.path.isdir(os.path.join(self.data_dir, class_name.lower())):
                try:
                    download_dataset(self.data_dir, [class_name], annotation_format=annotation_format, limit=5)
                except Exception as e:
                    print(f'An exception occurred for {class_name}. ERROR: {e}')
            else:
                print(f'Skipped {class_name}, data already downloaded')


    def split_data(self, keep_class_dirs=True):

        """ This function splits the downloaded Open Image dataset, and splits each class into training, validation, and testing sets.
            This function assumes that the required data has already been downloaded."""

        # Setting the random seed:
        random.seed(self.random_seed)
        
        splits = ["train", "val", "test"]

        # Making folders for each of the splits:
        for split in splits:
            split_dir = os.path.join(self.data_dir, split)
            os.makedirs(split_dir, exist_ok=True)

        # Iterating through each class:
        for class_cur in self.classes:
            print(f'Splitting data for class {class_cur}')

            # Getting directories for the images and annotations for each class:
            imgs_dir = os.path.join(self.data_dir, class_cur.lower(), "images")
            anns_dir = os.path.join(self.data_dir, class_cur.lower(), "darknet")

            # Ensuring each class has images and annotations:
            if not imgs_dir:
                raise Exception(f'Images do not exist for {class_cur}!')

            if not anns_dir:
                raise Exception(f'Annotations do not exist for {class_cur}!')

            class_imgs = os.listdir(imgs_dir) # Images for current class
            class_anns = os.listdir(anns_dir) # Annotations for current class
            class_imgs.sort()
            class_anns.sort()

            num_imgs = len(class_imgs) # Number of images and annotations for current class
            
            # Shuffling data:
            inds_list = list(range(num_imgs)) # List of indices ranging for the total number of images
            random.shuffle(inds_list) # Shuffling indices list
            class_imgs = [class_imgs[i] for i in inds_list] # Shuffling class images according to shuffled inds_list
            class_anns = [class_anns[i] for i in inds_list] # Shuffling class annotations according to shuffled inds_list

            ind_train = int(0.8 * num_imgs) # Ending index for the training images
            ind_val = ind_train + int(0.1 * num_imgs) # Ending index for the validation images

            # Splitting images into training, validation, and testing:
            train_imgs = class_imgs[:ind_train]
            val_imgs = class_imgs[ind_train:ind_val]
            test_imgs = class_imgs[ind_val:]

            all_imgs = [train_imgs, val_imgs, test_imgs] # All images
            
            # Splitting annotations into training, validation, and testing:
            train_anns = class_anns[:ind_train]
            val_anns = class_anns[ind_train:ind_val]
            test_anns = class_anns[ind_val:]

            all_anns = [train_anns, val_anns, test_anns] # All annotations
            
            # Looping through all split types and corresponding split images:
            for split_type, split_imgs, split_anns in zip(splits, all_imgs, all_anns):
                if keep_class_dirs:
                    # Creating each split directory for images and annotations for current class:
                    split_dir_img = os.path.join(self.data_dir, split_type, class_cur.lower(), "images")
                    split_dir_ann = os.path.join(self.data_dir, split_type, class_cur.lower(), "annotations")
                else:
                    split_dir_img = os.path.join(self.data_dir, split_type, "images")
                    split_dir_ann = os.path.join(self.data_dir, split_type, "annotations")

                os.makedirs(split_dir_img, exist_ok=True)
                os.makedirs(split_dir_ann, exist_ok=True)

                # Copying each image from initial directory to corresponding split directory for each split:
                for img, ann in zip(split_imgs, split_anns):
                    shutil.copy(os.path.join(imgs_dir, img), os.path.join(split_dir_img, img))
                    shutil.copy(os.path.join(anns_dir, ann), os.path.join(split_dir_ann, ann))

                    # Code to replace each original class label (which is 0) to the class label as found in self.class_2_index:
                    ann_file_cur_dir = os.path.join(split_dir_ann, ann) # File path of current annotation file
                    with open(ann_file_cur_dir, 'r') as file:
                        objects = file.readlines()

                        new_labels = []
                        for obj in objects:
                            obj_items = obj.split()
                            new_class_label = self.class_2_index[class_cur.lower()]
                            obj_items[0] = str(new_class_label)

                            obj_new = ' '.join(obj_items) + '\n'
                            new_labels.append(obj_new)
                    
                    with open(ann_file_cur_dir, 'w') as file:
                        file.writelines(new_labels)
                            

    def split_data_reduced(self, keep_class_dirs=True):

        """ This function splits the downloaded Open Image dataset, and splits each class into training, validation, and testing sets.
            This function assumes that the required data has already been downloaded.
            This function reduces the dataset by self.keep_perc. """

        # Setting the random seed:
        random.seed(self.random_seed)
        
        splits = ["train_reduced", "val_reduced", "test_reduced"]
        
        # Making folders for each of the splits:
        for split in splits:
            split_dir = os.path.join(self.data_dir, split)
            os.makedirs(split_dir, exist_ok=True)

        # Iterating through each class:
        for class_cur in self.classes:
            print(f'Splitting data for class {class_cur}')

            # Getting directories for the images and annotations for each class:
            imgs_dir = os.path.join(self.data_dir, class_cur.lower(), "images")
            anns_dir = os.path.join(self.data_dir, class_cur.lower(), "darknet")

            # Ensuring each class has images and annotations:
            if not imgs_dir:
                raise Exception(f'Images do not exist for {class_cur}!')

            if not anns_dir:
                raise Exception(f'Annotations do not exist for {class_cur}!')

            class_imgs = os.listdir(imgs_dir) # Images for current class
            class_anns = os.listdir(anns_dir) # Annotations for current class
            class_imgs.sort()
            class_anns.sort()

            num_imgs = len(class_imgs) # Number of images and annotations for current class
            
            if self.perc_keep != 1.00 and num_imgs > 50:
                num_imgs = int(num_imgs * self.perc_keep)
                class_imgs = class_imgs[:num_imgs]
                class_anns = class_anns[:num_imgs]

            # Shuffling data:
            inds_list = list(range(num_imgs)) # List of indices ranging for the total number of images
            random.shuffle(inds_list) # Shuffling indices list
            class_imgs = [class_imgs[i] for i in inds_list] # Shuffling class images according to shuffled inds_list
            class_anns = [class_anns[i] for i in inds_list] # Shuffling class annotations according to shuffled inds_list

            ind_train = int(0.8 * num_imgs) # Ending index for the training images
            ind_val = ind_train + int(0.1 * num_imgs) # Ending index for the validation images

            # Splitting images into training, validation, and testing:
            train_imgs = class_imgs[:ind_train]
            val_imgs = class_imgs[ind_train:ind_val]
            test_imgs = class_imgs[ind_val:]

            all_imgs = [train_imgs, val_imgs, test_imgs] # All images
            
            # Splitting annotations into training, validation, and testing:
            train_anns = class_anns[:ind_train]
            val_anns = class_anns[ind_train:ind_val]
            test_anns = class_anns[ind_val:]

            all_anns = [train_anns, val_anns, test_anns] # All annotations
            
            # Looping through all split types and corresponding split images:
            for split_type, split_imgs, split_anns in zip(splits, all_imgs, all_anns):
                if keep_class_dirs:
                    # Creating each split directory for images and annotations for current class:
                    split_dir_img = os.path.join(self.data_dir, split_type, class_cur.lower(), "images")
                    split_dir_ann = os.path.join(self.data_dir, split_type, class_cur.lower(), "annotations")
                else:
                    split_dir_img = os.path.join(self.data_dir, split_type, "images")
                    split_dir_ann = os.path.join(self.data_dir, split_type, "annotations")

                os.makedirs(split_dir_img, exist_ok=True)
                os.makedirs(split_dir_ann, exist_ok=True)

                # Copying each image from initial directory to corresponding split directory for each split:
                for img, ann in zip(split_imgs, split_anns):
                    shutil.copy(os.path.join(imgs_dir, img), os.path.join(split_dir_img, img))
                    shutil.copy(os.path.join(anns_dir, ann), os.path.join(split_dir_ann, ann))

                    # Code to replace each original class label (which is 0) to the class label as found in self.class_2_index:
                    ann_file_cur_dir = os.path.join(split_dir_ann, ann) # File path of current annotation file
                    with open(ann_file_cur_dir, 'r') as file:
                        objects = file.readlines()

                        new_labels = []
                        for obj in objects:
                            obj_items = obj.split()
                            new_class_label = self.class_2_index[class_cur.lower()]
                            obj_items[0] = str(new_class_label)

                            obj_new = ' '.join(obj_items) + '\n'
                            new_labels.append(obj_new)
                    
                    with open(ann_file_cur_dir, 'w') as file:
                        file.writelines(new_labels)

        print(f"Dataset has been reduced!")


    def see_class_labels(self):
        """ Simple function to determine if all of the images have just one class in them. """

        # Change the split to "train", "val", or "test" to choose which directory you need to test:
        anns_dir =  os.path.join(self.data_dir, "test", "annotations")

        ann_files = os.listdir(anns_dir)

        for ann_file in ann_files:
            ann_file_dir = os.path.join(anns_dir, ann_file)

            with open(ann_file_dir, 'r') as file:
                objects = file.readlines()

                for obj in objects:
                    class_label = int(obj.split()[0])
                    if class_label != 0:
                        print(f"Object {obj} in file {ann_file}")





    def make_dataloader(self, split_name):
        """ Function to create a DataLoader object that's compatible with Facebook's DETR model.
        
        Inputs:
        split_name (str) - must be one of the following: "train", "train_reduced", "val", "val_reduced", "test", "test_reduced

        Outputs:
        dl (DataLoader) - DataLoader object

        """

        def collate_fn(data):
            """ Defining the collate function to pad and return images and list of annotations,
                as object-detection can have variable image sizes and variable number of objects
                in each image. 
            """

            # Extracting the images and corresponding annotations:
            imgs, anns = zip(*data)

            return torch.stack(imgs), anns        

        imgs_dir = os.path.join(self.data_dir, split_name, "images")
        anns_dir = os.path.join(self.data_dir, split_name, "annotations")

        # Lists of all the images and corresponding annotation files in the selected directory:
        imgs_list = os.listdir(imgs_dir)
        anns_list = os.listdir(anns_dir)

        dataset = []
        # Iterating through each image and annotation pair:
        for img_cur, ann_cur in zip(imgs_list, anns_list):
            
            # Directories of current image and annotation:
            img_cur_dir = os.path.join(imgs_dir, img_cur)
            ann_cur_dir = os.path.join(anns_dir, ann_cur)

            # Reading image:
            img_pil = PILImage.open(img_cur_dir).convert("RGB")
            img_size_orig = img_pil.size # (width, height) format
            # img_tv = TVImage(torch.tensor(img_pil).permute(2, 0, 1))

            ann_list = []
            # Reading annotation file:
            with open(ann_cur_dir, 'r') as file:

                objects = file.readlines()

                # Iterating through each object in the image (all assumed to equal to the target class):
                for obj in objects:
                    obj_items = obj.split()
                    class_label = int(obj_items[0]) # Class label
                    x_cent = float(obj_items[1]) # x-coordinate of bounding box's center
                    y_cent = float(obj_items[2]) # y-coordinate of bounding box's center
                    box_width = float(obj_items[3]) # Width of bounding box
                    box_height = float(obj_items[4]) # Height of bounding box

                    # Appending the bounding box information to the list of bounding box information:
                    ann_list.append([x_cent, y_cent, box_width, box_height])

                # Converting list of bounding box information to a PyTorch tensor:
                box_tensor = torch.tensor(ann_list, dtype=torch.float)

            # box_tensor_convert = box_convert(box_tensor, in_fmt='cxcywh', out_fmt='xyxy') # Converting bounding boxes from CXCYWH format to XYXY format to make it compatible with DETR model

            # Bounding box object for current annotation file:
            # bounding_boxes = tv_tensors.BoundingBoxes(box_tensor_convert, format="XYXY", canvas_size=img_size_orig)
            bounding_boxes = tv_tensors.BoundingBoxes(box_tensor, format="CXCYWH", canvas_size=img_size_orig)


            # Applying transformations to bounding boxes and image:
            img_trans = self.transforms_all(img_pil) # Applying the general transformations to the image
            bb_trans = self.transforms_all(bounding_boxes) # Applying the general transformations to the image's corresponding bounding boxes
            img_trans = self.transforms_img(img_trans) # Applying the image-specific transformations to the image (tensor conversion and normalization)

            num_labels = bb_trans.shape[0] # Number of objects
            labels_tensor = torch.ones(num_labels, dtype=torch.int64) * class_label # Creating a labels tensor

            ann_dict = {"boxes": bb_trans, "class_labels": labels_tensor}

            info_tuple = (img_trans, ann_dict)
            dataset.append(info_tuple)
        

        dataset_wrapper = DETRDataset(dataset)
        dl = DataLoader(dataset_wrapper, batch_size=self.batch_size, shuffle=True, collate_fn=collate_fn)

        return dl


class DETRDataset():
    def __init__(self, dataset):
        self.dataset = dataset

    def __getitem__(self, ind):
        return self.dataset[ind]

    def __len__(self):
        return len(self.dataset)
    


In [None]:
# Data Configuration & Hyperparameters:
PERC_KEEP = 1.00 # Proportion of data from datasets to keep
BATCH_SIZE = 16 # Batch size
EPOCHS = 50 # Number of epochs to train the model for
LEARNING_RATE = 1e-5 # Learning rate

In [None]:
### Loading Open Images Dataset:

# Initializing the DETRDatasetProcessor class:
detr_processor = DETRDatasetProcessor(batch_size=BATCH_SIZE, perc_keep=PERC_KEEP)
print(f"Number of classes: {len(detr_processor.classes)}")

# Downloading the Open Images Dataset in darknet format:
# detr_processor.download_data()

# Splitting the downloaded data into training, validation, and test sets:
# detr_processor.split_data(keep_class_dirs=False)

# Splitting the downloaded data into reduced training, validation, and test sets:
# detr_processor.split_data_reduced(keep_class_dirs=False)

# Creating training, validation, and testing dataloaders:
train_set = detr_processor.make_dataloader("train_reduced")
val_set = detr_processor.make_dataloader("val_reduced")
test_set = detr_processor.make_dataloader("test_reduced")

print(f"Number of Batches in Training Set: {len(train_set)}")
print(f"Number of Batches in Validation Set: {len(val_set)}")
print(f"Number of Batches in Testing Set: {len(test_set)}")




In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import copy
import numpy as np
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torchvision

class DETRModelManager:
    def __init__(self, model, optimizer, device=None):

        if device:
            self.device = device
        else:
            self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    
        self.model = model.to(self.device)
        self.optimizer = optimizer
        self.confidence_threshold = 0.02
        self.best_accuracy = 0.0
        self.best_loss = np.inf
        self.best_model_state_dict = None
        
        self.training_losses = None
        self.val_losses = None
        self.training_maps = None
        self.val_maps = None
        self.training_maps_50 = None
        self.val_maps_50 = None

    def train(self, training_data_loader, validation_data_loader = None, epochs=10):

        # Creating empty lists for the training and validation losses and mAP values:
        training_losses = []
        val_losses = []
        
        training_maps = []
        val_maps = []

        training_maps_50 = []
        val_maps_50 = []

        for epoch in tqdm(range(epochs)):
            display_epoch = epoch + 1

            self.model.train() # Putting the model (back) in training mode
            
            loss_epoch = 0
            map_epoch = MeanAveragePrecision()


            for idx, (imgs, anns) in enumerate(training_data_loader):
                
                # Extracting height and width of each image in the batch as well as batch size:
                batch_size = imgs.shape[0]
                img_height = 512
                img_width = 512
                
                # Moving all input and target tensors to device:
                inputs = {"pixel_values": imgs.to(self.device)} # Creating inputs for model
                anns = [{key: val.to(self.device) for key, val in ann.items()} for ann in anns]

                # Forward pass:
                output = self.model(**inputs, labels=anns)

                # Loss from current batch:
                loss = output.loss
                loss_epoch += loss

                # Backward pass:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                ### Check MAP:

                # Getting predictions:
                pred = []
                for ind in range(batch_size):

                    # Extracting normalized bounding boxes and scores:
                    pred_boxes = output.pred_boxes[ind]  # Normalized [center_x, center_y, width, height, score]
                    pred_scores, pred_labels = torch.max(torch.softmax(output.logits[ind], dim=-1), dim=-1)
                    # pred_labels = torch.argmax(output.logits[ind], dim=-1)

                    # Applying confidence threshold filtering:
                    valid_indices = pred_scores > self.confidence_threshold
                    pred_boxes = pred_boxes[valid_indices]
                    pred_scores = pred_scores[valid_indices]
                    pred_labels = pred_labels[valid_indices]

                    # Denormalizing bounding boxes:
                    pred_box_coords = pred_boxes[:, :4] # Removing the scores from the output tensor
                    pred_box_coords[:, 0] *= img_width  # center_x
                    pred_box_coords[:, 1] *= img_height  # center_y
                    pred_box_coords[:, 2] *= img_width  # width
                    pred_box_coords[:, 3] *= img_height  # height

                    # # Converting to (x_min, y_min, x_max, y_max):
                    # pred_boxes_xyxy = torch.zeros_like(pred_boxes)
                    # pred_boxes_xyxy[:, 0] = pred_boxes[:, 0] - (0.5 * pred_boxes[:, 2])  # x_min
                    # pred_boxes_xyxy[:, 1] = pred_boxes[:, 1] - (0.5 * pred_boxes[:, 3])  # y_min
                    # pred_boxes_xyxy[:, 2] = pred_boxes[:, 0] + (0.5 * pred_boxes[:, 2])  # x_max
                    # pred_boxes_xyxy[:, 3] = pred_boxes[:, 1] + (0.5 * pred_boxes[:, 3])  # y_max

                    # Creating the prediction dictionary:
                    pred_dict = {
                        'boxes': pred_box_coords,
                        'labels': pred_labels,
                        'scores': pred_scores,
                    }
                    pred.append(pred_dict)


                # Updating the mean average precision:
                anns_target = [{"boxes": ann["boxes"], "labels": ann["class_labels"]} for ann in anns]
                map_epoch.update(pred, anns_target)


            # Calculating the average loss for current epoch:
            loss_epoch_avg = loss_epoch / len(training_data_loader)
            training_losses.append(loss_epoch_avg.cpu().cpu().detach().numpy())

            map_epoch_results = map_epoch.compute()
            print(f"Outputs From Last Batch: {output.loss_dict}")
            print(f"Epoch {display_epoch} Average Loss: {loss_epoch_avg}")
            print(f"Epoch {display_epoch} Mean Average Precision: {map_epoch_results['map']}, {map_epoch_results['map_50']}")

            training_maps.append(map_epoch_results['map'].cpu().detach().numpy())
            training_maps_50.append(map_epoch_results['map_50'].cpu().detach().numpy())

        
            if validation_data_loader:
                
                loss_val = 0
                map_val = MeanAveragePrecision()
                self.model.eval() # Putting the model in evaluation mode

                for idx, (imgs, anns) in enumerate(validation_data_loader):

                    # Extracting height and width of each image in the batch as well as batch size:
                    batch_size = imgs.shape[0]
                    img_height = 512
                    img_width = 512                 

                    # Moving all input and target tensors to device:
                    inputs = {"pixel_values": imgs.to(self.device)}
                    anns = [{key: val.to(self.device) for key, val in ann.items()} for ann in anns]

                    with torch.no_grad():
                        
                        # Forward pass:
                        output = self.model(**inputs, labels=anns)

                        # Loss from current batch:
                        loss = output.loss
                        loss_val += loss # Adding current loss to the total loss across the batch

                        ### Check mAP:

                        pred = []
                        for ind in range(batch_size):

                            # Extracting normalized bounding boxes and scores:
                            pred_boxes = output.pred_boxes[ind]  # Normalized [center_x, center_y, width, height, score]
                            pred_scores, pred_labels = torch.max(torch.softmax(output.logits[ind], dim=-1), dim=-1)
                            # pred_labels = torch.argmax(output.logits[ind], dim=-1)

                            # Applying confidence threshold filtering:
                            valid_indices = pred_scores > self.confidence_threshold
                            pred_boxes = pred_boxes[valid_indices]
                            pred_scores = pred_scores[valid_indices]
                            pred_labels = pred_labels[valid_indices]

                            # Denormalizing bounding boxes:
                            pred_boxes[:, 0] *= img_width
                            pred_boxes[:, 1] *= img_height
                            pred_boxes[:, 2] *= img_width
                            pred_boxes[:, 3] *= img_height

                            # # Converting to (x_min, y_min, x_max, y_max):
                            # pred_boxes_xyxy = torch.zeros_like(pred_boxes)
                            # pred_boxes_xyxy[:, 0] = pred_boxes[:, 0] - (0.5 * pred_boxes[:, 2])
                            # pred_boxes_xyxy[:, 1] = pred_boxes[:, 1] - (0.5 * pred_boxes[:, 3])
                            # pred_boxes_xyxy[:, 2] = pred_boxes[:, 0] + (0.5 * pred_boxes[:, 2])
                            # pred_boxes_xyxy[:, 3] = pred_boxes[:, 1] + (0.5 * pred_boxes[:, 3])

                            pred_dict = {
                                'boxes': pred_boxes,
                                'labels': pred_labels,
                                'scores': pred_scores,
                            }
                            pred.append(pred_dict)


                        # Updating the mean average precision:
                        anns_target = [{"boxes": ann["boxes"], "labels": ann["class_labels"]} for ann in anns]
                        map_val.update(pred, anns_target)


            # Calculating average validation loss:
            loss_val_avg = loss_val / len(validation_data_loader)
            val_losses.append(loss_val_avg.cpu().detach().numpy())

            map_val_results = map_val.compute()

            val_maps.append(map_val_results['map'].cpu().detach().numpy())
            val_maps_50.append(map_val_results['map_50'].cpu().detach().numpy())

            print(f"Validation Loss: {loss_val_avg}")
            print(f"Validation Mean Average Precision: {map_val_results['map']}, {map_val_results['map_50']}")


            # Ensuring to save the model that achieves the lowest loss:
            if loss_val_avg < self.best_loss:
                self.best_loss = loss_val_avg
                self.best_model_state_dict = copy.deepcopy(self.model.state_dict())
        
        # Load best state after training for use:
        if self.best_model_state_dict is not None:
            self.model.load_state_dict(self.best_model_state_dict)

        # Setting the training and validation metric lists to their respective class variables:
        self.training_losses = training_losses
        self.val_losses = val_losses
        self.training_maps = training_maps
        self.val_maps = val_maps
        self.training_maps_50 = training_maps_50
        self.val_maps_50 = val_maps_50


    def test(self, test_data_loader):
        """ This function applies the trained model to the given test data. 
            It prints and returns the test accuracy.
        """

        loss_test = 0
        map_test = MeanAveragePrecision()
        self.model.eval() # Putting the model in evlauation mode

        for idx, (imgs, anns) in enumerate(test_data_loader):
            
            # Moving all input and target tensors to device:
            inputs = {"pixel_values": imgs.to(self.device)}
            anns = [{key: val.to(self.device) for key, val in ann.items()} for ann in anns]

            with torch.no_grad():
                
                # Forward pass:
                output = self.model(**inputs, labels=anns)

                # Loss from current batch:
                loss = output.loss
                loss_test += loss # Adding current loss to the total loss across the batch

                ### Check MAP:

                # Getting predictions:
                pred = []
                for ind in range(len(output.logits)):
                    pred_dict = {}
                    pred_dict['boxes'] = output.logits[ind]['boxes']
                    pred_dict['labels'] = output.logits[ind]['labels']
                    pred.append(pred_dict)

                # Updating the mean average precision
                map_test.update(pred, anns)

        # Calculating average validation loss:
        loss_test_avg = loss_test / len(test_data_loader)
        map_test_results = map_test.compute()

        print(f"Test Loss: {loss_test_avg}")
        print(f"Test Mean Average Precision: {map_test_results['map']}, {map_test_results['map_50']}")


    def plot_loss_curve(self, model_name):
        """
        This function plots the loss curve from the most recent training period of this model manager.
        
        Inputs:
        model_name (str) - Name of the model
        """

        title = model_name + " Loss Curve"
        filename = model_name + "_loss_curve.png"

        # Moving tensors to CPU:
        for i, values in enumerate(zip(self.training_losses, self.val_losses)):
            self.training_losses[i] = values[0]
            self.val_losses[i] = values[1]

        # Plotting training and validation accuracy values:
        plt.plot(self.training_losses, label='Training Loss')
        plt.plot(self.val_losses, label='Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title(title)
        plt.legend(loc='best')
        plt.savefig(filename, dpi=600)
        plt.show()


    def plot_map_curve(self, model_name):
        """
        This function plots the map curve from the most recent training period of this model manager.
        
        Inputs:
        model_name (str) - Name of the model
        """

        title = model_name + " mAP Curve"
        filename = model_name + "_map_curve.png"

        # Moving tensors to CPU:
        for i, values in enumerate(zip(self.training_maps, self.val_maps)):
            self.training_maps[i] = values[0]
            self.val_maps[i] = values[1]

        # Plotting training and validation accuracy values:
        plt.plot(self.training_maps, label='Training mAP')
        plt.plot(self.val_maps, label='Validation mAP')
        plt.xlabel('Epochs')
        plt.ylabel('Mean Average Precision')
        plt.title(title)
        plt.legend(loc='best')
        plt.savefig(filename, dpi=600)
        plt.show()


    def plot_map50_curve(self, model_name):
        """
        This function plots the map50 curve from the most recent training period of this model manager.
        
        Inputs:
        model_name (str) - Name of the model
        """

        title = model_name + " mAP50 Curve"
        filename = model_name + "_map50_curve.png"

        # Moving tensors to CPU:
        for i, values in enumerate(zip(self.training_maps_50, self.val_maps_50)):
            self.training_maps_50[i] = values[0]
            self.val_maps_50[i]=values[1]

        # Plotting training and validation accuracy values:
        plt.plot(self.training_maps_50, label='Training mAP50')
        plt.plot(self.val_maps_50, label='Validation mAP50')
        plt.xlabel('Epochs')
        plt.ylabel('Mean Average Precision 50')
        plt.title(title)
        plt.legend(loc='best')
        plt.savefig(filename, dpi=600)
        plt.show()


    def save(self, filepath):
        if self.best_model_state_dict is None:
            torch.save(self.model.state_dict(), filepath)
        else:
            torch.save(self.best_model_state_dict, filepath)

    def load(self, filepath):
        self.model.load_state_dict(torch.load(filepath, weights_only=True))

    def prompt_llm(self, model_preds, prompt):
        """prompts a llm for food nutrition facts, health benefits, and recipes

        Args:
            model_preds (string): model prediciton. the food label

        Returns:
            str: llm's response
        """

        tokenizer = AutoTokenizer.from_pretrained("gpt3.5-turbo")
        llm = AutoModelForCausalLM.from_pretrained("gpt3.5-turbo")
        
        prompt = f"{prompt} {model_preds}"
        
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = llm.generate(**inputs, max_length=500)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        print(f"Detected Food Item: {detected_items}")
        print(f"Requested Data:\n{response}")
        return response

In [13]:
# Loading DETR Resnet-50 Model from HuggingFace:

# detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", num_labels=len(detr_processor.classes)+1, num_queries=25, ignore_mismatched_sizes=True)
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
detr_model.config.no_object_weight = 0.5

# for name, param in detr_model.named_parameters():
#     print(name, param.requires_grad)
optimizer = optim.AdamW(detr_model.parameters(), lr=1e-4)

for param in detr_model.model.backbone.parameters():
    param.requires_grad = True


detr_wrapper = DETRModelManager(model=detr_model, optimizer=optimizer, device=device)

2024-12-08  04:39:18 INFO Loading pretrained weights from Hugging Face hub (timm/resnet50.a1_in1k)
2024-12-08  04:39:18 INFO [timm/resnet50.a1_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-12-08  04:39:18 INFO Missing keys (fc.weight, fc.bias) discovered while loading pretrained weights. This is expected if model is being adapted.
Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.

In [None]:
# Training the model:
detr_wrapper.train(training_data_loader=train_set, validation_data_loader=val_set, epochs=EPOCHS)

  0%|          | 0/50 [00:00<?, ?it/s]

Outputs From Last Batch: {'loss_ce': tensor(0.8371, device='cuda:0', grad_fn=<NllLoss2DBackward0>), 'loss_bbox': tensor(0.1158, device='cuda:0', grad_fn=<DivBackward0>), 'loss_giou': tensor(0.3438, device='cuda:0', grad_fn=<DivBackward0>), 'cardinality_error': tensor(1., device='cuda:0')}
Epoch 1 Average Loss: 3.446380138397217
Epoch 1 Mean Average Precision: 0.0, 0.0


  2%|▏         | 1/50 [01:13<59:50, 73.28s/it]

Validation Loss: 3.159837007522583
Validation Mean Average Precision: 0.0, 0.0


In [None]:
# Creating, saving, and displaying loss curve from training:
detr_wrapper.plot_loss_curve("DERT")

# Creating, saving, and displaying mAP curve from training:
detr_wrapper.plot_map_curve("DERT")

# Creating, saving, and displaying mAP50 curve from training:
detr_wrapper.plot_map50_curve("DERT")

In [None]:
# Testing the model:
detr_wrapper.test(test_set)