### 1) Loading dataset

* importing necessary libraries to read dataset

In [None]:
import os
import ast
import numpy as np
import pandas as pd
from PIL import Image

* setting paths for dataset files

In [None]:
# Dataset layout: one folder per split, each holding its own "annotation.txt".
# All paths are relative to the notebook's working directory.
train_dir, test_dir = "train", "test"

train_annot_path = os.path.join(train_dir, "annotation.txt")
test_annot_path = os.path.join(test_dir, "annotation.txt")

# File listing the class names, one per line
classes_txt_path = "classes.txt"

* getting class labels from annotation and class files

In [None]:
def _load_split_labels(annot_path, img_dir, clutter_label):
    """Read one annotation file and derive image paths and integer labels.

    Each non-blank line is split on whitespace: the first token is the image
    id, the remaining tokens are per-class values.

    Args:
        annot_path: Path of the annotation text file.
        img_dir: Root directory of the split; images are looked up under
            ``<img_dir>/0<first three characters of the id>/<id>.jpg``.
        clutter_label: Integer label assigned when every value equals -1.00.

    Returns:
        Tuple ``(annots, img_paths, int_labels)`` of equally long lists.
    """
    with open(annot_path) as file:
        # Blank lines (e.g. a trailing newline) carry no image id and would
        # raise an IndexError below, so they are skipped.
        annots = [line.split() for line in file if line.strip()]

    img_paths = [os.path.join(img_dir, "0" + annot[0][:3], annot[0] + ".jpg") for annot in annots]

    int_labels = []
    for annot in annots:
        values = np.array([float(x) for x in annot[1:]])
        if np.all(values == -1.00):
            # no class is marked for this image -> classify it as "clutter"
            int_labels.append(clutter_label)
        else:
            # plain int keeps the labels homogeneous with the clutter label
            int_labels.append(int(np.argmax(values)))
    return annots, img_paths, int_labels


# getting class labels from "classes.txt"
with open(classes_txt_path) as file:
    class_names = [line.rstrip() for line in file if line.rstrip()]

# Number only the non-blank lines: the ids stay contiguous (0..n-1), so they
# line up with np.argmax over the annotation values and cannot collide with
# the id given to "clutter" below.
class_map_s2i = {name: i for i, name in enumerate(class_names)}

# Add "clutter" to the class_map_s2i (an existing "clutter" entry keeps its id
# instead of being overwritten, which would drop that id from class_map_i2s)
class_map_s2i.setdefault("clutter", len(class_map_s2i))

class_map_i2s = {v: k for k, v in class_map_s2i.items()}

# getting labels for train data from "train/annotation.txt"
train_annots, train_img_path, train_int_lbl = _load_split_labels(
    train_annot_path, train_dir, class_map_s2i["clutter"]
)
train_str_lbl = [class_map_i2s[label] for label in train_int_lbl]

# getting labels for test data from "test/annotation.txt"
test_annots, test_img_path, test_int_lbl = _load_split_labels(
    test_annot_path, test_dir, class_map_s2i["clutter"]
)
test_str_lbl = [class_map_i2s[label] for label in test_int_lbl]

* creating dataframes for train and test set

In [None]:
# One row per image: file path, integer class id and readable class name
train_df = pd.DataFrame(dict(img_path=train_img_path, int_label=train_int_lbl, str_label=train_str_lbl))
test_df = pd.DataFrame(dict(img_path=test_img_path, int_label=test_int_lbl, str_label=test_str_lbl))

# Show each frame followed by its per-class sample counts
print("\n\nTRAIN DF")
display(train_df)
print(train_df["str_label"].value_counts(dropna=False))

print("\n\n\nTEST DF")
display(test_df)
print(test_df["str_label"].value_counts())

* visualizing the images in the dataset

In [None]:
# displaying first image from train data
# .iloc selects by position, so this works whatever index train_df carries.
print(f"Image Class: {train_df.str_label.iloc[0]}")
# Open the file once and reuse the same image for the size printout and the
# rendered cell output instead of opening it three separate times.
first_train_img = Image.open(train_df.img_path.iloc[0])
print(f"Image Height: {first_train_img.height}")
print(f"Image Width: {first_train_img.width}")
first_train_img

In [None]:
# displaying last image from train data
# .iloc[-1] selects the last row by position, whatever index train_df carries.
print(f"Image Class: {train_df.str_label.iloc[-1]}")
# Open the file once and reuse the same image for the size printout and the
# rendered cell output instead of opening it three separate times.
last_train_img = Image.open(train_df.img_path.iloc[-1])
print(f"Image Height: {last_train_img.height}")
print(f"Image Width: {last_train_img.width}")
last_train_img

In [None]:
# displaying first image from test data
# .iloc selects by position, so this works whatever index test_df carries.
print(f"Image Class: {test_df.str_label.iloc[0]}")
# Open the file once and reuse the same image for the size printout and the
# rendered cell output instead of opening it three separate times.
first_test_img = Image.open(test_df.img_path.iloc[0])
print(f"Image Height: {first_test_img.height}")
print(f"Image Width: {first_test_img.width}")
first_test_img

In [None]:
# displaying 10 random images from train and test data each with their class labels
import random
import matplotlib.pyplot as plt

# 2 x 10 grid: row 0 holds random train samples, row 1 random test samples
fig, ax = plt.subplots(2, 10, figsize=(20, 4))
for col in range(10):
    # draw the train sample first, then the test sample, for every column
    for row, frame in enumerate((train_df, test_df)):
        pick = random.randint(0, len(frame) - 1)
        panel = ax[row, col]
        panel.imshow(Image.open(frame.img_path[pick]))
        panel.set_title(frame.str_label[pick])
        panel.axis("off")

In [None]:
# Bar chart of how many train samples each class has
train_df.str_label.value_counts().plot.bar()

### 2) Creating Dataloaders

* importing necessary libraries for dataloader

In [None]:
import torch
import torchvision
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchinfo import summary

In [None]:
# Set seeds
def set_seeds(seed: int=42):
    """Sets random seeds for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Seed the general torch generator first, then the CUDA one
    # (used by operations that happen on the GPU).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

* defining class to load custom dataset

In [None]:
class CustomDataset(Dataset):
    """Image classification dataset backed by a dataframe.

    Rows are read by position: column 0 is used as the image path and
    column 1 as the integer class label.

    Args:
        dataframe: Table with one row per sample (path in column 0, label in
            column 1).
        transform: Optional callable applied to the loaded image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the dataframe)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at row ``idx``."""
        img_path = self.dataframe.iloc[idx, 0]
        label = int(self.dataframe.iloc[idx, 1])

        # Force three channels: grayscale or RGBA files would otherwise yield
        # tensors that do not fit a 3-channel Normalize / pretrained backbone.
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label


* defining transforms

In [None]:
# Preprocessing steps, applied in order to every image
_preprocess_steps = [
    # bring every image to 224 x 224
    transforms.Resize((224, 224)),
    # PIL image -> tensor
    transforms.ToTensor(),
    # per-channel normalisation using ImageNet stats
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocess_steps)


* instantiating custom dataset for train and test data

In [None]:
# Wrap both splits with the same preprocessing pipeline
train_dataset, test_dataset = (
    CustomDataset(split_df, transform=transform) for split_df in (train_df, test_df)
)

* creating dataloaders for train and test data

In [None]:
# Number of samples per batch, shared by both loaders
batch_size = 32

# Only the training loader shuffles; the test loader keeps the dataset order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)


In [None]:
# Report the number of batches per dataloader (len() of a DataLoader counts
# batches, not individual samples) and the class-name -> index mapping.
print(f"Number of batches of size {batch_size} in training data: {len(train_loader)}")
print(f"Number of batches of size {batch_size} in testing data: {len(test_loader)}")
# class_map_s2i includes the extra "clutter" entry added while loading the labels
print(f"Number of classes: {len(class_map_s2i)}, class names: {class_map_s2i}")

### 3) Setting up device agnostic code

In [None]:
# Use the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

### 4) Defining Models that we are going to test

In [None]:
# Width of every model's final Linear layer: one output per entry of
# class_map_s2i (which includes the added "clutter" class).
OUT_FEATURES = len(class_map_s2i)

In [None]:
# create an instance of EfficientNet-B0
def create_effnet_b0():
    """Build a pretrained EfficientNet-B0 with a new classification head.

    Loads torchvision's default weights, moves the model to `device` and
    replaces `classifier` with Dropout(0.2) + Linear(1280 -> OUT_FEATURES).

    Returns:
        The model, carrying a `name` attribute set to "EfficientNet-B0".
    """
    backbone = torchvision.models.efficientnet_b0(
        weights=torchvision.models.EfficientNet_B0_Weights.DEFAULT
    ).to(device)

    # Seed right before the head is created so its initialisation is reproducible
    set_seeds()

    # New head sized for this dataset's classes
    head = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES, bias=True),
    )
    backbone.classifier = head.to(device)

    # Tag the model so experiment logs can identify it
    backbone.name = "EfficientNet-B0"
    print(f"[INFO] Created new {backbone.name} model.")
    return backbone

# create an instance of EfficientNet-B1
def create_effnet_b1():
    """Build a pretrained EfficientNet-B1 with a new classification head.

    Loads torchvision's default weights, moves the model to `device` and
    replaces `classifier` with Dropout(0.2) + Linear(1280 -> OUT_FEATURES).

    Returns:
        The model, carrying a `name` attribute set to "EfficientNet-B1".
    """
    backbone = torchvision.models.efficientnet_b1(
        weights=torchvision.models.EfficientNet_B1_Weights.DEFAULT
    ).to(device)

    # Seed right before the head is created so its initialisation is reproducible
    set_seeds()

    # New head sized for this dataset's classes
    head = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES, bias=True),
    )
    backbone.classifier = head.to(device)

    # Tag the model so experiment logs can identify it
    backbone.name = "EfficientNet-B1"
    print(f"[INFO] Created new {backbone.name} model.")
    return backbone

# create an instance of MobileNet-V2
def create_mobilenet_v2():
    """Build a pretrained MobileNet-V2 with a new classification head.

    Loads torchvision's default weights, moves the model to `device` and
    replaces `classifier` with Dropout(0.2) + Linear(1280 -> OUT_FEATURES).

    Returns:
        The model, carrying a `name` attribute set to "MobileNet-V2".
    """
    # torchvision exposes this enum as `MobileNet_V2_Weights`; the spelling
    # `MobileNetV2_Weights` does not exist and raises AttributeError.
    weights = torchvision.models.MobileNet_V2_Weights.DEFAULT
    model = torchvision.models.mobilenet_v2(weights=weights).to(device)

    # set the seeds for reproducibility
    set_seeds()

    # change the classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES, bias=True)
    ).to(device)

    # Give the model a name
    model.name = "MobileNet-V2"
    print(f"[INFO] Created new {model.name} model.")
    return model

# create an instance of MobileNet-V3 small
def create_mobilenet_v3_small():
    """Build a pretrained MobileNet-V3 small with a new classification head.

    Loads torchvision's default weights, moves the model to `device` and
    replaces `classifier` with Dropout(0.2) + Linear(576 -> OUT_FEATURES).

    Returns:
        The model, carrying a `name` attribute set to "MobileNet-V3-small".
    """
    # torchvision exposes this enum as `MobileNet_V3_Small_Weights`; the
    # spelling `MobileNetV3_SMALL_Weights` does not exist and raises
    # AttributeError.
    weights = torchvision.models.MobileNet_V3_Small_Weights.DEFAULT
    model = torchvision.models.mobilenet_v3_small(weights=weights).to(device)

    # set the seeds for reproducibility
    set_seeds()

    # change the classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=576, out_features=OUT_FEATURES, bias=True)
    ).to(device)

    # Give the model a name
    model.name = "MobileNet-V3-small"
    print(f"[INFO] Created new {model.name} model.")
    return model

# create an instance of MobileNet-V3 LARGE
def create_mobilenet_v3_large():
    """Build a pretrained MobileNet-V3 large with a new classification head.

    Loads torchvision's default weights, moves the model to `device` and
    replaces `classifier` with Dropout(0.2) + Linear(960 -> OUT_FEATURES).

    Returns:
        The model, carrying a `name` attribute set to "MobileNet-V3-large".
    """
    # torchvision exposes this enum as `MobileNet_V3_Large_Weights`; the
    # spelling `MobileNetV3_LARGE_Weights` does not exist and raises
    # AttributeError.
    weights = torchvision.models.MobileNet_V3_Large_Weights.DEFAULT
    model = torchvision.models.mobilenet_v3_large(weights=weights).to(device)

    # set the seeds for reproducibility
    set_seeds()

    # change the classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=960, out_features=OUT_FEATURES, bias=True)
    ).to(device)

    # Give the model a name
    model.name = "MobileNet-V3-large"
    print(f"[INFO] Created new {model.name} model.")
    return model

In [None]:
# Training lengths to compare: every model is trained once per entry
num_epochs = [5, 10]

# Architectures to compare, by name (a fresh model is built for each experiment)
models = [
    "EfficientNet-B0",
    "EfficientNet-B1",
    "MobileNet-V2",
    "MobileNet-V3-small",
    "MobileNet-V3-large",
]

In [None]:
%%time
import engine
from pathlib import Path
from utils import save_model

# 1. Set the random seeds
set_seeds(seed=42)

# 2. Map every supported model name to the function that builds a fresh instance
model_factories = {
    "EfficientNet-B0": create_effnet_b0,
    "EfficientNet-B1": create_effnet_b1,
    "MobileNet-V2": create_mobilenet_v2,
    "MobileNet-V3-small": create_mobilenet_v3_small,
    "MobileNet-V3-large": create_mobilenet_v3_large,
}

# Fail before any training starts if a name has no factory, instead of silently
# training MobileNet-V3-large under the wrong name.
unknown_models = [name for name in models if name not in model_factories]
if unknown_models:
    raise ValueError(f"Unknown model name(s): {unknown_models}")

# 3. Keep track of experiment numbers and of every experiment's metrics
experiment_number = 0
all_results = {}

# 4. Loop through each number of epochs
for epochs in num_epochs:

    # 5. Loop through each model name and create a new model based on the name
    for model_name in models:

        # 6. Create information print outs
        experiment_number += 1
        print(f"[INFO] Experiment number: {experiment_number}")
        print(f"[INFO] Model: {model_name}")
        print(f"[INFO] Number of epochs: {epochs}")

        # 7. Create a new model each time (important because we want each
        #    experiment to start from scratch)
        model = model_factories[model_name]()

        # 8. Create a new loss and optimizer for every model
        loss_fn = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

        # 9. Train target model with target dataloaders and track experiments
        results = engine.train(model=model,
                               train_dataloader=train_loader,
                               test_dataloader=test_loader,
                               optimizer=optimizer,
                               loss_fn=loss_fn,
                               epochs=epochs,
                               device=device)

        # 10. Save the model
        save_filepath = f"{model_name}_{epochs}_epochs.pth"
        save_model(model=model,
                   target_dir="models",
                   model_name=save_filepath)

        # 11. Record the accuracy of the last epoch and the size of the saved
        #     file in whole megabytes (rounded down)
        model_accuracy = results["test_acc"][-1]
        model_size = (Path("models") / save_filepath).stat().st_size // (1024 * 1024)

        # Create a dictionary to store the values
        results_dict = {
            f"{model_name}_{epochs}_accuracy": model_accuracy,
            f"{model_name}_{epochs}_size": model_size
        }
        # results_dict is rebuilt on every iteration, so the values are also
        # collected here to keep all experiments available for comparison.
        all_results.update(results_dict)

        # Print the results
        print(f"Trained Model Size: {model_size} MB | Trained Model Accuracy: {model_accuracy * 100:.2f}%")
        print("-" * 50 + "\n")

# Freeze the feature extractor: every parameter under `model.features` stops
# requiring gradients. `model` is whichever model the experiment loop built last.
model.features.requires_grad_(False)

# Summarise the model *after* freezing the features and changing the classifier head
summary(
    model,
    # must be "input_size", not "input_shape": (batch_size, color_channels, height, width)
    input_size=(32, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    verbose=0,
)

# Reloads the class maps and the train/test labels.
# This follows the same rules as the label loading in section 1: rebuilding the
# maps without "clutter" would make len(class_map_s2i) disagree with
# OUT_FEATURES, and a plain argmax would turn all-(-1.00) rows into class 0.

# getting class labels from "classes.txt"
with open(classes_txt_path) as file:
    # number only the non-blank lines so the ids stay contiguous
    class_map_s2i = {name: i for i, name in enumerate(line.rstrip() for line in file if line.rstrip())}
# keep the extra "clutter" class (an existing "clutter" entry keeps its id)
class_map_s2i.setdefault("clutter", len(class_map_s2i))
class_map_i2s = {v: k for k, v in class_map_s2i.items()}

# getting labels for train and test data from their "annotation.txt" files
split_labels = {}
for split_name, split_dir, split_annot_path in (
    ("train", train_dir, train_annot_path),
    ("test", test_dir, test_annot_path),
):
    with open(split_annot_path) as file:
        # blank lines carry no image id and are skipped
        split_annots = [line.split() for line in file if line.strip()]
    split_img_path = [
        os.path.join(split_dir, "0" + annot[0][:3], annot[0] + ".jpg") for annot in split_annots
    ]

    split_int_lbl = []
    for annot in split_annots:
        # float() keeps fractional values intact (int() would truncate them)
        values = np.array([float(x) for x in annot[1:]])
        if np.all(values == -1.00):
            # no class is marked for this image -> "clutter"
            split_int_lbl.append(class_map_s2i["clutter"])
        else:
            split_int_lbl.append(int(np.argmax(values)))

    split_str_lbl = [class_map_i2s[x] for x in split_int_lbl]
    split_labels[split_name] = (split_annots, split_img_path, split_int_lbl, split_str_lbl)

train_annots, train_img_path, train_int_lbl, train_str_lbl = split_labels["train"]
test_annots, test_img_path, test_int_lbl, test_str_lbl = split_labels["test"]