# Prac Deep Learning Sys (COMS 6998-015)
## Homework 3
### Nathan Coulibaly, nmc2169

## Problem 1

1. Evaluation of pretrained MobilenetV1 SSD (mobilenet-v1-ssd-mp-0_675.pth) with Pascal VOC 2007 Dataset:

    **Average Precision Per Class:**

    - **aeroplane**: 0.6843
    - **bicycle**: 0.7911
    - **bird**: 0.6172
    - **boat**: 0.5613
    - **bottle**: 0.3483
    - **bus**: 0.7684
    - **car**: 0.7281
    - **cat**: 0.8369
    - **chair**: 0.5169
    - **cow**: 0.6239
    - **diningtable**: 0.7063
    - **dog**: 0.7873
    - **horse**: 0.8195
    - **motorbike**: 0.7924
    - **person**: 0.7023
    - **pottedplant**: 0.3985
    - **sheep**: 0.6067
    - **sofa**: 0.7572
    - **train**: 0.8262
    - **tvmonitor**: 0.6465

    **Average Precision Across All Classes**: **0.6760**
    

Evaluation of the pretrained mobilenet-v1-ssd-mp-0_675 model with 3 classes (BACKGROUND, Airplane, Helicopter), before fine-tuning:

**Average Precision Per-class:**
- **Airplane**: 0.007641559442716817
- **Helicopter**: 0.005506622493994895

**Average Precision Across All Classes:** 0.006574090968355857

After fine-tuning for 10 epochs on the open_images airplane and helicopter dataset: Validation Loss: 1.9261, Validation Regression Loss: 0.6156, Validation Classification Loss: 1.3105

**Average Precision Per-class:**
 - **Airplane:** 0.7871694636324705
 - **Helicopter:** 0.8729167315760018

**Average Precision Across All Classes:** 0.8300430976042361

In [25]:
import torch
import torch.onnx
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd  # Adjust the import if necessary

# Export the fine-tuned MobileNetV1-SSD checkpoint to ONNX.
#
# NOTE: torch.onnx.export needs the separate `onnx` package; without it the
# export aborts with "OnnxExporterError: Module onnx is not installed!".
# Install it first (e.g. `pip install onnx`).

model_path = "models/mb1-ssd-Epoch-9-Loss-1.9260564812010141.pth"
onnx_path = "models/finetuned-mb1-ssd.onnx"

# 3 classes, matching the fine-tuned label set (BACKGROUND, Airplane, Helicopter).
model = create_mobilenetv1_ssd(num_classes=3)

# map_location="cpu": the model and the dummy input below both live on the
# CPU, and a checkpoint saved from a GPU run would otherwise fail to load on
# a machine without CUDA.
# weights_only=True: restricts unpickling to tensors and plain containers,
# which is all a state dict needs.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # Set the model to evaluation mode

# Define a dummy input tensor (one 3-channel 300x300 image) used to trace the model
dummy_input = torch.randn(1, 3, 300, 300)

# Export the model to ONNX format
torch.onnx.export(
    model,                     # Model to be exported
    dummy_input,               # Dummy input tensor
    onnx_path,                 # Output file path
    export_params=True,        # Store the trained parameter weights inside the model file
    opset_version=11,          # ONNX opset version to export the model
    do_constant_folding=True,  # Whether to execute constant folding for optimization
    input_names=['input'],     # Input name (optional)
    output_names=['output']    # Output name (optional)
)

print(f"Model exported to {onnx_path}")

  model.load_state_dict(torch.load(model_path))


OnnxExporterError: Module onnx is not installed!

## Problem 2

1. Fine-tuning with Daimler Ped dataset from the Visual Domain Decathlon

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

In [None]:
# Preprocessing shared by both splits: every image is resized to 224x224,
# converted to a tensor and normalized per channel. The mean/std values are
# the ones commonly used with ImageNet-pretrained torchvision models.
_resize_hw = (224, 224)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

# Training pipeline: same as validation plus a random horizontal flip.
_train_pipeline = transforms.Compose([
    transforms.Resize(_resize_hw),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

# Validation pipeline: no random augmentation.
_val_pipeline = transforms.Compose([
    transforms.Resize(_resize_hw),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

# Keyed by split name so later cells can look a pipeline up by 'train' / 'val'.
data_transforms = {
    'train': _train_pipeline,
    'val': _val_pipeline,
}

In [None]:
# Dataset root; it is expected to contain a 'train' and a 'val' sub-folder.
data_dir = 'data/daimlerpedcls'

# One ImageFolder per split, each paired with the transform of the same name.
image_datasets = {
    split: ImageFolder(f'{data_dir}/{split}', data_transforms[split])
    for split in ('train', 'val')
}

# Batches of 64 loaded by 4 workers; only the training split is shuffled.
dataloaders = {
    split: DataLoader(
        dataset,
        batch_size=64,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}

# Number of samples per split, used to average loss and accuracy per epoch.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Start from a ResNet-50 loaded with pretrained weights.
model_ft = models.resnet50(pretrained=True)

# Replace the final fully connected layer with a new one that keeps the same
# input width but emits 2 outputs (2 classes for the Daimler Ped dataset).
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Move the whole network to the selected device.
model_ft = model_ft.to(device)

In [None]:
# Classification loss applied to the network's raw outputs.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model (all layers are updated).
optimizer_ft = optim.SGD(
    params=model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 at each of the listed epoch milestones.
scheduler = MultiStepLR(
    optimizer=optimizer_ft,
    milestones=[40, 80, 120],
    gamma=0.1,
)

In [None]:
num_epochs = 150
for epoch in range(num_epochs):
    # Per-epoch banner.
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    # Each epoch is one full training pass followed by one full validation pass.
    for phase in ['train', 'val']:
        is_train = phase == 'train'
        # train(True) selects training mode; train(False) is what eval() does.
        model_ft.train(is_train)

        # Sample-weighted loss total and number of correct predictions for this pass.
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            # Drop gradients left over from the previous batch.
            optimizer_ft.zero_grad()

            # Gradients are tracked only during the training pass.
            with torch.set_grad_enabled(is_train):
                outputs = model_ft(inputs)
                # Index of the largest output along dim 1 is the predicted class.
                preds = torch.max(outputs, 1)[1]
                loss = criterion(outputs, labels)

                if is_train:
                    loss.backward()
                    optimizer_ft.step()

            # Weight the batch loss by the batch size so that dividing by the
            # dataset size below gives a per-sample average.
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            running_corrects += (preds == labels.data).sum()

        # The learning-rate schedule advances once per epoch, after training.
        if is_train:
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

print('Training complete')

In [None]:
import copy

def train_model(model, criterion, optimizer, scheduler, num_epochs=150, *,
                loaders=None, sizes=None, run_device=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass, printing
    the average loss and accuracy of each. A copy of the weights is kept each
    time the validation accuracy exceeds the best value seen so far, and those
    weights are restored into ``model`` before returning.

    Args:
        model: Network to train; modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of epochs to run.
        loaders: Optional mapping with ``'train'`` and ``'val'`` iterables
            that yield ``(inputs, labels)`` batches. Defaults to the
            module-level ``dataloaders``.
        sizes: Optional mapping with the sample count of each phase, used to
            average loss and accuracy. Defaults to the module-level
            ``dataset_sizes``.
        run_device: Optional device that batches are moved to. Defaults to
            the module-level ``device``.

    Returns:
        A ``(model, best_acc)`` tuple. ``best_acc`` is the highest validation
        accuracy observed: a 0-dim tensor, or the float ``0.0`` when no epoch
        scored above zero.
    """
    # Fall back to the notebook-level objects so existing calls keep working.
    if loaders is None:
        loaders = dataloaders
    if sizes is None:
        sizes = dataset_sizes
    if run_device is None:
        run_device = device

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) selects training mode; train(False) is what eval() does.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # Gradients only exist (and need clearing) while training.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that dividing by
                # the phase size below gives a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            # The learning-rate schedule advances once per epoch, after training.
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects.double() / sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    print('Training complete')
    print(f'Best val Acc: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model, best_acc