# Assignment 3 Part 1: Developing Your Own Classifier

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision

from torchvision import transforms
from sklearn.metrics import average_precision_score
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from kaggle_submission import output_submission_csv
from classifier import  Classifier#, AlexNet
from voc_dataloader import VocDataset, VOC_CLASSES

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Part 1B: Design your own network

In this notebook, your task is to create and train your own model for multi-label classification on VOC Pascal.

## What to do
1. You will make change on network architecture in ```classifier.py```.
2. You may also want to change other hyperparameters to assist your training to get a better performances. Hints will be given in the below instructions.

## What to submit
Check the submission template for details what to submit. 

In [2]:
def train_classifier(train_loader, classifier, criterion, optimizer):
    classifier.train()
    loss_ = 0.0
    losses = []
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = classifier(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss)
    return torch.stack(losses).mean().item()

In [3]:
def test_classifier(test_loader, classifier, criterion, print_ind_classes=True, print_total=True):
    classifier.eval()
    losses = []
    with torch.no_grad():
        y_true = np.zeros((0,21))
        y_score = np.zeros((0,21))
        for i, (images, labels) in enumerate(test_loader):
            images, labels = images.to(device), labels.to(device)
            logits = classifier(images)
            y_true = np.concatenate((y_true, labels.cpu().numpy()), axis=0)
            y_score = np.concatenate((y_score, logits.cpu().numpy()), axis=0)
            loss = criterion(logits, labels)
            losses.append(loss.item())
        aps = []
        # ignore first class which is background
        for i in range(1, y_true.shape[1]):
            ap = average_precision_score(y_true[:, i], y_score[:, i])
            if print_ind_classes:
                print('-------  Class: {:<12}     AP: {:>8.4f}  -------'.format(VOC_CLASSES[i], ap))
            aps.append(ap)
        
        mAP = np.mean(aps)
        test_loss = np.mean(losses)
        if print_total:
            print('mAP: {0:.4f}'.format(mAP))
            print('Avg loss: {}'.format(test_loss))
        
    return mAP, test_loss, aps

In [4]:
def plot_losses(train, val, test_frequency, num_epochs):
    plt.plot(train, label="train")
    indices = [i for i in range(num_epochs) if ((i+1)%test_frequency == 0 or i ==0)]
    plt.plot(indices, val, label="val")
    plt.title("Loss Plot")
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.legend()
    plt.show()
    
def plot_mAP(train, val, test_frequency, num_epochs):
    indices = [i for i in range(num_epochs) if ((i+1)%test_frequency == 0 or i ==0)]
    plt.plot(indices, train, label="train")
    plt.plot(indices, val, label="val")
    plt.title("mAP Plot")
    plt.ylabel("mAP")
    plt.xlabel("Epoch")
    plt.legend()
    plt.show()
    

In [5]:

def train(classifier, num_epochs, train_loader, val_loader, criterion, optimizer, test_frequency=5):
    train_losses = []
    train_mAPs = []
    val_losses = []
    val_mAPs = []

    for epoch in range(1,num_epochs+1):
        print("Starting epoch number " + str(epoch))
        train_loss = train_classifier(train_loader, classifier, criterion, optimizer)
        train_losses.append(train_loss)
        print("Loss for Training on Epoch " +str(epoch) + " is "+ str(train_loss))
        if(epoch%test_frequency==0 or epoch==1):
            mAP_train, _, _ = test_classifier(train_loader, classifier, criterion, False, False)
            train_mAPs.append(mAP_train)
            mAP_val, val_loss, _ = test_classifier(val_loader, classifier, criterion)
            print('Evaluating classifier')
            print("Mean Precision Score for Testing on Epoch " +str(epoch) + " is "+ str(mAP_val))
            val_losses.append(val_loss)
            val_mAPs.append(mAP_val)
    
    return classifier, train_losses, val_losses, train_mAPs, val_mAPs

# Developing Your Own Model

### Goal
To meet the benchmark for this assignment you will need to improve the network. Note you should have noticed pretrained Alenxt performs really well, but training Alexnet from scratch performs much worse. We hope you can design a better architecture over both the simple classifier and AlexNet to train from scratch.

### How to start
You may take inspiration from other published architectures and architectures discussed in lecture. However, you are NOT allowed to use predefined models (e.g. models from torchvision) or use pretrained weights. Training must be done from scratch with your own custom model.

#### Some hints
There are a variety of different approaches you should try to improve performance from the simple classifier:

* Network architecture changes
    * Number of layers: try adding layers to make your network deeper
    * Batch normalization: adding batch norm between layers will likely give you a significant performance increase
    * Residual connections: as you increase the depth of your network, you will find that having residual connections like those in ResNet architectures will be helpful
* Optimizer: Instead of plain SGD, you may want to add a learning rate schedule, add momentum, or use one of the other optimizers you have learned about like Adam. Check the `torch.optim` package for other optimizers
* Data augmentation: You should use the `torchvision.transforms` module to try adding random resized crops and horizontal flips of the input data. Check `transforms.RandomResizedCrop` and `transforms.RandomHorizontalFlip` for this. Feel free to apply more [transforms](https://pytorch.org/docs/stable/torchvision/transforms.html) for data augmentation which can lead to better performance. 
* Epochs: Once you have found a generally good hyperparameter setting try training for more epochs
* Loss function: You might want to add weighting to the `MultiLabelSoftMarginLoss` for classes that are less well represented or experiment with a different loss function



#### Note
We will soon be providing some initial expectations of mAP values as a function of epoch so you can get an early idea whether your implementation works without waiting a long time for training to converge.

### What to submit 
Submit your best model to Kaggle and save all plots for the writeup.


In [6]:


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std= [0.229, 0.224, 0.225])

train_original = transforms.Compose([
            transforms.Resize(227),
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            normalize
        ])

train_transform = transforms.Compose([
            transforms.RandomResizedCrop(227),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.5, contrast=0.5),
            transforms.ToTensor(),
            normalize
        ])

test_transform = transforms.Compose([
            transforms.Resize(227),
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            normalize,
        ])

# train_transform = torch.utils.data.ConcatDataset([train_transform,train_transform1,train_transform2])

ds_train_original = VocDataset('VOCdevkit_2007/VOC2007/','train',train_original)
ds_train_transform = VocDataset('VOCdevkit_2007/VOC2007/','train',train_transform)
ds_train = torch.utils.data.ConcatDataset([ds_train_original,ds_train_transform])

ds_val = VocDataset('VOCdevkit_2007/VOC2007/','val',test_transform)
ds_test = VocDataset('VOCdevkit_2007/VOC2007test/','test', test_transform)


  np.array(box_indices),


In [7]:
num_epochs = 100
test_frequency = 5
batch_size = 64

train_loader = torch.utils.data.DataLoader(dataset=ds_train,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                               num_workers=1)

val_loader = torch.utils.data.DataLoader(dataset=ds_val,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                               num_workers=1)

test_loader = torch.utils.data.DataLoader(dataset=ds_test,
                                               batch_size=batch_size, 
                                               shuffle=False,
                                               num_workers=1)


import gc

gc.collect()

torch.cuda.empty_cache()
# size check

labels = "n"

In [8]:
# for i,j in enumerate(train_loader):
#     #     print(i,j[0].shape,j[1])
#         tmp = j[1].detach().numpy()
#         if isinstance(labels,str):
#             labels = tmp
#             print("inti")
#         else:
#             print(labels.shape)
#             labels = np.concatenate([labels, tmp], axis=0)
# # batch size, channels, size 
# labels.shape

# weight = np.sum(labels,axis=0)
# print(weight)
# weight = (2501 - weight)/2501

In [9]:
# labels.sum(axis=0)

In [10]:
# my random start
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
import numpy as np

NUM_CLASSES = 21

class Classifier(nn.Module):
    # TODO: implement me
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 64, 7, stride=1,
                     padding=2, bias=False)
        self.conv2 = nn.Conv2d(64, 256, 3, stride=1,
                     padding=1, bias=False)
        self.conv3 = nn.Conv2d(256, 64, 3, stride=1,
                     padding=1, bias=False)

        self.pool = nn.AvgPool2d(3, 3)
#         self.pool2 = nn.MaxPool2d(3, 3,stride=2)
        
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(256)
        self.bn3 = nn.BatchNorm2d(64)
#         self.bn4 = nn.BatchNorm2d(16)
        
        self.dropout = nn.Dropout(p=0.3)
        
        self.fc1 = nn.Linear(4096, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, NUM_CLASSES)

    def forward(self, x):
        x = self.pool(self.bn1(F.relu(self.conv1(x))))
        x = self.pool(self.bn2(F.relu(self.conv2(x)))) 
        x = self.pool(self.bn3(F.relu(self.conv3(x)))) 

        x = x.view(-1,64*x.size()[2]**2)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [11]:
# VGG-16 Inspired 
# my random start
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
import numpy as np

NUM_CLASSES = 21

class ClassifierVGG(nn.Module):
    # TODO: implement me
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 32, 3, stride=1,
                     padding=1, bias=False)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=1,
                     padding=1, bias=False)
        
        self.conv3 = nn.Conv2d(32, 64, 3, stride=1,
                     padding=1, bias=False)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1,
                     padding=1, bias=False)
        
        self.conv5 = nn.Conv2d(64, 128, 3, stride=1,
                     padding=1, bias=False)
        self.conv6 = nn.Conv2d(128, 128, 3, stride=1,
                     padding=1, bias=False)
        
        self.conv7 = nn.Conv2d(128, 256, 3, stride=1,
                     padding=1, bias=False)
        self.conv8 = nn.Conv2d(256, 256, 3, stride=1,
                     padding=1, bias=False)
        
        self.pool = nn.MaxPool2d(3, 3)
#         self.pool2 = nn.MaxPool2d(3, 3,stride=2)
        
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
#         self.bn4 = nn.BatchNorm2d(16)
        
        self.dropout = nn.Dropout(p=0.5)
        
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, NUM_CLASSES)
        
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))# 16 filters out
        x = self.pool(x)
        
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))# 32 filters out
        x = self.pool(x)
        
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv6(x)) # 64 filters out
        x = self.pool(x)
        
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv8(x))# 128 filters out
        x = self.pool(x)
        
#         print(x.size())

        x = x.view(-1,256*x.size()[2]**2)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
#         x = self.softmax(x)
        return x


In [12]:
# Inception Inspired 
# my random start
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
import numpy as np

NUM_CLASSES = 21

class ClassifierInception(nn.Module):
    # TODO: implement me
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 64, 3, stride=1,
                     padding=1, bias=False)
        self.conv2 = nn.Conv2d(64, 128, 3, stride=1,
                     padding=1, bias=False)
        
        self.conv3 = nn.Conv2d(128, 256, 3, stride=1,
                     padding=1, bias=False)
        self.conv4 = nn.Conv2d(256, 128, 1, stride=1,
                     padding=1, bias=False)
        
        self.pool = nn.MaxPool2d(3, 3)
        self.AvgPool = nn.AvgPool2d(5, 5)
        
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm2d(256)
        self.bn4 = nn.BatchNorm2d(128)
#         self.bn4 = nn.BatchNorm2d(16)
        
        self.dropout = nn.Dropout(p=0.5)
        
        self.fc1 = nn.Linear(512, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, NUM_CLASSES)
        
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        print("start",x.size())
        x = F.relu(self.conv1(x))
        x = self.bn1(x)
        print("Conv1",x.size())
        x = self.pool(x)
        print("pool1",x.size())
        
        x = F.relu(self.conv2(x))
        x = self.bn2(x)
        print("Conv2",x.size())
        x = self.pool(x)
        print("pool2",x.size())
        
        x = F.relu(self.conv3(x))
        x = self.bn3(x)
        print("Conv3",x.size())
        x = self.pool(x)   
        print("pool3",x.size())
        
        x = F.relu(self.conv4(x))
        x = self.bn4(x)
        print("Conv4",x.size())
        x = self.AvgPool(x)
        print("pool4",x.size())
        
        x = x.view(-1,128*x.size()[2]**2)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
#         x = self.softmax(x)
        return x


In [13]:
# TODO: Run your own classifier here
# export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
    
classifier = ClassifierInception().to(device)

# criterion = nn.MultiLabelSoftMarginLoss(weight=torch.tensor(weight).to(device))
criterion = nn.MultiLabelSoftMarginLoss()
# optimizer = torch.optim.SGD(classifier.parameters(), lr=0.01, momentum=0.9)
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-4)

classifier, train_losses, val_losses, train_mAPs, val_mAPs = train(classifier, num_epochs, train_loader, val_loader, criterion, optimizer, test_frequency)


Starting epoch number 1
start torch.Size([64, 3, 227, 227])
Conv1 torch.Size([64, 64, 227, 227])
pool1 torch.Size([64, 64, 75, 75])
Conv2 torch.Size([64, 128, 75, 75])
pool2 torch.Size([64, 128, 25, 25])
Conv3 torch.Size([64, 256, 25, 25])
pool3 torch.Size([64, 256, 8, 8])
Conv4 torch.Size([64, 128, 10, 10])
pool4 torch.Size([64, 128, 2, 2])


KeyboardInterrupt: 

In [None]:
plot_losses(train_losses, val_losses, test_frequency, num_epochs)
plot_mAP(train_mAPs, val_mAPs, test_frequency, num_epochs)

In [None]:
mAP_test, test_loss, test_aps = test_classifier(test_loader, classifier, criterion)
print(mAP_test)

In [None]:
torch.save(classifier.state_dict(), './voc_my_best_classifier.pth')
output_submission_csv('my_solution.csv', test_aps)

In [None]:
def test_classifier(test_loader, classifier, criterion, print_ind_classes=True, print_total=True):
    classifier.eval()
    losses = []
    with torch.no_grad():
        y_true = np.zeros((0,21))
        y_score = np.zeros((0,21))
        for i, (images, labels) in enumerate(test_loader):
            images, labels = images.to(device), labels.to(device)
            logits = classifier(images)
            y_true = np.concatenate((y_true, labels.cpu().numpy()), axis=0)
            y_score = np.concatenate((y_score, logits.cpu().numpy()), axis=0)
            loss = criterion(logits, labels)
            losses.append(loss.item())
        aps = []
        # ignore first class which is background
        for i in range(1, y_true.shape[1]):
            ap = average_precision_score(y_true[:, i], y_score[:, i])
            if print_ind_classes:
                print('-------  Class: {:<12}     AP: {:>8.4f}  -------'.format(VOC_CLASSES[i], ap))
            aps.append(ap)
        
        mAP = np.mean(aps)
        test_loss = np.mean(losses)
        if print_total:
            print('mAP: {0:.4f}'.format(mAP))
            print('Avg loss: {}'.format(test_loss))
        
    return mAP, test_loss, aps

In [None]:

net = ClassifierInception()
load_network_path = './voc_my_best_classifier_sub.pth'
net.load_state_dict(torch.load(load_network_path))
criterion = nn.MultiLabelSoftMarginLoss()


mAP_test, test_loss, test_aps = test_classifier(test_loader, net, criterion)
print(mAP_test)