# COMP7250 (Project)

This project builds a binary classification model that distinguishes cars from trucks using images from the CIFAR-10 dataset.

# Import libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Define data transformations

In [None]:
## Preprocessing steps shared by the training and validation pipelines:
## resize to 224x224, convert to a tensor, then normalise each channel
## with the ImageNet statistics below.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

transform_base = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]


def transform_train(aug_type=None, alpha=0):
    """Build the training pipeline: one optional augmentation, then the base steps.

    Parameters
    ----------
    aug_type : str or None
        Which augmentation to prepend to ``transform_base``. ``None`` or the
        string ``'None'`` means no augmentation. Supported names: ``'h_flip'``,
        ``'v_flip'``, ``'rotate'`` (alias ``'rotation'``), ``'translate'``,
        ``'contrast'``, ``'saturation'``, ``'hue'``, ``'brightness'``.
    alpha : float
        Strength of the augmentation: the flip probability for the flips, a
        fraction of 360 degrees for rotation, and the jitter amount for the
        colour augmentations (halved for hue). Ignored by ``'translate'``,
        which always uses a fixed (0.2, 0.2) shift fraction.

    Returns
    -------
    transforms.Compose
        The augmentation (if any) followed by ``transform_base``.

    Raises
    ------
    ValueError
        If ``aug_type`` is not one of the supported names. Previously an
        unknown name silently produced an un-augmented pipeline, which hid
        typos such as ``'rotation'`` for ``'rotate'``.
    """
    ## No augmentation requested (the experiment loops pass the string 'None')
    if aug_type is None or aug_type == 'None':
        return transforms.Compose(transform_base)

    if aug_type == 'h_flip':
        aug = transforms.RandomHorizontalFlip(p=alpha)
    elif aug_type == 'v_flip':
        aug = transforms.RandomVerticalFlip(p=alpha)
    elif aug_type in ('rotate', 'rotation'):
        aug = transforms.RandomRotation(degrees=360 * alpha)
    elif aug_type == 'translate':
        aug = transforms.RandomAffine(degrees=0, translate=(0.2, 0.2))
    elif aug_type == 'contrast':
        aug = transforms.ColorJitter(contrast=alpha)
    elif aug_type == 'saturation':
        aug = transforms.ColorJitter(saturation=alpha)
    elif aug_type == 'hue':
        ## alpha is halved before being handed to ColorJitter's hue argument
        aug = transforms.ColorJitter(hue=alpha / 2)
    elif aug_type == 'brightness':
        aug = transforms.ColorJitter(brightness=alpha)
    else:
        raise ValueError("Unknown augmentation type: {!r}".format(aug_type))

    ## The augmentation runs first, before the resize/tensor/normalise steps
    return transforms.Compose([aug] + transform_base)

def transform_val():
    """Return the validation pipeline: the shared base steps, no augmentation."""
    pipeline = transforms.Compose(transform_base)
    return pipeline

# Load the dataset

In [None]:
BATCH_SIZE = 8

## Class ids kept for the binary task: the first is reported as label 1,
## the second as label 0 (1 = automobile, 9 = truck in CIFAR-10).
target_classes = [1, 9]


class _BinaryLabelSubset(Subset):
    """Subset that relabels samples when they are read.

    A sample whose original label equals ``positive_class`` is returned with
    label 1; every other sample is returned with label 0. The wrapped
    dataset's own labels are never modified.
    """

    def __init__(self, dataset, indices, positive_class):
        super().__init__(dataset, indices)
        self.positive_class = positive_class

    def __getitem__(self, idx):
        image, label = self.dataset[self.indices[idx]]
        return image, int(label == self.positive_class)

    def __getitems__(self, indices):
        ## Batched-fetch hook used by recent DataLoader versions; route it
        ## through __getitem__ so the relabelling cannot be bypassed.
        return [self[i] for i in indices]


## Extract automobile and truck classes. Equal # of images in each class.
def get_binary_subset(dataset, subset_size):
    """Return a class-balanced two-class subset of ``dataset``.

    Takes the first ``subset_size // 2`` samples of each class listed in
    ``target_classes`` (so an odd ``subset_size`` yields one sample fewer).

    The previous version rewrote ``dataset.targets`` in place (9 -> 0). On a
    repeated call those samples no longer matched class 9, so every call on
    the same dataset silently selected a *different* set of trucks. Labels
    are now remapped on read, which keeps ``dataset.targets`` intact and
    makes repeated calls return the same indices.

    Parameters
    ----------
    dataset : dataset exposing a ``targets`` sequence of class ids and
        returning ``(image, label)`` pairs when indexed.
    subset_size : int
        Total number of samples requested across both classes.

    Returns
    -------
    Subset
        A ``Subset`` (subclass) over ``dataset`` yielding labels 1 and 0.
    """
    positive_class, negative_class = target_classes
    ## Read the labels directly rather than iterating the dataset, which
    ## would load and transform every image just to look at its label.
    labels = list(dataset.targets)
    positives = [i for i, label in enumerate(labels) if label == positive_class]
    negatives = [i for i, label in enumerate(labels) if label == negative_class]

    per_class = subset_size // 2
    final_indices = positives[:per_class] + negatives[:per_class]
    return _BinaryLabelSubset(dataset, final_indices, positive_class)



## Load both CIFAR-10 splits. The training split is created without a
## transform (one is assigned per experiment later); the test split serves
## as the validation pool and always uses the plain validation pipeline.
cifar10_train = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
)
cifar10_val = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform_val(),
    download=True,
)

## Validation subset: 200 images requested in total
val_set = get_binary_subset(cifar10_val, 200)

# Training

In [None]:
## Training hyperparameters, read as module-level globals by the training code
batch_size = 8      # NOTE: the DataLoaders in this file are built with BATCH_SIZE
lr_rate = 1e-4      # learning rate handed to the Adam optimizer
num_epochs = 10     # number of passes over the training subset


def train_model(aug_type, alpha = 0):
    """Train a new 2-class head on a frozen, pretrained ResNet-18.

    Reads the module-level globals ``train_loader``, ``val_loader``,
    ``num_epochs`` and ``lr_rate``; they must be set before calling.

    Parameters
    ----------
    aug_type : str or None
        Name of the augmentation in use. Only recorded in the outputs; the
        augmentation itself is applied by the dataset behind ``train_loader``.
    alpha : float
        Augmentation strength. Only recorded in the outputs.

    Returns
    -------
    model : the trained network.
    histories : dict
        Per-epoch lists of Python floats under ``'train_loss'``,
        ``'val_loss'``, ``'train_acc'``, ``'val_acc'`` (accuracies as
        fractions in [0, 1]), plus ``'aug_type'`` and ``'alpha'``.
    result : dict
        Final-epoch summary: losses rounded to 4 decimals, accuracies as
        percentages rounded to 2 decimals, plus ``'alpha'`` and ``'aug_type'``.
    """
    ## Load the pre-trained model
    model = torchvision.models.resnet18(pretrained=True)

    ## Freeze every pretrained layer, then swap in a fresh 2-class head
    for param in model.parameters():
        param.requires_grad = False
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    ## requires_grad lives on the parameters; assigning it on the Module
    ## itself (as the old code did) has no effect.
    for param in model.fc.parameters():
        param.requires_grad = True

    loss_function = nn.CrossEntropyLoss()
    ## Only the new head's parameters are given to the optimizer
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=lr_rate)

    train_loss_history = []
    train_acc_history = []
    val_loss_history = []
    val_acc_history = []
    for epoch in range(num_epochs):
        ## ---- training pass ----
        ## NOTE(review): train() also switches the frozen backbone's BatchNorm
        ## layers to training mode, so their running statistics keep changing
        ## even though their weights are frozen - confirm this is intended.
        model.train()
        train_loss = 0.0
        correct_preds_train = 0
        for images, labels in train_loader:
            ## Clear the gradients
            optimizer.zero_grad()

            ## Forward prop
            output = model(images)
            loss = loss_function(output, labels)
            train_loss += loss.item()

            ## Backward prop, calculate gradients and update the parameters
            loss.backward()
            optimizer.step()

            ## Predicted class = index of the largest logit along the class dimension
            preds = output.argmax(dim=1)
            ## .item() keeps the counters (and the histories) as plain Python numbers
            correct_preds_train += (preds == labels).sum().item()

        ## Average of the per-batch mean losses (divide by the NUMBER of batches)
        train_loss = train_loss / len(train_loader)
        train_acc = correct_preds_train / len(train_loader.dataset)
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        ## ---- validation pass ----
        model.eval()
        eval_loss = 0.0
        correct_preds_eval = 0
        with torch.no_grad():
            for images, labels in val_loader:
                output = model(images)
                loss = loss_function(output, labels)
                eval_loss += loss.item()
                preds = output.argmax(dim=1)
                correct_preds_eval += (preds == labels).sum().item()
        eval_loss = eval_loss / len(val_loader)
        eval_acc = correct_preds_eval / len(val_loader.dataset)
        val_loss_history.append(eval_loss)
        val_acc_history.append(eval_acc)

        print("Epoch number {} of {}".format(epoch+1, num_epochs))
        print("Train loss: {}, Train accuracy: {} ".format(train_loss, train_acc))
        print("Evaluation loss: {}, Evaluation accuracy: {} ".format(eval_loss, eval_acc))

    histories = {}
    histories['train_loss'] = train_loss_history
    histories['val_loss'] = val_loss_history
    histories['train_acc'] = train_acc_history
    histories['val_acc'] = val_acc_history
    histories['aug_type'] = aug_type
    histories['alpha'] = alpha

    result = {}
    result['train_loss'] = round(train_loss_history[-1], 4)
    result['val_loss'] = round(val_loss_history[-1], 4)
    ## Scale to percent BEFORE rounding so no float noise is left behind
    result['train_acc'] = round(train_acc_history[-1] * 100, 2)
    result['val_acc'] = round(val_acc_history[-1] * 100, 2)
    result['alpha'] = alpha
    result['aug_type'] = aug_type
    return model, histories, result

# Baseline Model

In [None]:
## Baseline: a single run where 'None' selects no augmentation
data_augmentations = ['None']
## Fresh result containers; later experiment cells append to these
histories = []
results = []

for aug_type in data_augmentations:
    print(f"Training a model with {aug_type}")

    ## Install the training transform, then draw the 500-image subset
    cifar10_train.transform = transform_train(aug_type)
    train_set = get_binary_subset(cifar10_train, 500)

    ## train_model reads train_loader / val_loader as globals
    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

    model, history, result = train_model(aug_type)
    print('-' * 100)
    histories += [history]
    results += [result]

# Color Based Augmentation

In [None]:
data_augmentations = ['hue', 'contrast', 'saturation']
## histories / results are deliberately NOT reset here: these runs are
## appended to the lists created by the baseline cell.

## The image subset and both loaders do not depend on the augmentation, so
## build them once. Rebuilding the subset inside the loop was wasted work and
## could hand each trial a different set of images, confounding the comparison.
train_set = get_binary_subset(cifar10_train, 500)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

for aug_type in data_augmentations:
    print("Training a model with {}".format(aug_type))
    ## Hyperparameter tuning by random search from uniform distribution
    for trial in range(5):
        alpha = round(np.random.uniform(), 2)
        print("alpha: ", alpha)
        ## train_set wraps cifar10_train, so the transform assigned here is
        ## the one applied when train_loader fetches images.
        cifar10_train.transform = transform_train(aug_type, alpha)
        model, history, result = train_model(aug_type, alpha)
        print('-'*100)
        histories.append(history)
        results.append(result)

# Geometric augmentations

In [None]:
## 'rotate' is the key transform_train recognises; the previous 'rotation'
## matched no branch there, so those runs trained without any rotation.
data_augmentations = ['h_flip', 'v_flip', 'rotate']
## histories / results are deliberately NOT reset here: these runs are
## appended to the lists created by the earlier cells.

## The image subset and both loaders do not depend on the augmentation, so
## build them once. Rebuilding the subset inside the loop was wasted work and
## could hand each trial a different set of images, confounding the comparison.
train_set = get_binary_subset(cifar10_train, 500)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

for aug_type in data_augmentations:
    print("Training a model with {}".format(aug_type))
    ## Random search over alpha, sampled uniformly from [0, 1)
    for trial in range(5):
        alpha = round(np.random.uniform(), 2)
        print("alpha: ", alpha)
        ## train_set wraps cifar10_train, so the transform assigned here is
        ## the one applied when train_loader fetches images.
        cifar10_train.transform = transform_train(aug_type, alpha)
        model, history, result = train_model(aug_type, alpha)
        print('-'*100)
        histories.append(history)
        results.append(result)

# Effect of dataset size on data augmentation effectiveness

In [None]:
## Training-set sizes to compare, with validation sizes at a 70/30 ratio.
train_sizes = [300, 400, 600, 700]
## get_binary_subset takes size // 2 images per class, so an odd request
## yields one image fewer than asked for. Round each validation size down to
## an even number so the reported size matches the subset actually built.
val_sizes = [2 * (round(n * 0.3 / 0.7) // 2) for n in train_sizes]


## Reload both splits so this experiment starts from fresh dataset objects
cifar10_train = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
cifar10_val = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_val(), download=True)


## Start new result containers for the dataset-size experiment
histories = []
results = []

In [None]:
data_augmentations = ['None', 'contrast']

## Run order matters: the plotting cells index into `histories` by position
## (all sizes for 'None' first, then all sizes for 'contrast').
for aug_type in data_augmentations:
    print(aug_type)
    ## Iterate the size pairs directly instead of a hard-coded range(4)
    for train_size, val_size in zip(train_sizes, val_sizes):
        print("Training a model with {}".format(aug_type))
        print("Training size: ", train_size)
        print("Val size: ", val_size)
        ## The un-augmented baseline ignores alpha; record 0 for it, as the
        ## baseline cell does, instead of labelling it with the contrast strength.
        alpha = 0 if aug_type == 'None' else 0.58
        cifar10_train.transform = transform_train(aug_type, alpha)
        train_set = get_binary_subset(cifar10_train, train_size)
        val_set = get_binary_subset(cifar10_val, val_size)
        ## train_model reads train_loader / val_loader as globals
        train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
        model, history, result = train_model(aug_type, alpha)
        result['train_size'] = train_size
        print('-'*100)
        histories.append(history)
        results.append(result)

In [None]:
## Plot training and validation loss curves
epochs = list(range(1, num_epochs + 1))

## (index into histories, metric key, legend label, line colour)
## NOTE(review): indices 1 and 5 presumably pick the train_size=400 runs of
## the dataset-size experiment (baseline and contrast) - verify against the
## order in which `histories` was filled.
loss_curves = [
    (1, 'train_loss', "Train - baseline", 'orange'),
    (1, 'val_loss', "Val - baseline", 'gold'),
    (5, 'train_loss', "Train  - best aug", 'gray'),
    (5, 'val_loss', "Val - best aug", 'lightblue'),
]
for run_index, metric, legend_label, line_colour in loss_curves:
    plt.plot(epochs, histories[run_index][metric], label=legend_label, color=line_colour)

plt.title('Train/Eval Curve Comparison')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
## Save before show()
plt.savefig('Baseline vs Contrast,  10 epochs, train_size=400.png')
plt.show()

In [None]:
## Plot training and validation accuracy curves
epochs = list(range(1, num_epochs + 1))

## (index into histories, metric key, legend label, line colour)
## NOTE(review): indices 1 and 5 presumably pick the train_size=400 runs of
## the dataset-size experiment (baseline and contrast) - verify against the
## order in which `histories` was filled.
accuracy_curves = [
    (1, 'train_acc', "Train - baseline", 'orange'),
    (1, 'val_acc', "Val - baseline", 'gold'),
    (5, 'train_acc', "Train  - best aug", 'gray'),
    (5, 'val_acc', "Val - best aug", 'lightblue'),
]
for run_index, metric, legend_label, line_colour in accuracy_curves:
    plt.plot(epochs, histories[run_index][metric], label=legend_label, color=line_colour)

plt.title('Train/Eval Accuracy Comparison')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
## Save before show()
plt.savefig('Baseline vs Contrast, Accuracy,  10 epochs, train_size=400.png')
plt.show()

In [None]:
## Write one row per training run to a spreadsheet
results_table = pd.DataFrame(results)
results_table.to_excel("results_diff_dataset_size.xlsx")

## Effect of injecting data augmentation at different epochs

In [None]:
## Specify training details
batch_size = 8
lr_rate = 0.0001
num_epochs = 10


## Leave the shared training dataset in its un-augmented state, as before
cifar10_train.transform = transform_train()

## The baseline loader gets its OWN dataset object. It previously wrapped
## cifar10_train, whose .transform is overwritten with the augmented pipeline
## by the experiment loop - so the "baseline" epochs were augmented as well
## and epoch injection had no effect.
cifar10_train_baseline = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform_train(), download=True)
train_set = get_binary_subset(cifar10_train_baseline, 400)
train_loader_baseline = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)



def train_model_modified_epoch(aug_type, alpha = 0, epoch_inject = 1):
    """Train a 2-class ResNet-18 head, switching to augmented data mid-training.

    Epochs numbered (1-based) below ``epoch_inject`` draw batches from
    ``train_loader_baseline``; from epoch ``epoch_inject`` onwards batches
    come from ``train_loader``. With the default ``epoch_inject = 1`` every
    epoch uses ``train_loader``.

    Reads the module-level globals ``train_loader_baseline``,
    ``train_loader``, ``val_loader``, ``num_epochs`` and ``lr_rate``.

    Parameters
    ----------
    aug_type : str or None
        Name of the augmentation in use. Only recorded in the outputs.
    alpha : float
        Augmentation strength. Only recorded in the outputs.
    epoch_inject : int
        1-based epoch at which training switches to ``train_loader``.

    Returns
    -------
    model : the trained network.
    histories : dict
        Per-epoch lists of Python floats under ``'train_loss'``,
        ``'val_loss'``, ``'train_acc'``, ``'val_acc'`` (accuracies as
        fractions in [0, 1]), plus ``'aug_type'`` and ``'alpha'``.
    result : dict
        Final-epoch summary: losses rounded to 4 decimals, accuracies as
        percentages rounded to 2 decimals, plus ``'alpha'`` and ``'aug_type'``.
    """
    ## Load the pre-trained model
    model = torchvision.models.resnet18(pretrained=True)

    ## Freeze every pretrained layer, then swap in a fresh 2-class head
    for param in model.parameters():
        param.requires_grad = False
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    ## requires_grad lives on the parameters; assigning it on the Module
    ## itself (as the old code did) has no effect.
    for param in model.fc.parameters():
        param.requires_grad = True

    loss_function = nn.CrossEntropyLoss()
    ## Only the new head's parameters are given to the optimizer
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=lr_rate)

    train_loss_history = []
    train_acc_history = []
    val_loss_history = []
    val_acc_history = []
    for epoch in range(num_epochs):
        ## Pick the data source for this epoch; the training step itself is
        ## identical for both, so it is written only once.
        if epoch < (epoch_inject - 1):
            active_loader = train_loader_baseline
        else:
            active_loader = train_loader

        ## ---- training pass ----
        ## NOTE(review): train() also switches the frozen backbone's BatchNorm
        ## layers to training mode, so their running statistics keep changing
        ## even though their weights are frozen - confirm this is intended.
        model.train()
        train_loss = 0.0
        correct_preds_train = 0
        for images, labels in active_loader:
            ## Clear the gradients
            optimizer.zero_grad()

            ## Forward prop
            output = model(images)
            loss = loss_function(output, labels)
            train_loss += loss.item()

            ## Backward prop, calculate gradients and update the parameters
            loss.backward()
            optimizer.step()

            ## Predicted class = index of the largest logit along the class dimension
            preds = output.argmax(dim=1)
            ## .item() keeps the counters (and the histories) as plain Python numbers
            correct_preds_train += (preds == labels).sum().item()

        ## Normalise by the loader that actually ran this epoch: mean of the
        ## per-batch losses, and fraction of its samples predicted correctly.
        train_loss = train_loss / len(active_loader)
        train_acc = correct_preds_train / len(active_loader.dataset)
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        ## ---- validation pass ----
        model.eval()
        eval_loss = 0.0
        correct_preds_eval = 0
        with torch.no_grad():
            for images, labels in val_loader:
                output = model(images)
                loss = loss_function(output, labels)
                eval_loss += loss.item()
                preds = output.argmax(dim=1)
                correct_preds_eval += (preds == labels).sum().item()
        eval_loss = eval_loss / len(val_loader)
        eval_acc = correct_preds_eval / len(val_loader.dataset)
        val_loss_history.append(eval_loss)
        val_acc_history.append(eval_acc)

        print("Epoch number {} of {}".format(epoch+1, num_epochs))
        print("Train loss: {}, Train accuracy: {} ".format(train_loss, train_acc))
        print("Evaluation loss: {}, Evaluation accuracy: {} ".format(eval_loss, eval_acc))

    histories = {}
    histories['train_loss'] = train_loss_history
    histories['val_loss'] = val_loss_history
    histories['train_acc'] = train_acc_history
    histories['val_acc'] = val_acc_history
    histories['aug_type'] = aug_type
    histories['alpha'] = alpha

    result = {}
    result['train_loss'] = round(train_loss_history[-1], 4)
    result['val_loss'] = round(val_loss_history[-1], 4)
    ## Scale to percent BEFORE rounding so no float noise is left behind
    result['train_acc'] = round(train_acc_history[-1] * 100, 2)
    result['val_acc'] = round(val_acc_history[-1] * 100, 2)
    result['alpha'] = alpha
    result['aug_type'] = aug_type
    return model, histories, result

In [None]:
data_augmentations = ['contrast']
histories = []
results = []

## Fixed augmentation strength for this experiment (no random search here)
alpha = 0.58

## The augmented subset and the loaders are the same for every injection
## epoch, so build them once. Rebuilding the subset per iteration was wasted
## work and could give each run a different set of training images.
train_set = get_binary_subset(cifar10_train, 400)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
## NOTE(review): val_set is whatever an earlier cell left behind - confirm it
## is the intended validation subset for this experiment.
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

for aug_type in data_augmentations:
    print("Training a model with {}".format(aug_type))
    ## train_set wraps cifar10_train, so this transform is what train_loader applies
    cifar10_train.transform = transform_train(aug_type, alpha)
    ## Start augmenting at epochs 1, 3, 5, 7 and 9
    for i in range(0,5):
        epoch_inject = i*2+1
        print("alpha: ", alpha)
        model, history, result = train_model_modified_epoch(aug_type, alpha, epoch_inject = epoch_inject)
        result['epoch'] = epoch_inject
        print('-'*100)
        histories.append(history)
        results.append(result)

In [None]:
## Write one row per injection-epoch run to a spreadsheet
results_table = pd.DataFrame(results)
results_table.to_excel("results_diff_diff_epoch.xlsx")