CS480 Kaggle Competition 
===============================

Name: Ricky Chu

SID: 20987513

Email: r2chu@uwaterloo.ca


(1) Load Data  
--------------------

In [None]:
import torch
from torchvision import datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torch.autograd import Variable
import numpy as np
import torch.nn as  nn
import torch.nn.functional as F
from torchvision.models import resnet50
import pandas as pd 
from google.colab import files
import csv 
import os 
from PIL import Image

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters and paths used by the cells below.
batch_size = 8
learning_rate = 2e-4
num_of_epochs = 10
# Prefix for the dataset directories ('5_shot/train' and '5_shot/test' are appended to it).
ROOT_PATH = "drive/MyDrive/Kaggle/"

# ResNet-50 created with pretrained=True and moved to the selected device.
# NOTE(review): the network is used exactly as loaded -- no layer is replaced to
# match the number of training classes; confirm its output size covers every label.
cnn = resnet50(pretrained=True).to(device)
loss_func = nn.CrossEntropyLoss()
# Adam updates every parameter of the network (nothing is frozen here).
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)

# Metric histories; train() appends one value to each list per call.
total_train_loss= []
total_train_acc=[]

# Custom Data Loader
class TestData(Dataset):
    """Unlabeled image dataset backed by the files of a single directory.

    Every directory entry is treated as an image. Indexing returns
    ``(transformed_image, image_id)`` where ``image_id`` is the file name
    with its extension removed.
    """

    def __init__(self, dir, transform):
        """Index the images found in ``dir``.

        Args:
            dir: Path of the directory holding the image files.
            transform: Callable applied to each RGB image before it is returned.
        """
        self.dir = dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sort them so that
        # index -> file is reproducible between runs and machines.
        self.all_imgs = sorted(os.listdir(dir))

    def __len__(self):
        """Return the number of files indexed in the directory."""
        return len(self.all_imgs)

    def __getitem__(self, idx):
        """Load item ``idx``.

        Returns:
            tuple: ``(transform(image), image_id)`` with ``image_id`` a string.
        """
        file_name = self.all_imgs[idx]
        img_loc = os.path.join(self.dir, file_name)
        # splitext copes with extensions of any length ('.jpeg', '.png', ...),
        # unlike slicing off a fixed four characters.
        image_id = os.path.splitext(file_name)[0]
        # The context manager closes the underlying file once the pixel data
        # has been copied by convert().
        with Image.open(img_loc) as raw_image:
            image = raw_image.convert("RGB")
        tensor_image = self.transform(image)
        return tensor_image, image_id

# Load Data
# Training set: an ImageFolder rooted at '<ROOT_PATH>5_shot/train'. Its
# class_to_idx mapping is what getkey() later inverts.
# Only ToTensor() is applied -- no resizing, normalisation or augmentation.
train_data = datasets.ImageFolder(
    root=ROOT_PATH+'5_shot/train',
    transform=transforms.Compose([transforms.ToTensor()],
    )
)

# Test set: every file in '<ROOT_PATH>5_shot/test', returned as (tensor, id).
test_data = TestData(dir=ROOT_PATH+'5_shot/test', transform=transforms.ToTensor())

# One DataLoader per split, keyed by split name.
loaders = {
    # Training batches of `batch_size` images, reshuffled by the loader.
    'train': DataLoader(train_data,
                        batch_size=batch_size,
                        num_workers=0,
                        shuffle = True 
                        ),
           

    # Test images are served one per batch; shuffling is not requested.
    'test': DataLoader(test_data,
                       batch_size=1,
                       num_workers=0),
}

(2) Train 
--------------------

In [3]:
# Training function
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and record its metrics.

    Args:
        dataloader: Iterable of ``(images, labels)`` batches; must expose
            ``.dataset`` and support ``len()``.
        model: Network to optimise; switched to training mode here.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.

    Side effects:
        Appends the mean per-batch loss to ``total_train_loss`` and the
        fraction of correctly classified samples to ``total_train_acc``.
    """
    num_samples = len(dataloader.dataset)

    model.train()

    running_loss = 0
    num_correct = 0
    for images, labels in dataloader:
        # Move the batch to the device the model runs on.
        inputs, targets = images.to(device), labels.to(device)

        # Forward pass.
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the epoch statistics.
        running_loss += batch_loss.item()
        hits = logits.argmax(1) == targets
        num_correct += hits.type(torch.float).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    total_train_loss.append(running_loss / len(dataloader))
    total_train_acc.append(num_correct / num_samples)

    
# Train for `num_of_epochs` epochs, printing one tab-separated metrics row per epoch.
print('epoch\tAccuracy\tLoss')
for epoch in range(num_of_epochs):
    train(loaders['train'], cnn, loss_func, optimizer)
    # train() appends this epoch's metrics to the history lists. Read the newest
    # entry ([-1]) rather than [epoch]: the lists keep growing if this cell is
    # run again, and indexing by epoch would then print stale first-run values.
    print(
    "{}\t{}\t{}".format(epoch + 1, round(total_train_acc[-1], 5), round(total_train_loss[-1], 5)))

# Save the model weights locally, then download the file through Colab.
torch.save(cnn.state_dict(), "Resnet50model.pth")
files.download('Resnet50model.pth')


epoch	Accuracy	Loss
1	0.28182	4.51436
2	0.86364	0.67096
3	0.95455	0.27634
4	0.96364	0.15865
5	0.95455	0.13994
6	1.0	0.05449
7	1.0	0.03966
8	1.0	0.02949
9	1.0	0.01989
10	1.0	0.01038


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

(3) Test
------------------------------------------

In [5]:
# Find the correct label 
def getkey(item):
    """Map a class index back to its class name.

    Looks ``item`` up among the values of ``train_data.class_to_idx`` and
    returns the first matching key, or ``None`` when no class has that index.
    """
    matches = (name for name, index in train_data.class_to_idx.items() if index == item)
    return next(matches, None)

# Test function
def test(dataloader, model):
    
    model.eval()

    predictions = []

    with torch.no_grad():
        for i, (images, img_number) in enumerate(dataloader, 0):
            img_number = img_number[0]
            images = images.to(device)
            outputs = cnn(images)
            _ , predicted = torch.max(outputs, dim=1)
            predictions.append([img_number,predicted.cpu().item()])
        return predictions 


# Reload the weights saved by the training cell, mapping tensors onto the active device.
cnn.load_state_dict(torch.load('Resnet50model.pth', map_location=torch.device(device)))

# Testing
# Produces a list of [image id, predicted class index] pairs.
predictions = test(loaders['test'], cnn)

# Output the csv file for Kaggle submission
df = pd.DataFrame(predictions, columns=['id','category'])
# Translate each predicted class index back to its class name.
df['category'] = df['category'].apply(getkey)
# NOTE(review): astype(int) assumes both the image ids and the class names are
# integer-like strings -- confirm against the dataset. Casting before sorting
# makes the sort on 'id' numeric.
df.astype(int).sort_values(by='id').to_csv('kaggleComp.csv', index=False)
files.download('kaggleComp.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>