# Google Colab setup with Google Drive folder

This notebook provides the code you need to set up Google Colab to run and import files from within a Google Drive folder.

This will allow you to upload assignment code to your Google Drive and then run the code on Google Colab machines (with free GPUs if needed). 

You will need to create a folder in your Google Drive to hold your assignments and you will need to open Colaboratory within this folder before running the set up code (check the link above to see how).

# Copy data to local dir

In [None]:
!tar -xf data/cifar100.tar.gz -C data/
!tar -xf data/test.tar.gz -C data
!tar -xf data/train.tar.gz -C data/

In [None]:
ls data

[1m[36mcifar100[m[m/        [1m[36mtest[m[m/            [1m[36mtrain[m[m/
cifar100.tar.gz  test.tar.gz      train.tar.gz


Make sure that pytorch is installed and works with GPU:

In [None]:
import torch
device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
a = torch.Tensor([1]).to(device)
print(a)


tensor([1.], device='mps:0')


# Part 1

In [1]:
"""Headers"""

from __future__ import print_function
from PIL import Image
import tqdm
import os
import os.path
import numpy as np
import sys
if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

import torch.utils.data as data
from torchvision.datasets.utils import download_url, check_integrity

import csv
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os.path
import sys
import torch
import torch.utils.data
import torchvision
import torchvision.transforms as transforms

from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

np.random.seed(111)
# torch.mps.manual_seed(111)
torch.manual_seed(111)

<torch._C.Generator at 0x1242cca10>



## **Just execute the cell below. This is the dataloader. DO NOT CHANGE ANYTHING IN HERE!**


In [2]:
""""""

class CIFAR10_SFU_CV(data.Dataset):
    """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    Args:
        root (string): Root directory of dataset where directory
            ``cifar-10-batches-py`` exists or will be saved to if download is set to True.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    """
    base_folder = 'cifar100'
    url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    filename = "cifar100.tar.gz"
    tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
    train_list = [
        ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
        ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
        ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
        ['data_batch_4', '634d18415352ddfa80567beed471001a'],
        ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
    ]

    test_list = [
        ['test_batch', '40351d587109b95175f43aff81a1287e'],
    ]

    def __init__(self, root, fold="train",
                 transform=None, target_transform=None,
                 download=False):
        
        fold = fold.lower()

        self.train = False
        self.test = False
        self.val = False

        if fold == "train":
            self.train = True
        elif fold == "test":
            self.test = True
        elif fold == "val":
            self.val = True
        else:
            raise RuntimeError("Not train-val-test")


        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform

        fpath = os.path.join(root, self.filename)
        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' Download it and extract the file again.')

        # now load the picked numpy arrays
        if self.train or self.val:
            self.train_data = []
            self.train_labels = []
            for fentry in self.train_list:
                f = fentry[0]
                file = os.path.join(self.root, self.base_folder, f)
                fo = open(file, 'rb')
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    entry = pickle.load(fo, encoding='latin1')
                self.train_data.append(entry['data'])
                if 'labels' in entry:
                    self.train_labels += entry['labels']
                else:
                    self.train_labels += entry['fine_labels']
                fo.close()

            self.train_data = np.concatenate(self.train_data)
            self.train_data = self.train_data.reshape((50000, 3, 32, 32))
            self.train_data = self.train_data.transpose((0, 2, 3, 1))  # convert to HWC
            
            p = np.arange(0,50000,10)
            mask_train = np.ones((50000,), dtype=bool)
            mask_train[p] = False
            mask_val = np.zeros((50000,), dtype=bool)
            mask_val[p] = True

            copy_all_data = np.array(self.train_data)
            self.val_data = np.array(copy_all_data[mask_val])
            self.train_data = np.array(copy_all_data[mask_train])
            
            copy_all_labels = np.array(self.train_labels)
            self.val_labels = np.array(copy_all_labels[mask_val])
            self.train_labels = np.array(copy_all_labels[mask_train])

        elif self.test:
            f = self.test_list[0][0]
            file = os.path.join(self.root, self.base_folder, f)
            fo = open(file, 'rb')
            if sys.version_info[0] == 2:
                entry = pickle.load(fo)
            else:
                entry = pickle.load(fo, encoding='latin1')
            self.test_data = entry['data']

            if 'labels' in entry:
                self.test_labels = entry['labels']
            else:
                self.test_labels = entry['fine_labels']
            fo.close()
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))  # convert to HWC

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
        elif self.test:
            img, target = self.test_data[index], self.test_labels[index]
        elif self.val:
            img, target = self.val_data[index], self.val_labels[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        if self.train:
            return len(self.train_data)
        elif self.test:
            return len(self.test_data)
        elif self.val:
            return len(self.val_data)

    def _check_integrity(self):
        root = self.root
        for fentry in (self.train_list + self.test_list):
            filename, md5 = fentry[0], fentry[1]
            fpath = os.path.join(root, self.base_folder, filename)
            if not check_integrity(fpath, md5):
                return False
        return True

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        tmp = 'train' if self.train is True else 'test'
        fmt_str += '    Split: {}\n'.format(tmp)
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str


class CIFAR100_SFU_CV(CIFAR10_SFU_CV):
    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    This is a subclass of the `CIFAR10` Dataset.
    """
    base_folder = 'cifar100'
    filename = "cifar100.tar.gz"
    tgz_md5 = 'e68a4c763591787a0b39fe2209371f32'
    train_list = [
        ['train_cs543', '49eee854445c1e2ebe796cd93c20bb0f'],
    ]

    test_list = [
        ['test_cs543', 'd3fe9f6a9251bd443f428f896d27384f'],
    ]

This file has been adapted from the easy-to-use tutorial released by PyTorch:
http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

Training an image classifier
----------------------------

We will do the following steps in order:

1. Load the CIFAR100_SFU_CV training, validation and test datasets using
   torchvision. Use torchvision.transforms to apply transforms on the
   dataset.
2. Define a Convolution Neural Network - BaseNet
3. Define a loss function and optimizer
4. Train the network on training data and check performance on val set.
   Plot train loss and validation accuracies.
5. Try the network on test data and create .csv file for submission to kaggle

In [3]:
# <<TODO#5>> Based on the val set performance, decide how many
# epochs are apt for your model.
# ---------
EPOCHS = 20
# ---------

IS_GPU = True
TEST_BS = 256
TOTAL_CLASSES = 100
TRAIN_BS = 32
PATH_TO_CIFAR100_SFU_CV = "data/"

In [4]:
ls data/cifar100/

test_cs543      test_cs543 2    test_cs543 8    train_cs543 13  train_cs543 5
test_cs543 10   test_cs543 3    test_cs543 9    train_cs543 14  train_cs543 6
test_cs543 11   test_cs543 4    train_cs543     train_cs543 15  train_cs543 7
test_cs543 12   test_cs543 5    train_cs543 10  train_cs543 2   train_cs543 8
test_cs543 13   test_cs543 6    train_cs543 11  train_cs543 3   train_cs543 9
test_cs543 14   test_cs543 7    train_cs543 12  train_cs543 4


In [4]:
def calculate_val_accuracy(valloader, is_gpu):
    """ Util function to calculate val set accuracy,
    both overall and per class accuracy
    Args:
        valloader (torch.utils.data.DataLoader): val set 
        is_gpu (bool): whether to run on GPU
    Returns:
        tuple: (overall accuracy, class level accuracy)
    """    
    correct = 0.
    total = 0.
    predictions = []
    device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')

    class_correct = list(0. for i in range(TOTAL_CLASSES))
    class_total = list(0. for i in range(TOTAL_CLASSES))

    for data in valloader:
        images, labels = data
        if is_gpu:
            images = images.to(device)
            labels = labels.to(device)
        outputs = net(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(list(predicted.cpu().numpy()))
        total += labels.size(0)
        # The following line reported an error for some students. Put a new version.
        # correct += (predicted == labels).sum()
        correct += torch.sum(predicted == labels).detach().cpu().numpy()

        # The following line reported an error for some students. Put a new version.
        # c = (predicted == labels).squeeze()
        c = torch.squeeze(predicted == labels).detach().cpu().numpy()	
        # Added for a fix.
        #c = c.cpu()
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    class_accuracy = 100 * np.divide(class_correct, class_total)
    return 100*correct/total, class_accuracy

1.** Loading CIFAR100_SFU_CV**

We modify the dataset to create CIFAR100_SFU_CV dataset which consist of 45000 training images (450 of each class), 5000 validation images (50 of each class) and 10000 test images (100 of each class). The train and val datasets have labels while all the labels in the test set are set to 0.


In [5]:
# The output of torchvision datasets are PILImage images of range [0, 1].
# Using transforms.ToTensor(), transform them to Tensors of normalized range
# [-1, 1].


# <<TODO#1>> Use transforms.Normalize() with the right parameters to 
# make the data well conditioned (zero mean, std dev=1) for improved training.
# <<TODO#2>> Try using transforms.RandomCrop() and/or transforms.RandomHorizontalFlip()
# to augment training data.
# After your edits, make sure that test_transform should have the same data
# normalization parameters as train_transform
# You shouldn't have any data augmentation in test_transform (val or test data is never augmented).
# ---------------------
from hi import CIFAR100_SFU_CV
mean = [0,0,0]
std = [1,1,1]

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=3),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
# ---------------------

trainset = CIFAR100_SFU_CV(root=PATH_TO_CIFAR100_SFU_CV, fold="train",
                                        download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=TRAIN_BS,
                                          shuffle=True, num_workers=2)
print("Train set size: "+str(len(trainset)))

valset = CIFAR100_SFU_CV(root=PATH_TO_CIFAR100_SFU_CV, fold="val",
                                       download=True, transform=test_transform)
valloader = torch.utils.data.DataLoader(valset, batch_size=TEST_BS,
                                         shuffle=False, num_workers=2)
print("Val set size: "+str(len(valset)))

testset = CIFAR100_SFU_CV(root=PATH_TO_CIFAR100_SFU_CV, fold="test",
                                       download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=TEST_BS,
                                         shuffle=False, num_workers=2)
print("Test set size: "+str(len(testset)))

# The 100 classes for CIFAR100
classes = ['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', 'worm']



Train set size: 45000
Val set size: 5000
Test set size: 10000


In [6]:
########################################################################
# 2. Define a Convolution Neural Network
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We provide a basic network that you should understand, run and
# eventually improve
# <<TODO>> Add more conv layers
# <<TODO>> Add more fully connected (fc) layers
# <<TODO>> Add regularization layers like Batchnorm.
#          nn.BatchNorm2d after conv layers:
#          http://pytorch.org/docs/master/nn.html#batchnorm2d
#          nn.BatchNorm1d after fc layers:
#          http://pytorch.org/docs/master/nn.html#batchnorm1d
# This is a good resource for developing a CNN for classification:
# http://cs231n.github.io/convolutional-networks/#layers

import torch.nn as nn
import torch.nn.functional as F
device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')

class SEBlock(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16):
        super(SEBlock, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels // reduction_ratio),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // reduction_ratio, in_channels),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size() # 现在还不确定x的shape包不包含batch_size
        # b->batch_szie, c->channel
        squeeze = self.pool(x).view(b, c)
        excition = self.fc(squeeze).view(b, c, 1, 1)
        return x * excition

class BottleneckBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, reduction_ratio=16):
        super(BottleneckBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels // 4, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels // 4)
        self.conv2 = nn.Conv2d(in_channels // 4, in_channels // 4, kernel_size=3, stride=stride, padding=1, groups=in_channels // 4, bias=False)
        self.bn2 = nn.BatchNorm2d(in_channels // 4)
        self.conv3 = nn.Conv2d(in_channels // 4, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.se_block = SEBlock(out_channels, reduction_ratio)
        self.stride = stride

        self.match_dimensions = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
        self.bn_match_dimensions = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        residual = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = self.se_block(out)
        #print(out.shape)
        if self.stride != 1 or x.shape[1] != out.shape[1]:
            residual = self.bn_match_dimensions(self.match_dimensions(residual))
        #print(residual.shape)
        out += residual
        out = F.relu(out)
        return out

class make_block(nn.Module):
    def __init__(self, in_channels, out_channels, num):
        super(make_block, self).__init__()
        layers = []
        layers.append(BottleneckBlock(in_channels,out_channels))
        for i in range(1,num):
            layers.append(nn.Conv2d(out_channels, out_channels, 3, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        #print(x.shape)
        x = self.net(x)
        return x

class BaseNet(nn.Module):
    def __init__(self):
        super(BaseNet, self).__init__()

        # <<TODO#3>> Add more conv layers with increasing
        # output channels
        # <<TODO#4>> Add normalization layers after conv
        # layers (nn.BatchNorm2d)

        # Also experiment with kernel size in conv2d layers (say 3
        # inspired from VGGNet)
        # To keep it simple, keep the same kernel size
        # (right now set to 5) in all conv layers.
        # Do not have a maxpool layer after every conv layer in your
        # deeper network as it leads to too much loss of information.

        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.se = SEBlock(64)

        self.layer1 = make_block(64, 128,2)

        self.layer2 = make_block(128, 256,2)
        
        self.layer3 = make_block(256,512,2)

        self.pool = nn.MaxPool2d(2, 2)

        # <<TODO#3>> Add more linear (fc) layers
        # <<TODO#4>> Add normalization layers after linear and

        # experiment inserting them before or after ReLU (nn.BatchNorm1d)
        # More on nn.sequential:
        # http://pytorch.org/docs/master/nn.html#torch.nn.Sequential

        self.fc_net = nn.Sequential(
            #nn.Linear(16 * 5 * 5, TOTAL_CLASSES//2),
            nn.Linear(512*4*4, 512*4*4),
            nn.BatchNorm1d(512*4*4),
            nn.ReLU(inplace=True),


            nn.Linear(512*4*4, TOTAL_CLASSES),
        )

    def forward(self, x):

        # <<TODO#3&#4>> Based on the above edits, you'll have
        # to edit the forward pass description here.
        x = self.se(F.relu(self.bn1(self.conv1(x))))

        x = self.pool(self.layer1(x))

        x = self.pool(self.layer2(x))

        x = self.pool(self.layer3(x))
        # print(x.shape)
        #x = x.view(-1, 16 * 5 * 5)
        x = x.view(-1, 512*4*4)
        x = self.fc_net(x)

        # No softmax is needed as the loss function in step 3
        # takes care of that

        return x

# Create an instance of the nn.module class defined above:
net = BaseNet()

# For training on GPU, we need to transfer net and data onto the GPU
# http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-on-gpu
if IS_GPU:
    net = net.to(device)

In [7]:
########################################################################
# 3. Define a Loss function and optimizer
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Here we use Cross-Entropy loss and SGD with momentum.
# The CrossEntropyLoss criterion already includes softmax within its
# implementation. That's why we don't use a softmax in our model
# definition.

import torch.optim as optim
criterion = nn.CrossEntropyLoss()

# Tune the learning rate.
# See whether the momentum is useful or not
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)

plt.ioff()
fig = plt.figure()
train_loss_over_epochs = []
val_accuracy_over_epochs = []


In [None]:

########################################################################
# 4. Train the network
# ^^^^^^^^^^^^^^^^^^^^
#
# We simply have to loop over our data iterator, and feed the inputs to the
# network and optimize. We evaluate the validation accuracy at each
# epoch and plot these values over the number of epochs
# Nothing to change here
# -----------------------------
device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
for epoch in range(EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    
    for i, data in tqdm.tqdm(enumerate(trainloader, 0)):
        # get the inputs
        inputs, labels = data
        
        if IS_GPU:
            inputs = inputs.to(device)
            labels = labels.to(device)

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
    
    # Normalizing the loss by the total number of train batches
    running_loss/=len(trainloader)
    print('[%d] loss: %.3f' %
          (epoch + 1, running_loss))

    # Scale of 0.0 to 100.0
    # Calculate validation set accuracy of the existing model
    val_accuracy, val_classwise_accuracy = \
        calculate_val_accuracy(valloader, IS_GPU)
    print('Accuracy of the network on the val images: %d %%' % (val_accuracy))

    # # Optionally print classwise accuracies
    # for c_i in range(TOTAL_CLASSES):
    #     print('Accuracy of %5s : %2d %%' % (
    #         classes[c_i], 100 * val_classwise_accuracy[c_i]))

    train_loss_over_epochs.append(running_loss)
    val_accuracy_over_epochs.append(val_accuracy)
# -----------------------------


# Plot train loss over epochs and val set accuracy over epochs
# Nothing to change here
# -------------
plt.subplot(2, 1, 1)
plt.ylabel('Train loss')
plt.plot(np.arange(EPOCHS), train_loss_over_epochs, 'k-')
plt.title('train loss and val accuracy')
plt.xticks(np.arange(EPOCHS, dtype=int))
plt.grid(True)

plt.subplot(2, 1, 2)
# The line added for a bug fix.
val_accuracy_over_epochs = torch.tensor(val_accuracy_over_epochs, device = 'cpu')

plt.plot(np.arange(EPOCHS), val_accuracy_over_epochs, 'b-')
plt.ylabel('Val accuracy')
plt.xlabel('Epochs')
plt.xticks(np.arange(EPOCHS, dtype=int))
plt.grid(True)
plt.savefig("plot.png")
plt.close(fig)
print('Finished Training')
# -------------

1407it [02:07, 11.06it/s]

[1] loss: 3.881





Accuracy of the network on the val images: 19 %


1407it [01:52, 12.51it/s]

[2] loss: 3.079





Accuracy of the network on the val images: 31 %


1407it [01:51, 12.58it/s]

[3] loss: 2.550





Accuracy of the network on the val images: 40 %


1407it [01:52, 12.55it/s]

[4] loss: 2.201





Accuracy of the network on the val images: 46 %


1407it [01:52, 12.46it/s]

[5] loss: 1.958





Accuracy of the network on the val images: 49 %


1407it [01:51, 12.57it/s]

[6] loss: 1.759





Accuracy of the network on the val images: 53 %


1407it [01:51, 12.57it/s]

[7] loss: 1.602





Accuracy of the network on the val images: 54 %


1407it [01:52, 12.56it/s]

[8] loss: 1.472





Accuracy of the network on the val images: 55 %


1407it [01:51, 12.57it/s]

[9] loss: 1.348





Accuracy of the network on the val images: 57 %


1407it [01:52, 12.56it/s]

[10] loss: 1.256





Accuracy of the network on the val images: 58 %


1407it [01:52, 12.54it/s]

[11] loss: 1.152





Accuracy of the network on the val images: 59 %


1407it [01:51, 12.58it/s]

[12] loss: 1.066





Accuracy of the network on the val images: 60 %


1407it [01:51, 12.58it/s]

[13] loss: 1.000





Accuracy of the network on the val images: 60 %


1407it [01:51, 12.59it/s]

[14] loss: 0.920





Accuracy of the network on the val images: 61 %


1407it [01:51, 12.58it/s]

[15] loss: 0.851





Accuracy of the network on the val images: 62 %


1407it [01:52, 12.50it/s]

[16] loss: 0.773





Accuracy of the network on the val images: 61 %


1407it [01:53, 12.42it/s]

[17] loss: 0.718





Accuracy of the network on the val images: 62 %


1407it [01:52, 12.48it/s]

[18] loss: 0.666





Accuracy of the network on the val images: 63 %


1407it [01:52, 12.45it/s]

[19] loss: 0.617





Accuracy of the network on the val images: 63 %


1407it [01:52, 12.54it/s]

[20] loss: 0.562





Accuracy of the network on the val images: 63 %
Finished Training


In [None]:
########################################################################
# 5. Try the network on test data, and create .csv file
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
########################################################################

# Check out why .eval() is important!
# https://discuss.pytorch.org/t/model-train-and-model-eval-vs-model-and-model-eval/5744/2
net.eval()

total = 0
predictions = []
for data in testloader:
    images, labels = data

    # For training on GPU, we need to transfer net and data onto the GPU
    # http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-on-gpu
    if IS_GPU:
        images = images.to(device)
        labels = labels.to(device)
    
    outputs = net(Variable(images))
    _, predicted = torch.max(outputs.data, 1)
    predictions.extend(list(predicted.cpu().numpy()))
    total += labels.size(0)

with open('submission_netid.csv', 'w') as csvfile:
    wr = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
    wr.writerow(["Id", "Prediction1"])
    for l_i, label in enumerate(predictions):
        wr.writerow([str(l_i), str(label)])


# Part 2

In [6]:
"""Headers"""
import os
import os.path as osp
import time

%matplotlib inline
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim

from torchvision import datasets

# Pre-Trained Model

TODO1. Load pretrained resnet model. Experiment with different models. 

TODO2: Replace last fc layer

TODO3. Forward pass

In [7]:
class PreTrainedResNet(nn.Module):
  def __init__(self, num_classes, feature_extracting):
    super(PreTrainedResNet, self).__init__()
    
    #TODO1: Load pre-trained ResNet Model
    self.resnet18 = models.resnet18(pretrained=True)

    #Set gradients to false
    if feature_extracting:
      for param in self.resnet18.parameters():
          param.requires_grad = False
    
    #Replace last fc layer
    num_feats = self.resnet18.fc.in_features
    self.resnet18.fc = nn.Linear(num_feats, num_classes)
    #TODO2: Replace fc layer in resnet to a linear layer of size (num_feats, num_classes)
    
  def forward(self, x):
    #TODO3: Forward pass x through the model
    x = self.resnet18(x)
    
    return x

# Train

In [8]:

device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
def train(model, optimizer, criterion, epoch, num_epochs):
  model.train()
  epoch_loss = 0.0
  epoch_acc = 0.0
  
  for batch_idx, (images, labels) in enumerate(dataloaders['train']):
    #zero the parameter gradients
    optimizer.zero_grad()
    
    #move to GPU
    images, labels = images.to(device), labels.to(device)
    
    #forward
    outputs = model.forward(images)
    
    loss = criterion(outputs, labels)
    
    _, preds = torch.max(outputs.data, 1)
    
    loss.backward()
    optimizer.step()
    
    epoch_loss += loss.item()
    epoch_acc += torch.sum(preds == labels).item()
    
  epoch_loss /= dataset_sizes['train']
  epoch_acc /= dataset_sizes['train']
  
  print('TRAINING Epoch %d/%d Loss %.4f Accuracy %.4f' % (epoch, num_epochs, epoch_loss, epoch_acc))

# Main

1. Vary hyperparams
2. Data augmentation

In [9]:
#TODO: Vary Hyperparams
import tqdm
NUM_EPOCHS = 50
LEARNING_RATE = 0.0001 
BATCH_SIZE = 8
# RESNET_LAST_ONLY = True #Fine tunes only the last layer. Set to False to fine tune entire network
RESNET_LAST_ONLY = False
device = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
root_path = 'data/' #If your data is in a different folder, set the path accodordingly
mean = [0,0,0]
std = [1,1,1]
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256),
        #transforms.CenterCrop(224),
        #TODO: Transforms.RandomResizedCrop() instead of CenterCrop(), RandomRoate() and Horizontal Flip()
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        #TODO: Transforms.Normalize()
        transforms.Normalize(mean,std)
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        #TODO: Transforms.Normalize()
        transforms.Normalize(mean,std)
    ]),
}

# loading datasets with PyTorch ImageFolder
image_datasets = {x: datasets.ImageFolder(os.path.join(root_path, x),
                                          data_transforms[x])
                  for x in ['train', 'test']}

# defining data loaders to load data using image_datasets and transforms, here we also specify batch size for the mini batch
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes

#Initialize the model
model = PreTrainedResNet(len(class_names), RESNET_LAST_ONLY)
model = model.to(device)

#Setting the optimizer and loss criterion
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)
criterion = nn.CrossEntropyLoss()

#Begin Train
for epoch in tqdm.tqdm(range(NUM_EPOCHS)):
    train(model, optimizer, criterion, epoch+1, NUM_EPOCHS)

print("Finished Training")
print("-"*10)

  2%|▏         | 1/50 [01:40<1:22:07, 100.57s/it]

TRAINING Epoch 1/50 Loss 0.6542 Accuracy 0.0195


  4%|▍         | 2/50 [03:13<1:16:47, 95.99s/it] 

TRAINING Epoch 2/50 Loss 0.5987 Accuracy 0.0919


  6%|▌         | 3/50 [04:46<1:14:01, 94.50s/it]

TRAINING Epoch 3/50 Loss 0.5429 Accuracy 0.1931


  8%|▊         | 4/50 [06:19<1:12:03, 94.00s/it]

TRAINING Epoch 4/50 Loss 0.4912 Accuracy 0.2851


 10%|█         | 5/50 [07:52<1:10:16, 93.70s/it]

TRAINING Epoch 5/50 Loss 0.4481 Accuracy 0.3618


 12%|█▏        | 6/50 [09:25<1:08:34, 93.52s/it]

TRAINING Epoch 6/50 Loss 0.4121 Accuracy 0.4162


 14%|█▍        | 7/50 [10:58<1:06:53, 93.34s/it]

TRAINING Epoch 7/50 Loss 0.3822 Accuracy 0.4662


 16%|█▌        | 8/50 [12:31<1:05:13, 93.17s/it]

TRAINING Epoch 8/50 Loss 0.3545 Accuracy 0.5127


 18%|█▊        | 9/50 [14:04<1:03:36, 93.09s/it]

TRAINING Epoch 9/50 Loss 0.3297 Accuracy 0.5441


 20%|██        | 10/50 [15:36<1:01:58, 92.95s/it]

TRAINING Epoch 10/50 Loss 0.3081 Accuracy 0.5781


 22%|██▏       | 11/50 [17:08<1:00:12, 92.63s/it]

TRAINING Epoch 11/50 Loss 0.2873 Accuracy 0.6083


 24%|██▍       | 12/50 [18:40<58:31, 92.41s/it]  

TRAINING Epoch 12/50 Loss 0.2722 Accuracy 0.6330


 26%|██▌       | 13/50 [20:12<56:54, 92.29s/it]

TRAINING Epoch 13/50 Loss 0.2583 Accuracy 0.6475


 28%|██▊       | 14/50 [21:44<55:20, 92.23s/it]

TRAINING Epoch 14/50 Loss 0.2397 Accuracy 0.6800


 30%|███       | 15/50 [23:16<53:44, 92.12s/it]

TRAINING Epoch 15/50 Loss 0.2279 Accuracy 0.6936


 32%|███▏      | 16/50 [24:49<52:15, 92.21s/it]

TRAINING Epoch 16/50 Loss 0.2179 Accuracy 0.7015


 34%|███▍      | 17/50 [26:22<50:50, 92.45s/it]

TRAINING Epoch 17/50 Loss 0.2040 Accuracy 0.7285


 36%|███▌      | 18/50 [27:55<49:29, 92.80s/it]

TRAINING Epoch 18/50 Loss 0.1947 Accuracy 0.7398


 38%|███▊      | 19/50 [29:29<48:02, 92.97s/it]

TRAINING Epoch 19/50 Loss 0.1877 Accuracy 0.7501


 40%|████      | 20/50 [31:02<46:28, 92.94s/it]

TRAINING Epoch 20/50 Loss 0.1801 Accuracy 0.7536


 42%|████▏     | 21/50 [32:34<44:55, 92.94s/it]

TRAINING Epoch 21/50 Loss 0.1671 Accuracy 0.7731


 44%|████▍     | 22/50 [34:08<43:23, 92.98s/it]

TRAINING Epoch 22/50 Loss 0.1616 Accuracy 0.7844


 46%|████▌     | 23/50 [35:40<41:47, 92.85s/it]

TRAINING Epoch 23/50 Loss 0.1590 Accuracy 0.7864


 48%|████▊     | 24/50 [37:13<40:10, 92.72s/it]

TRAINING Epoch 24/50 Loss 0.1491 Accuracy 0.8017


 50%|█████     | 25/50 [38:45<38:36, 92.67s/it]

TRAINING Epoch 25/50 Loss 0.1447 Accuracy 0.8049


 52%|█████▏    | 26/50 [40:17<37:01, 92.57s/it]

TRAINING Epoch 26/50 Loss 0.1389 Accuracy 0.8116


 54%|█████▍    | 27/50 [41:50<35:31, 92.66s/it]

TRAINING Epoch 27/50 Loss 0.1345 Accuracy 0.8200


 56%|█████▌    | 28/50 [43:23<33:57, 92.63s/it]

TRAINING Epoch 28/50 Loss 0.1292 Accuracy 0.8226


 58%|█████▊    | 29/50 [44:55<32:24, 92.62s/it]

TRAINING Epoch 29/50 Loss 0.1246 Accuracy 0.8323


 60%|██████    | 30/50 [46:28<30:52, 92.60s/it]

TRAINING Epoch 30/50 Loss 0.1206 Accuracy 0.8339


 62%|██████▏   | 31/50 [48:01<29:19, 92.61s/it]

TRAINING Epoch 31/50 Loss 0.1179 Accuracy 0.8380


 64%|██████▍   | 32/50 [49:33<27:46, 92.60s/it]

TRAINING Epoch 32/50 Loss 0.1098 Accuracy 0.8537


 66%|██████▌   | 33/50 [51:06<26:12, 92.53s/it]

TRAINING Epoch 33/50 Loss 0.1128 Accuracy 0.8423


 68%|██████▊   | 34/50 [52:38<24:38, 92.43s/it]

TRAINING Epoch 34/50 Loss 0.1101 Accuracy 0.8506


 70%|███████   | 35/50 [54:10<23:05, 92.36s/it]

TRAINING Epoch 35/50 Loss 0.1053 Accuracy 0.8535


 72%|███████▏  | 36/50 [55:42<21:32, 92.36s/it]

TRAINING Epoch 36/50 Loss 0.1046 Accuracy 0.8600


 74%|███████▍  | 37/50 [57:15<20:00, 92.36s/it]

TRAINING Epoch 37/50 Loss 0.1007 Accuracy 0.8585


 76%|███████▌  | 38/50 [58:47<18:28, 92.36s/it]

TRAINING Epoch 38/50 Loss 0.0970 Accuracy 0.8663


 78%|███████▊  | 39/50 [1:00:19<16:56, 92.38s/it]

TRAINING Epoch 39/50 Loss 0.0936 Accuracy 0.8690


 80%|████████  | 40/50 [1:01:52<15:23, 92.36s/it]

TRAINING Epoch 40/50 Loss 0.0946 Accuracy 0.8649


 82%|████████▏ | 41/50 [1:03:24<13:51, 92.38s/it]

TRAINING Epoch 41/50 Loss 0.0904 Accuracy 0.8729


 84%|████████▍ | 42/50 [1:04:57<12:19, 92.40s/it]

TRAINING Epoch 42/50 Loss 0.0877 Accuracy 0.8751


 86%|████████▌ | 43/50 [1:06:29<10:46, 92.38s/it]

TRAINING Epoch 43/50 Loss 0.0847 Accuracy 0.8829


 88%|████████▊ | 44/50 [1:08:01<09:14, 92.39s/it]

TRAINING Epoch 44/50 Loss 0.0839 Accuracy 0.8816


 90%|█████████ | 45/50 [1:09:34<07:41, 92.39s/it]

TRAINING Epoch 45/50 Loss 0.0849 Accuracy 0.8764


 92%|█████████▏| 46/50 [1:11:06<06:09, 92.35s/it]

TRAINING Epoch 46/50 Loss 0.0809 Accuracy 0.8885


 94%|█████████▍| 47/50 [1:12:38<04:36, 92.33s/it]

TRAINING Epoch 47/50 Loss 0.0799 Accuracy 0.8846


 96%|█████████▌| 48/50 [1:14:11<03:04, 92.39s/it]

TRAINING Epoch 48/50 Loss 0.0799 Accuracy 0.8836


 98%|█████████▊| 49/50 [1:15:43<01:32, 92.39s/it]

TRAINING Epoch 49/50 Loss 0.0775 Accuracy 0.8870


100%|██████████| 50/50 [1:17:15<00:00, 92.72s/it]

TRAINING Epoch 50/50 Loss 0.0754 Accuracy 0.8940
Finished Training
----------





# Test

In [10]:
def test(model, criterion, repeats=2):
  model.eval()
  
  test_loss = 0.0
  test_acc = 0.0
  
  with torch.no_grad():
    for itr in range(repeats):
      for batch_idx, (images, labels) in enumerate(dataloaders['test']):
        #move to GPU
        images, labels = images.to(device), labels.to(device)

        #forward
        outputs = model.forward(images)

        loss = criterion(outputs, labels)

        _, preds = torch.max(outputs.data, 1)

        test_loss += loss.item()
        test_acc += torch.sum(preds == labels).item()

    test_loss /= (dataset_sizes['test']*repeats)
    test_acc /= (dataset_sizes['test']*repeats)

    print('Test Loss: %.4f Test Accuracy %.4f' % (test_loss, test_acc))


In [11]:
test(model, criterion)

Test Loss: 0.1970 Test Accuracy 0.5942


# Visualizing the model predictions

Only for viusalizing. Nothing to be done here. 

In [None]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(1)  # pause a bit so that plots are updated
    
def visualize_model(model, num_images=8):
    images_so_far = 0
    fig = plt.figure()

    for batch_idx, (images, labels) in enumerate(dataloaders['test']):
        #move to GPU
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        
        _, preds = torch.max(outputs.data, 1)
       

        for j in range(images.size()[0]):
            images_so_far += 1
            ax = plt.subplot(num_images//2, 2, images_so_far)
            ax.axis('off')
            ax.set_title('class: {} predicted: {}'.format(class_names[labels.data[j]], class_names[preds[j]]))

            imshow(images.cpu().data[j])

            if images_so_far == num_images:
                return

In [None]:
visualize_model(model)