# Models notebook


In [2]:
import utils as local_utils
import torch
import torch.nn as nn
#import pytorch_utils
import torch.optim as optim
import torch.optim as optim
import torch.utils.data as DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import sys
import matplotlib.pyplot as plt
from PIL import Image
import PIL
from importlib import reload
import torch.nn.functional as F
from torchvision import transforms, utils, models
import os, sys
from random import shuffle

%matplotlib inline

You have the option to **use a GPU by setting the `USE_GPU` flag to True below**. A GPU is not necessary for this assignment. Note that if your computer does not have CUDA enabled, `torch.cuda.is_available()` will return False and this notebook will fall back to CPU mode.

The global variables `dtype` and `device` control the tensor data type and the device that tensors are placed on throughout this assignment.

In [3]:
# Notebook-wide settings read by the helper functions defined in later cells.
USE_GPU = True   # ask for CUDA; ignored when torch reports no usable GPU
IMG_SZ = 224     # side length used when allocating the image array in load_data
#IMG_SZ = 32

dtype = torch.float32 # we will be using float throughout this tutorial

# Use CUDA only when it was requested AND is actually available; otherwise CPU.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cpu


Helper functions

In [4]:
def get_files(src_folder, num_classes, num_per_class):
    '''
    Select up to num_per_class image files for each label in [0, num_classes).

    Filenames are expected to look like [id_label.jpg]: the label is the integer
    found between the first '_' and the first '.' of the name.

    Inputs:
    - src_folder: directory to scan (not recursive; sub-directories are ignored)
    - num_classes: only files whose label is in [0, num_classes) are kept
    - num_per_class: maximum number of files kept for any single label

    Returns: a list of bare filenames (not joined with src_folder) in random
    order. It can hold fewer than num_classes * num_per_class names when the
    folder does not contain enough matching files.

    Entries that do not follow the naming scheme (no '_', no '.', non-integer
    label) are skipped instead of raising ValueError, and negative labels are
    rejected so they cannot index the per-class counters from the end.
    '''
    target_total = num_classes * num_per_class
    hist = [0] * num_classes  # hist[label] = number of files kept for that label
    res_files = []

    for f in os.listdir(src_folder):
        if f == '.DS_Store' or not os.path.isfile(os.path.join(src_folder, f)):
            continue

        try:
            label = int(f[f.index('_') + 1 : f.index('.')]) # filename format: [id_label.jpg]
        except ValueError:
            # str.index found no '_' / '.', or the text between them is not an int
            continue

        if label < 0 or label >= num_classes:
            continue

        if hist[label] < num_per_class:
            res_files.append(f)
            hist[label] += 1

            # every class is full: no need to look at the rest of the folder
            if len(res_files) >= target_total:
                break

    shuffle(res_files)  # shuffles in place

    return res_files

# test implementation
# src_folder = '../data/data_100c/train'
# src_files = get_files(src_folder, num_classes=100, num_per_class=200)
# print (len(src_files))
# print (src_files[:10])

In [5]:
def load_image(infilename):
    '''
    Opens the image at the provided path and returns it as a uint8 numpy array
    of shape [H x W x 3].
    https://stackoverflow.com/questions/7762948/how-to-convert-an-rgb-image-to-numpy-array

    The image is converted to RGB first, so grayscale, palette, RGBA or CMYK
    files also come back with exactly three channels (load_data stores every
    image in a [..., 3] slot and would otherwise fail on them).
    '''
    # The context manager releases the underlying file as soon as the pixels
    # have been copied out.
    with Image.open(infilename) as img:
        data = np.asarray(img.convert('RGB'), dtype='uint8')
    return data


def load_data(src_folder, num_classes, num_per_class):
    '''
    Load and normalize the images selected by get_files.

    Inputs:
    - src_folder: directory holding files named [id_label.jpg]
    - num_classes, num_per_class: forwarded to get_files to pick the files

    Returns: (X, Y)
    - X: float64 array [N x IMG_SZ x IMG_SZ x 3]. Each channel is min-max scaled
      to [0, 1] over the loaded images, then standardized with the mean / std
      that the torchvision pretrained models expect.
    - Y: float64 array [N] holding the integer label parsed from each filename.

    N is the number of files get_files returned, which may be smaller than
    num_classes * num_per_class (and may be 0, in which case empty arrays are
    returned without attempting to normalize).
    '''
    src_files = get_files(src_folder, num_classes, num_per_class)

    # Size the buffers by what was actually found rather than by the requested
    # maximum, so no memory is reserved for images that do not exist.
    # NOTE(review): X is float64 (numpy's default), i.e. 8 bytes per pixel value.
    num_files = len(src_files)
    X, Y = np.empty([num_files, IMG_SZ, IMG_SZ, 3]), np.empty([num_files])

    for i, _file in enumerate(src_files):
        # numpy array [IMG_SZ x IMG_SZ x 3]
        X[i] = load_image(os.path.join(src_folder, _file))
        # filename format: [id_label.jpg]
        Y[i] = int(_file[_file.index('_') + 1 : _file.index('.')])

        if i % (1000) == 0: print ('i', i)

    if num_files == 0:
        # min()/max() over an empty array would raise; nothing to normalize
        print ('load_data returning X', X.shape)
        return X, Y

    print ('normalizing data')

    # normalizing the data by the mean and std dev that https://pytorch.org/docs/master/torchvision/models.html asks for
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # per-channel extrema over all images and pixels, shape [1 x 1 x 1 x 3]
    v_min = X.min(axis=(0, 1, 2), keepdims=True)
    v_max = X.max(axis=(0, 1, 2), keepdims=True)
    v_range = v_max - v_min
    v_range[v_range == 0] = 1.0  # constant channel: avoid 0/0 -> NaN

    # Same arithmetic, in the same order, as a per-channel loop, but broadcast
    # over the channel axis and done in place so no full-size temporaries are made.
    X -= v_min
    X /= v_range   # now in [0, 1]
    X -= mean
    X /= std

    print ('load_data returning X', X.shape)
    return X, Y

In [6]:
def loadData(X, Y, batch_size, shuffle=False):
    """
    Wrap numpy arrays in a PyTorch DataLoader.

    Inputs:
    - X: numpy array of inputs; the first axis indexes samples
    - Y: numpy array of targets with the same first-axis length as X
    - batch_size: number of samples per minibatch
    - shuffle: (Optional) if True the loader draws samples in a new random
      order on every pass; the default False keeps the array order.

    Returns: a DataLoader yielding (x, y) tensor pairs. The tensors share memory
    with X and Y (torch.from_numpy does not copy).
    """
    values = DataLoader.TensorDataset(torch.from_numpy(X), torch.from_numpy(Y))
    loader_values = DataLoader.DataLoader(dataset=values,
        batch_size=batch_size, shuffle=shuffle)
    return loader_values

In [7]:
def flatten(x):
    """
    Collapse every dimension after the first into one: [N, ...] -> [N, D].

    Uses reshape rather than view so that non-contiguous inputs (e.g. the result
    of a transpose/permute) are accepted too; for contiguous inputs the result
    is still a view that shares storage with x.
    """
    N = x.shape[0] # read in N, C, H, W
    return x.reshape(N, -1)  # "flatten" the C * H * W values into a single vector per image

def random_weight(shape):
    """
    Draw a weight tensor of the given shape from a scaled standard normal and
    mark it as requiring gradients.

    The scale is the Kaiming factor sqrt(2 / fan_in), where fan_in is:
    - shape[0] for a 2-D (fully-connected) weight
    - the product of shape[1:] otherwise, e.g. in_channel * kH * kW for a conv
      weight laid out as [out_channel, in_channel, kH, kW]

    The tensor is created on the notebook-level `device` with the notebook-level
    `dtype`.
    """
    is_fc_weight = (len(shape) == 2)
    fan_in = shape[0] if is_fc_weight else np.prod(shape[1:])
    kaiming_scale = np.sqrt(2. / fan_in)

    # randn samples from the standard normal distribution
    w = torch.randn(shape, device=device, dtype=dtype) * kaiming_scale
    w.requires_grad_(True)
    return w

In [8]:
def check_accuracy(loader, model):
    """
    Measure classification accuracy of a model over every batch of a loader.

    Inputs:
    - loader: iterable yielding (x, y) minibatches
    - model: A PyTorch Module; it is moved to `device` and switched to eval mode
      (it is NOT switched back to training mode afterwards)

    Returns: the fraction of samples whose arg-max score equals the label, as a
    Python float in [0, 1]. Returns 0.0 when the loader yields no samples.
    Also prints the correct / total counts.
    """
    num_correct = 0
    num_samples = 0
    model = model.to(device=device)  # make sure model and data live on the same device
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)

    if num_samples == 0:
        # avoid ZeroDivisionError on an empty loader
        print('Got 0 / 0 correct (loader produced no samples)')
        return 0.0

    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [9]:
def train(model, optimizer, loader_train, epochs=1, stop=1):
    """
    Train a model with cross-entropy loss.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - loader_train: iterable of (x, y) minibatches that also supports len()
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    - stop: (Optional) fraction of the batches to use in each epoch. The epoch is
      cut short right after the first batch whose index t satisfies
      t > stop * len(loader_train); with the default of 1 that never happens.

    Returns: (iters, losses)
    - iters: iteration numbers at which the loss was recorded (every 50 batches).
      They count from the start of training, e * len(loader_train) + t, so they
      keep increasing across epochs and can be plotted directly.
    - losses: the minibatch loss (Python float) at each of those iterations.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    iters = []
    losses = []

    # Logging interval. Kept at 50 (the value this function has always used) under
    # a local name so it does not shadow the notebook-level print_every.
    log_every = 50
    num_iters = len(loader_train)

    for e in range(epochs):
        print ('epoch', e)

        model.train()  # put model to training mode
        for t, (x, y) in enumerate(loader_train):
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % log_every == 0:
                print(' Iteration %d out of %d, loss = %.4f' % (t, num_iters, loss.item()))
                iters.append(e * num_iters + t)
                losses.append(loss.item())

            # break early if we only want to use a part of the dataset (for hyperparameter tuning)
            if t > stop * num_iters:
                break

    return iters, losses

In [10]:
# Convolutional layer with channel_1 filters (stride 1, zero-padded)
# ReLU
# Fully-connected layer to num_classes classes
class TwoLayerConvNet(nn.Module):
    """
    conv -> ReLU -> flatten -> fully-connected classifier.

    Inputs to the constructor:
    - in_channel: number of channels of the input images
    - channel_1: number of convolution filters
    - num_classes: number of output scores
    - filter_size: convolution kernel size (int, or an (kH, kW) pair)
    - zero_padding: zero padding applied on every side of the image

    The network expects IMG_SZ x IMG_SZ inputs. The fully-connected layer is
    sized from the actual convolution output, so it is correct for any
    filter_size / zero_padding pair, not only "same" padding such as (5, 2).
    """
    def __init__(self, in_channel, channel_1, num_classes, filter_size, zero_padding):
        super(TwoLayerConvNet, self).__init__()
        self.conv_w1 = nn.Conv2d(in_channel, channel_1, filter_size, 1, (zero_padding,zero_padding))
        nn.init.kaiming_normal_(self.conv_w1.weight)

        # Output size of a stride-1, dilation-1 convolution: in + 2 * pad - kernel + 1
        if isinstance(filter_size, int):
            k_h = k_w = filter_size
        else:
            k_h, k_w = filter_size
        out_h = IMG_SZ + 2 * zero_padding - k_h + 1
        out_w = IMG_SZ + 2 * zero_padding - k_w + 1

        self.fc1 = nn.Linear(channel_1 * out_h * out_w, num_classes)
        nn.init.kaiming_normal_(self.fc1.weight)

    def forward(self, x):
        """Return class scores of shape [N, num_classes] for a batch x."""
        x = F.relu(self.conv_w1(x))
        x = flatten(x)
        scores = self.fc1(x)
        return scores

class TwoLayerFC(nn.Module):
    """
    Fully-connected classifier: flatten -> Linear -> ReLU -> Linear.

    - input_size: number of features per sample after flattening
    - hidden_size: width of the hidden layer
    - num_classes: number of output scores
    """
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()

        # Each layer is created and then immediately re-initialized with Kaiming
        # normal weights (biases keep the nn.Linear default initialization).
        # http://pytorch.org/docs/master/nn.html#torch-nn-init
        hidden_layer = nn.Linear(input_size, hidden_size)
        nn.init.kaiming_normal_(hidden_layer.weight)
        self.fc1 = hidden_layer

        output_layer = nn.Linear(hidden_size, num_classes)
        nn.init.kaiming_normal_(output_layer.weight)
        self.fc2 = output_layer

    def forward(self, x):
        """Return class scores for a batch x; x is flattened per sample first."""
        flat = flatten(x)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [11]:
def runFC(hidden_layer_size, num_classes):
    """Build a TwoLayerFC whose input is a flattened 3 x IMG_SZ x IMG_SZ image."""
    flat_input_size = 3 * IMG_SZ * IMG_SZ
    return TwoLayerFC(flat_input_size, hidden_layer_size, num_classes)

def runTwoLayerCNN(num_classes):
    """Build a TwoLayerConvNet for 3-channel input: ten 5x5 filters, zero padding of 2."""
    return TwoLayerConvNet(
        in_channel=3,
        channel_1=10,
        num_classes=num_classes,
        filter_size=5,
        zero_padding=2,
    )

load training data

In [12]:
def getLoaderData(directory, batch_size,  num_classes, num_per_class):
    """
    Load the images under `directory` and wrap them in a DataLoader.

    Inputs:
    - directory: folder passed to load_data
    - batch_size: minibatch size of the returned loader
    - num_classes, num_per_class: forwarded to load_data to select the files

    Returns: the loader built by loadData. Also prints the array shape, the
    first five labels, and how many distinct labels were actually loaded.
    """
    images_nhwc, labels = load_data(directory, num_classes, num_per_class)

    # load_data returns channels last (N x IMG_SZ x IMG_SZ x 3); the models take
    # channels second (N x 3 x IMG_SZ x IMG_SZ)
    images_nchw = images_nhwc.transpose(0, 3, 1, 2)

    # distinct labels present in what was loaded (can differ from the argument)
    distinct_labels = len(set(labels))

    loader_data = loadData(images_nchw, labels, batch_size)

    print ('x', images_nchw.shape)
    print ('y', labels[:5])
    print ('num_classes', distinct_labels)
    return loader_data

In [None]:
# Build the training loader: batches of 50, labels 0-199, at most 150 images per label.
loader_train = getLoaderData(
    '../data/data_200c/train',
    batch_size=50,
    num_classes=200,
    num_per_class=150,
)


train / val plot

visualization

In [None]:
# visualization
# Per-channel statistics applied by load_data, shaped to broadcast over C x H x W.
_VIS_MEAN = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
_VIS_STD = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)

def show(img):
    """
    Draw one C x H x W image tensor taken from a loader.

    The loaders hold images that load_data scaled to [0, 1] and then standardized
    with the mean / std above, so that standardization is undone here. (Casting
    the standardized floats straight to uint8 produces a garbage image.)
    """
    npimg = img.detach().cpu().numpy() * _VIS_STD + _VIS_MEAN
    npimg = np.clip(npimg, 0.0, 1.0)  # imshow expects floats in [0, 1]
    plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')

# Preview the first two images of the first batch only; looping over every batch
# would draw two figures per batch for the whole dataset.
for x, y in loader_train:
    for i in range(min(2, len(x))):
        show(x[i])
        plt.title('label %d' % int(y[i]))
        plt.show()
    break

In [None]:
# Show the first few raw training images in a 3 x 5 grid.
# NOTE(review): this reads from data_100c while the loaders above use data_200c -
# confirm which folder is intended.
src_folder = '../data/data_100c/train/'
src_files = [f for f in os.listdir(src_folder) if os.path.isfile(os.path.join(src_folder, f)) and f != '.DS_Store']

nRows = 3
nCols = 5

f, axarr = plt.subplots(nRows, nCols)

f.set_figheight(10)
f.set_figwidth(15)

# axarr.flat walks the grid row by row. zip stops at the shorter sequence, so a
# folder with fewer than nRows * nCols files leaves the remaining axes empty
# instead of raising IndexError.
for ax, fname in zip(axarr.flat, src_files):
    with PIL.Image.open(os.path.join(src_folder, fname)) as img:
        ax.imshow(img)
        #ax.axis('off')

# (no plt.figure() here: it would only add an empty second figure to the output)
plt.show()

In [None]:
# Re-import the pytorch_utils helper module and call its hi() function.
# NOTE(review): no visible cell imports `pytorch_utils` (the import in the first
# cell is commented out), so this raises NameError on a fresh kernel. Either
# restore that import or delete this cell - TODO confirm the module still exists.
reload(pytorch_utils)
pytorch_utils.hi()

Run model

In [None]:
hidden_layer_size = 300
learning_rates = [0.0000001,0.00001, 0.001, 0.1]
training_portion = 0.8
num_epochs = 10

# Describe the data from the training loader built earlier, so this cell does not
# depend on variables left over from cells that no longer exist.
num_classes = 200                       # same value that was passed to getLoaderData
batch_size = loader_train.batch_size
num_train = len(loader_train.dataset)

print("Num classes is ", num_classes)
print("Num samples being considered in training is ", num_train)
# The validation split is loaded by a later cell, so its size is not reported here.
print("hidden_layer_size is ", hidden_layer_size)
print("batch_size is ", batch_size)

model = runTwoLayerCNN(num_classes)
# NOTE(review): weight_decay=0.5 is a very strong L2 penalty - confirm it is intended.
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=0.5)
# iters, losses = train(model, optimizer, loader_train, num_epochs)

In [None]:
# nn.Sequential can only chain Modules, so the plain `flatten` function is
# exposed as a Module here.
class Flatten(nn.Module):
    """Parameter-free layer that applies `flatten` to its input."""
    def forward(self, x):
        flattened = flatten(x)
        return flattened
    
learning_rate = 1e-2

# NOTE(review): with 224 x 224 inputs the first Linear layer below holds
# 256 * 28 * 28 * 50000 (about 1e10) weights, i.e. roughly 40 GB in float32.
# Confirm this width is intended before trying to instantiate the model.
num_hidden = 50000

# Three 2x2 max-pools halve each spatial side three times: IMG_SZ -> IMG_SZ // 8
feat_sz = IMG_SZ // 8

# NOTE(review): relies on `num_classes` having been set by an earlier cell.
model = nn.Sequential(
    # input is C x H x W
    
    # Two conv layers, 64 filters, and pooling
    nn.Conv2d(3, 64, 3, padding=1),    # 64 x H x W
    nn.ReLU(),                         # 64 x H x W
    # Batch normalization              # 64 x H x W
    nn.Conv2d(64, 64, 3, padding=1),   # 64 x H x W
    # RESIDUAL                         # 64 x H x W
    nn.ReLU(),                         # 64 x H x W
    # Batch normalization              # 64 x H x W
    nn.MaxPool2d(2),                   # 64 x H/2 x W/2
    # INCEPTION
    
    # Two conv layers, 128 filters, and pooling
    nn.Conv2d(64, 128, 3, padding=1),  # 128 x H/2 x W/2
    nn.ReLU(),                         # 128 x H/2 x W/2
    # Batch normalization              # 128 x H/2 x W/2
    nn.Conv2d(128, 128, 3, padding=1), # 128 x H/2 x W/2
    # RESIDUAL                         # 128 x H/2 x W/2
    nn.ReLU(),                         # 128 x H/2 x W/2
    # Batch normalization              # 128 x H/2 x W/2
    nn.MaxPool2d(2),                   # 128 x H/4 x W/4
    # INCEPTION                        
    
    # Two conv layers, 256 filters, and pooling
    nn.Conv2d(128, 256, 3, padding=1), # 256 x H/4 x W/4
    nn.ReLU(),                         # 256 x H/4 x W/4
    # Batch normalization              # 256 x H/4 x W/4
    nn.Conv2d(256, 256, 3, padding=1), # 256 x H/4 x W/4
    # RESIDUAL                         # 256 x H/4 x W/4
    nn.ReLU(),                         # 256 x H/4 x W/4
    # Batch normalization              # 256 x H/4 x W/4
    nn.MaxPool2d(2),                   # 256 x H/8 x W/8
    # INCEPTION
    
    Flatten(),
    nn.Linear(256 * feat_sz * feat_sz, num_hidden),
    # Without a nonlinearity here the two Linear layers would compose into a
    # single linear map and the hidden layer would add no capacity.
    nn.ReLU(),
    nn.Linear(num_hidden, num_classes),
)

# you can use Nesterov momentum in optim.SGD
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

# iters, losses = train(model, optimizer, loader_train, num_epochs)

In [1]:
# resnet
# https://www.kaggle.com/gntoni/using-pytorch-resnet
#
# Grid search over batch size and learning rate. Every combination fine-tunes a
# fresh pretrained ResNet-18 on the first ~20% of one epoch, and the combination
# with the lowest last-recorded minibatch loss is kept.
# NOTE(review): the training set is re-read from disk once per batch size, and
# the pretrained 1000-way output layer is used unchanged for the 200-class data.

# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                                      std=[0.229, 0.224, 0.225])

plotting_data = []  # one (iters, losses, label) tuple per hyperparameter combination

best_model = None
best_loss = float('inf')
lr_vals = [0.0001, 0.001, 0.01]
momentum = 0.9
batch_sizes = [10, 20, 50, 80]
best_lr = None
best_batch_size = None
for batch_size in batch_sizes:
    loader_train = getLoaderData('../data/data_200c/train', batch_size, 200, 150)
    for lr in lr_vals:
        model = models.resnet18(pretrained=True)  # 18

        # Observe that all parameters are being optimized
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

        num_epochs = 1
        iters, losses = train(model, optimizer, loader_train, num_epochs, 0.20) #stopping early to save time

        s = 'lr: %f, batch sz: %d' % (lr, batch_size)
        plotting_data.append((iters, losses, s))

        final_loss = losses[-1]
        print('got a loss of ', final_loss, ' for parameters ', 'batch size: ', batch_size, ' lr: ', lr)
        if final_loss < best_loss:
            best_loss = final_loss
            best_model = model
            best_lr = lr
            best_batch_size = batch_size
print("best values ", best_loss, best_lr, best_batch_size)

# The validation loader has to exist before it can be scored; build it here so
# this cell does not depend on a cell further down having been run first.
loader_val = getLoaderData('../data/data_200c/val', best_batch_size, 200, 40)

val_acc = check_accuracy(loader_val, best_model)
print ('validation accuracy is ', val_acc)
# loader_train is the loader of the LAST batch size tried, not necessarily the best one
train_acc = check_accuracy(loader_train, best_model)
print ('training accuracy is ', train_acc)

for plot_data in plotting_data:
    iters, losses, label = plot_data
    plt.plot(iters, losses, label=label)
plt.title('Loss for different hyperparameters')
plt.xlabel('iters')
plt.ylabel('loss')
plt.legend()
plt.show()

NameError: name 'getLoaderData' is not defined

In [None]:
# Load the validation split (labels 0-199, at most 40 images per label), batched
# with the best batch size found by the sweep, and score the best model on it.
loader_val = getLoaderData(
    '../data/data_200c/val',
    batch_size=best_batch_size,
    num_classes=200,
    num_per_class=40,
)
print ('validation accuracy')
val_acc = check_accuracy(loader_val, best_model)

In [None]:
# plot based on acc_data object

# final_train_acc = acc_data['train_accs']
# val_accs = acc_data['val_accs']
# iterations = acc_data['iters']

# Hard-coded values copied from an earlier run.
# NOTE(review): these numbers fall from 4.64 to 1.62 as training proceeds, which
# looks like a loss curve rather than an accuracy curve - confirm, and fix the
# legend / axis labels if so. The title also says resnet152 while the sweep cell
# in this notebook trains resnet18.
val_accs = [4.6414, 4.2146, 3.6606, 3.0006, 2.5316, 2.0794, 1.8258, 1.6168]
# final_train_acc = 71.14
iterations = [0, 50, 100, 150, 200, 250, 300, 315]

# plot
plt.plot(iterations, val_accs, label='validation accuracy')
plt.title('resnet152 on 16K train, 4K val')
plt.xlabel('iteration')
plt.ylabel('accuracy')
plt.legend()
plt.show()  # render the figure (a bare plt.plot() draws nothing and echoes [])

plotting val / train accuracy curves

In [None]:
# this is hardcoded for the first run
# NOTE: these lists reuse the names train_acc / val_acc, which other cells use
# for single accuracy values returned by check_accuracy.
train_acc = [1.92, 3.07, 4.01, 9.02, 16.16, 23.79, 32.23, 33.89]
val_acc =   [2.20, 3.08, 3.84, 8.56, 14.73, 21.27, 28.89, 30.90]
iterations = [0, 5, 10, 15, 20, 25, 30, 31]

# plot
plt.plot(iterations, train_acc, label='train accuracy')
plt.plot(iterations, val_acc, label='validation accuracy')
plt.title('resnet152 on 6.4K train, 1.6K val')
plt.xlabel('iteration')
plt.ylabel('accuracy')
plt.legend()
plt.show()  # render the figure (a bare plt.plot() draws nothing and echoes [])

From this, we see that there is a small gap between train and val, so the model does not seem to be overfitting too much. Also, it seems like the accuracy is increasing almost linearly, indicating that training for more epochs would improve accuracy.

In [None]:
# vgg
# Fine-tune a pretrained VGG-11 after swapping its last classifier layer for one
# with num_classes outputs.
# NOTE(review): relies on num_classes, num_epochs, loader_train and loader_val
# having been set by earlier cells.
model = models.vgg11(pretrained=True)

# https://medium.com/@14prakash/almost-any-image-classification-problem-using-pytorch-i-am-in-love-with-pytorch-26c7aa979ec4
# Number of input features of the last classifier layer
num_ftrs = model.classifier[6].in_features
# convert all the layers to list and remove the last one
features = list(model.classifier.children())[:-1]
## Add the last layer based on the num of classes in our dataset
features.extend([nn.Linear(num_ftrs, num_classes)])
## convert it into container and add it to our model class.
model.classifier = nn.Sequential(*features)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# train() takes (model, optimizer, loader_train, epochs, stop) and returns the
# logged (iters, losses); passing loader_val as the 4th argument would put it in
# the `epochs` slot. Accuracy is measured separately on the validation loader.
iters, losses = train(model, optimizer, loader_train, num_epochs)
acc = check_accuracy(loader_val, model)

In [None]:
# Echo the current value of num_classes as the cell output.
# NOTE(review): no visible cell assigns a top-level `num_classes` - confirm where
# it is defined, or this raises NameError on a fresh kernel.
num_classes