# Model Analysis
This notebook is used to debug the model and to record its performance metrics.

## Data Preparation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import numpy as np

In [None]:
NUM_TRAIN = 1000

# Preprocessing pipeline built from torchvision.transforms: convert each image
# to a tensor, then normalize every RGB channel by subtracting its mean and
# dividing by its standard deviation (both hardcoded below).
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# One Dataset object per split (train / val / test). Train and val both read
# the CIFAR-10 training set; they are told apart further down by the Sampler
# handed to each DataLoader.
cifar10_train = dset.CIFAR10(
    './cs682/datasets', train=True, download=True, transform=transform)
cifar10_val = dset.CIFAR10(
    './cs682/datasets', train=True, download=True, transform=transform)
cifar10_test = dset.CIFAR10(
    './cs682/datasets', train=False, download=True, transform=transform)

# A Dataset yields one example at a time; the DataLoader wrapper iterates over
# it and assembles minibatches of 64. The first NUM_TRAIN indices are used for
# training and the remaining indices up to 50000 for validation.
loader_train = DataLoader(
    cifar10_train,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)),
)
loader_val = DataLoader(
    cifar10_val,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)),
)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Set to False to force CPU execution even when a CUDA device is present.
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

# Honor the USE_GPU switch: use CUDA only when it is requested AND available,
# otherwise fall back to the CPU so the notebook still runs everywhere.
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 10

print('using device:', device)

using device: cpu


In [None]:
## Helper functions
def flatten(x):
    """
    Collapse every dimension after the first into a single one.

    Inputs:
    - x: A PyTorch Tensor whose first dimension is the batch dimension N,
      e.g. shape (N, C, H, W).

    Returns:
    - A Tensor of shape (N, -1) holding the same values, one row per example.
    """
    N = x.shape[0] # read in N, C, H, W
    # reshape (rather than view) also accepts non-contiguous inputs such as
    # permuted tensors; for contiguous inputs it still returns a view.
    return x.reshape(N, -1)  # "flatten" the C * H * W values into a single vector per image

def test_flatten():
    """Print a small (2, 1, 3, 2) tensor before and after passing it through flatten."""
    sample = torch.arange(12).view(2, 1, 3, 2)
    print('Before flattening: ', sample)
    flattened = flatten(sample)
    print('After flattening: ', flattened)

def random_weight(shape):
    """
    Build a randomly initialized weight Tensor that tracks gradients.

    Values are drawn from a standard normal distribution and scaled with
    Kaiming normalization, sqrt(2 / fan_in). The Tensor is created on the
    module-level `device` with the module-level `dtype`.

    Inputs:
    - shape: Tuple giving the shape of the weight. A 2-tuple is treated as a
      fully-connected weight whose fan-in is shape[0]; anything else is treated
      as a conv weight [out_channel, in_channel, kH, kW] whose fan-in is the
      product of shape[1:].

    Returns:
    - A Tensor of the requested shape with requires_grad=True.
    """
    fan_in = shape[0] if len(shape) == 2 else np.prod(shape[1:])
    scale = np.sqrt(2. / fan_in)
    # randn samples from the standard normal distribution.
    weight = torch.randn(shape, device=device, dtype=dtype) * scale
    return weight.requires_grad_()

def zero_weight(shape):
    """
    Build an all-zero Tensor of the given shape that tracks gradients, created
    on the module-level `device` with the module-level `dtype`.
    """
    zeros = torch.zeros(shape, device=device, dtype=dtype)
    zeros.requires_grad = True
    return zeros

# Sanity check: create a random weight of shape [3 x 5] and show it as the
# cell output (the tensor repr should report requires_grad=True).
# You should see the type `torch.cuda.FloatTensor` if you use GPU.
# Otherwise it should be `torch.FloatTensor`.
random_weight((3, 5))

tensor([[ 0.4515,  0.0460, -2.6377, -0.7387, -1.8595],
        [ 0.9307,  0.4421, -0.1848, -0.7475,  0.5352],
        [-0.0030, -0.1368, -0.6674, -0.4189,  0.9290]], requires_grad=True)

## Model Architecture

In [None]:
def three_layer_convnet(x, params):
    """
    Forward pass of a three-layer convolutional network:
    conv (padding 2) -> ReLU -> conv (padding 1) -> ReLU -> fully-connected.

    Inputs:
    - x: A PyTorch Tensor of shape (N, 3, H, W) giving a minibatch of images
    - params: A list of six PyTorch Tensors, in this order:
      - conv_w1: shape (channel_1, 3, KH1, KW1), weights of the first
        convolutional layer
      - conv_b1: shape (channel_1,), biases of the first convolutional layer
      - conv_w2: shape (channel_2, channel_1, KH2, KW2), weights of the second
        convolutional layer
      - conv_b2: shape (channel_2,), biases of the second convolutional layer
      - fc_w: weights of the fully-connected layer; its first dimension must
        equal the flattened size of the second conv layer's output
      - fc_b: biases of the fully-connected layer

    Returns:
    - scores: PyTorch Tensor of shape (N, C) giving classification scores for x
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params

    # First conv layer followed by its ReLU.
    hidden = F.conv2d(x, conv_w1, bias=conv_b1, padding=2)
    hidden = F.relu(hidden)

    # Second conv layer followed by its ReLU.
    hidden = F.conv2d(hidden, conv_w2, bias=conv_b2, padding=1)
    hidden = F.relu(hidden)

    # Fully-connected layer on the per-image flattened activations.
    return torch.mm(flatten(hidden), fc_w) + fc_b

In [None]:
def three_layer_convnet_test():
    """Run three_layer_convnet on all-zero inputs and print the shape of the scores."""
    # minibatch size 64, image size [3, 32, 32]
    images = torch.zeros((64, 3, 32, 32), dtype=dtype)

    zero_params = [
        # conv weights are [out_channel, in_channel, kernel_H, kernel_W];
        # conv biases are [out_channel]
        torch.zeros((6, 3, 5, 5), dtype=dtype),
        torch.zeros((6,)),
        torch.zeros((9, 6, 3, 3), dtype=dtype),
        torch.zeros((9,)),
        # the FC input size is the flattened shape after the two conv layers
        torch.zeros((9 * 32 * 32, 10)),
        torch.zeros(10),
    ]

    scores = three_layer_convnet(images, zero_params)
    print(scores.size())  # you should see [64, 10]


three_layer_convnet_test()

torch.Size([64, 10])


In [None]:
def check_accuracy(loader, model_fn, params):
    """
    Check the accuracy of a classification model.

    Prints the number of correct predictions and logs the accuracy to the
    active W&B run under the key 'accuracy'.

    Inputs:
    - loader: A DataLoader for the data split we want to check
    - model_fn: A function that performs the forward pass of the model,
      with the signature scores = model_fn(x, params)
    - params: List of PyTorch Tensors giving parameters of the model

    Returns:
    - acc: Python float, the fraction of correctly classified samples, or None
      when the loader yields no samples (nothing is logged in that case).
    """
    split = 'val' if loader.dataset.train else 'test'
    print('Checking accuracy on the %s set' % split)
    num_correct, num_samples = 0, 0
    # Evaluation only: no computational graph is needed.
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.int64)
            scores = model_fn(x, params)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

    # Guard against an empty split instead of failing with ZeroDivisionError.
    if num_samples == 0:
        print('Got 0 samples from the %s set; accuracy is undefined' % split)
        return None

    # Convert the accumulated 0-dim tensor to a plain Python int once, at the end.
    num_correct = int(num_correct)
    acc = num_correct / num_samples
    wandb.log({'accuracy': acc})
    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))
    return acc

In [None]:
def train(model_fn, params, learning_rate):
    """
    Run one pass of plain SGD over `loader_train`, logging the loss to W&B on
    every iteration and checking validation accuracy every `print_every`
    iterations.

    Inputs:
    - model_fn: A Python function that performs the forward pass of the model,
      with the signature scores = model_fn(x, params); x is a PyTorch Tensor
      of image data, params is a list of PyTorch Tensors giving model weights,
      and scores is a PyTorch Tensor of shape (N, C).
    - params: List of PyTorch Tensors giving weights for the model; they are
      updated in place.
    - learning_rate: Python scalar giving the learning rate to use for SGD

    Returns: Nothing
    """
    for step, (images, labels) in enumerate(loader_train):
        # Put the minibatch on the configured device (GPU or CPU).
        images = images.to(device=device, dtype=dtype)
        labels = labels.to(device=device, dtype=torch.long)

        # Forward pass: scores, then the cross-entropy loss.
        loss = F.cross_entropy(model_fn(images, params), labels)

        # Debugging guards: stop on a loss that is exactly zero or NaN.
        assert loss != 0.0
        assert not torch.isnan(loss)

        wandb.log({'loss': loss})

        # Backward pass: autograd fills the .grad attribute of every Tensor in
        # the graph that has requires_grad=True.
        loss.backward()

        # Apply the SGD step under no_grad so the update itself is not
        # recorded in a computational graph.
        with torch.no_grad():
            for param in params:
                grad = param.grad
                assert not torch.isnan(grad).any()
                param -= learning_rate * grad

                # Reset the gradient by hand so it does not accumulate.
                grad.zero_()

        if step % print_every != 0:
            continue
        print('Iteration %d, loss = %.4f' % (step, loss.item()))
        check_accuracy(loader_val, model_fn, params)
        print()

In [None]:
def trainer(learning_rate, channel_1, channel_2):
    """
    Initialize the parameters of a three-layer ConvNet and train it.

    Inputs:
    - learning_rate: Python scalar giving the learning rate to use for SGD
    - channel_1: Number of output channels of the first (5x5) conv layer
    - channel_2: Number of output channels of the second (3x3) conv layer

    Returns: Nothing
    """
    # Input geometry and number of classes expected by the network.
    in_channels, image_h, image_w = 3, 32, 32
    num_classes = 10

    conv_w1 = random_weight((channel_1, in_channels, 5, 5))
    conv_b1 = zero_weight((channel_1,))
    conv_w2 = random_weight((channel_2, channel_1, 3, 3))
    conv_b2 = zero_weight((channel_2,))
    # three_layer_convnet pads the 5x5 conv by 2 and the 3x3 conv by 1, so the
    # spatial size is preserved and the flattened feature size is
    # channel_2 * H * W. It does not depend on channel_1; the previous
    # channel_2 * channel_1 * channel_1 only worked when channel_1 == 32.
    fc_w = random_weight((channel_2 * image_h * image_w, num_classes))
    fc_b = zero_weight((num_classes,))

    params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
    train(three_layer_convnet, params, learning_rate)

In [None]:
#!pip install wandb

In [None]:
# Log in to your W&B account.
# wandb is imported here, so this cell must run before any cell that calls
# check_accuracy or train (both call wandb.log).
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mnaik1210[0m (use `wandb login --relogin` to force relogin)


True

In [14]:
# Start a W&B run in the "project-tutorial-3" project.
# NOTE(review): the sweep cell further down calls wandb.init again for every
# experiment — confirm this standalone run is still needed.
wandb.init(project="project-tutorial-3")

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁█
loss,███▅▆▅▄▄▆▄▄▄▄▅▃▁

0,1
accuracy,0.13553
loss,2.16754


In [15]:
import random

## Hyperparameters : Learning Rate, channel_1, channel_2

# Launch 5 simulated experiments
for run in range(5):
    # Sample the learning rate log-uniformly between 1e-7 and 1e-1.
    # np.random.uniform expects (low, high) in that order.
    lr = 10**np.random.uniform(-7, -1)
#    channel_1 = 2**np.random.randint(1,5)
#    channel_2 = 2**np.random.randint(1,5)
    channel_1 = 32
    channel_2 = 16

    ## Initialize wandb run
    wandb.init(
      project="project-tutorial-3",
      # Track hyperparameters and run metadata
      config={
      "learning_rate": lr,
      "channel_1": channel_1,
      "channel_2": channel_2,
      "architecture": "3-layer CNN",
      # The loaders in this notebook are built from dset.CIFAR10.
      "dataset": "CIFAR-10",})

    try:
        trainer(lr,channel_1, channel_2)
    finally:
        # Close the run explicitly so every experiment, including the last
        # one, is finalized even if training raises.
        wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Iteration 0, loss = 3.1736
Checking accuracy on the val set
Got 4866 / 49000 correct (9.93%)

Iteration 10, loss = 2.4885
Checking accuracy on the val set
Got 6671 / 49000 correct (13.61%)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁█
loss,█▄▂▄▅▂▇▄▃▁▂▃▁▂▂▃

0,1
accuracy,0.13614
loss,2.58196


Iteration 0, loss = 3.6716
Checking accuracy on the val set
Got 6300 / 49000 correct (12.86%)

Iteration 10, loss = 2.4356
Checking accuracy on the val set
Got 6630 / 49000 correct (13.53%)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁█
loss,█▅▄▄▄▃▂▃▂▁▁▂▁▂▃▁

0,1
accuracy,0.13531
loss,2.35028


Iteration 0, loss = 3.9640
Checking accuracy on the val set
Got 4682 / 49000 correct (9.56%)

Iteration 10, loss = 3.1073
Checking accuracy on the val set
Got 5178 / 49000 correct (10.57%)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁█
loss,█▆▅▃▄▃▁▁▃▂▄▁▂▂▂▂

0,1
accuracy,0.10567
loss,2.71199


Iteration 0, loss = 2.9386
Checking accuracy on the val set
Got 4739 / 49000 correct (9.67%)

Iteration 10, loss = 3.1008
Checking accuracy on the val set
Got 4737 / 49000 correct (9.67%)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,█▁
loss,▃▅▄▁▆▃█▃▃▅▅▄▅▃▄▇

0,1
accuracy,0.09667
loss,3.27633


Iteration 0, loss = 2.9312
Checking accuracy on the val set
Got 4717 / 49000 correct (9.63%)

Iteration 10, loss = 2.7747
Checking accuracy on the val set
Got 4714 / 49000 correct (9.62%)

