# Import files

In [0]:
!pip install -q -U PyDrive # to get access to files

# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

file_id = '1M1-iIMEj4OVdKKN-pYrVpkHqMvm2M26h'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('Assignment_2.zip')
!unzip -q Assignment_2.zip
!ls Assignment_2

path = 'Assignment_2'

'correct csv.ipynb'		  ex2_FCnet.py	      __pycache__
'corrected results.csv'		  ex2_pytorch.py      results
'corrected results_renamed.csv'   get_datasets.sh     simul.csv
 datasets			  gradient_check.py   two_layernet.py
 data_utils.py			  plot_x_overleaf     vis_utils.py


# Implementation of two layered MLP

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [0]:
def weights_init(m):
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1e-3)
        m.bias.data.fill_(0.)

def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

#--------------------------------
# Device configuration
#--------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: %s'%device)

#--------------------------------
# Hyper-parameters
#--------------------------------
input_size = 32 * 32 * 3
hidden_size = [50]
num_classes = 10
num_epochs = 10
batch_size = 200
learning_rate = 1e-3
learning_rate_decay = 0.95
reg=0.001
num_training= 49000
num_validation =1000
train = True

#-------------------------------------------------
# Load the CIFAR-10 dataset
#-------------------------------------------------
norm_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                     ])
cifar_dataset = torchvision.datasets.CIFAR10(root=path+'datasets/',
                                           train=True,
                                           transform=norm_transform,
                                           download=True)

test_dataset = torchvision.datasets.CIFAR10(root=path+'datasets/',
                                          train=False,
                                          transform=norm_transform
                                          )
#-------------------------------------------------
# Prepare the training and validation splits
#-------------------------------------------------
mask = list(range(num_training))
train_dataset = torch.utils.data.Subset(cifar_dataset, mask)
mask = list(range(num_training, num_training + num_validation))
val_dataset = torch.utils.data.Subset(cifar_dataset, mask)

#-------------------------------------------------
# Data loader
#-------------------------------------------------
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to Assignment_2datasets/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting Assignment_2datasets/cifar-10-python.tar.gz to Assignment_2datasets/


In [0]:
torch.manual_seed(123)
class MultiLayerPerceptron(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes):
        super(MultiLayerPerceptron, self).__init__() # it writes initialization for nn.Module class as well 
        #################################################################################
        # TODO: Initialize the modules required to implement the mlp with the layer     #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        # Make use of linear and relu layers from the torch.nn module                   #
        #################################################################################
        
        layers = [] #Use the layers list to store a variable number of layers
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_layers[0], num_classes))
  

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Enter the layers into nn.Sequential, so the model may "see" them
        # Note the use of * in front of layers
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        #################################################################################
        # TODO: Implement the forward pass computations                                 #
        # Note that you do not need to use the softmax operation at the end.            #
        # Softmax is only required for the loss computation and the criterion used below#
        # nn.CrossEntropyLoss() already integrates the softmax and the log loss together#
        #################################################################################
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        out = self.layers(x)
        

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        
        return out

model = MultiLayerPerceptron(input_size, hidden_size, num_classes).to(device)
# Print model's state_dict
'''
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
'''

if train:
    model.apply(weights_init)
    model.train() #set dropout and batch normalization layers to training mode

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            #################################################################################
            # TODO: Implement the training code                                             #
            # 1. Pass the images to the model                                               #
            # 2. Compute the loss using the output and the labels.                          #
            # 3. Compute gradients and update the model using the optimizer                 #
            # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
            #################################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # Pass images to the model to compute predicted labels
            images = images.view(-1, input_size)
            pred_labels = model(images) 
            
            
            # Compute the loss using the predicted labels and the actual labels.
            loss = criterion(pred_labels, labels)

            # Compute gradients and update the model using the optimizer
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                ####################################################
                # TODO: Implement the evaluation code              #
                # 1. Pass the images to the model                  #
                # 2. Get the most confident predicted class        #
                ####################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                images = images.view(-1, input_size)
                predicted = torch.argmax(model(images), dim=1)


                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print('Validataion accuracy is: {} %'.format(100 * correct / total))
    ##################################################################################
    # TODO: Now that you can train a simple two-layer MLP using above code, you can  #
    # easily experiment with adding more layers and different layer configurations   #
    # and let the pytorch library handle computing the gradients                     #
    #                                                                                #
    # Experiment with different number of layers (at least from 2 to 5 layers) and   #
    # record the final validation accuracies Report your observations on how adding  #
    # more layers to the MLP affects its behavior. Try to improve the model          #
    # configuration using the validation performance as the guidance. You can        #
    # experiment with different activation layers available in torch.nn, adding      #
    # dropout layers, if you are interested. Use the best model on the validation    #
    # set, to evaluate the performance on the test set once and report it            #
    ##################################################################################

    # Save the model checkpoint
    torch.save(model.state_dict(), 'model.ckpt')

else:
    # Run the test code once you have your by setting train flag to false
    # and loading the best model

    best_model = None
    best_model = torch.load('model.ckpt')
    
    model.load_state_dict(best_model)
    
    # Test the model
    model.eval() #set dropout and batch normalization layers to evaluation mode
    
    # In test phase, we don't need to compute gradients (for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            ####################################################
            # TODO: Implement the evaluation code              #
            # 1. Pass the images to the model                  #
            # 2. Get the most confident predicted class        #
            ####################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            images = images.view(-1, input_size)
            predicted = torch.argmax(model(images), dim=1)


            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if total == 1000:
                break

        print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))



Epoch [1/10], Step [100/245], Loss: 1.7975
Epoch [1/10], Step [200/245], Loss: 1.7045
Validataion accuracy is: 44.1 %
Epoch [2/10], Step [100/245], Loss: 1.4979
Epoch [2/10], Step [200/245], Loss: 1.5327
Validataion accuracy is: 46.1 %
Epoch [3/10], Step [100/245], Loss: 1.4750
Epoch [3/10], Step [200/245], Loss: 1.5688
Validataion accuracy is: 47.9 %
Epoch [4/10], Step [100/245], Loss: 1.5602
Epoch [4/10], Step [200/245], Loss: 1.4943
Validataion accuracy is: 49.9 %
Epoch [5/10], Step [100/245], Loss: 1.3469
Epoch [5/10], Step [200/245], Loss: 1.4505
Validataion accuracy is: 48.7 %
Epoch [6/10], Step [100/245], Loss: 1.2933
Epoch [6/10], Step [200/245], Loss: 1.5111
Validataion accuracy is: 50.5 %
Epoch [7/10], Step [100/245], Loss: 1.3567
Epoch [7/10], Step [200/245], Loss: 1.3667
Validataion accuracy is: 50.7 %
Epoch [8/10], Step [100/245], Loss: 1.3137
Epoch [8/10], Step [200/245], Loss: 1.2753
Validataion accuracy is: 50.9 %
Epoch [9/10], Step [100/245], Loss: 1.2609
Epoch [9/10],

# Grid search

In [0]:
import pandas as pd
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm_notebook as tqdm

torch.manual_seed(123)
class MultiLayerPerceptronGridSearch(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes, num_layers, activation_func, dropout=False):
        super(MultiLayerPerceptronGridSearch, self).__init__() # it writes initialization for nn.Module class as well 
        #################################################################################
        # TODO: Initialize the modules required to implement the mlp with the layer     #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        # Make use of linear and relu layers from the torch.nn module                   #
        #################################################################################
        
        layers = [] #Use the layers list to store a variable number of layers
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # input
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        layers.append(activation_func)  
        if dropout == True: layers.append(nn.Dropout(p=0.3))

        # following layers
        for layer in range(num_layers-1):   
          layers.append(nn.Linear(hidden_layers[0], hidden_layers[0]))
          layers.append(activation_func) 
          if dropout == True: layers.append(nn.Dropout(p=0.3))

        # output 
        layers.append(nn.Linear(hidden_layers[0], num_classes))
          

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Enter the layers into nn.Sequential, so the model may "see" them
        # Note the use of * in front of layers
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        #################################################################################
        # TODO: Implement the forward pass computations                                 #
        # Note that you do not need to use the softmax operation at the end.            #
        # Softmax is only required for the loss computation and the criterion used below#
        # nn.CrossEntropyLoss() already integrates the softmax and the log loss together#
        #################################################################################
        
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        out = self.layers(x)
        

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        
        return out

hyper_grid = {'num_layers': [2, 3, 4, 5, 10, 15], 'activation_func' : [nn.ReLU(), nn.PReLU(), nn.LeakyReLU(), nn.Sigmoid(), nn.Tanh()], 'flag':[True, False]}
grid = ParameterGrid(hyper_grid)
all_res = []
for hypers in tqdm(grid):

  # hyperparameters combination
  num_layers = hypers['num_layers']
  activation_func = hypers['activation_func']
  flag = hypers['flag']
  print('\n\n\n\nHyperparameters: num_layers = '+str(num_layers)+', activation_func = '+str(activation_func)+', flag = '+str(flag))

  # initialize model
  model = MultiLayerPerceptronGridSearch(input_size, hidden_size, num_classes, num_layers, activation_func, dropout=flag).to(device)

  model.apply(weights_init)
  
  # Loss and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

  # Train the model
  lr = learning_rate
  total_step = len(train_loader)
  for epoch in range(num_epochs):
      correct_train = 0
      total_train = 0
      model.train()
      for i, (images, labels) in enumerate(train_loader):
          # Move tensors to the configured device
          images = images.to(device)
          labels = labels.to(device)
          #################################################################################
          # TODO: Implement the training code                                             #
          # 1. Pass the images to the model                                               #
          # 2. Compute the loss using the output and the labels.                          #
          # 3. Compute gradients and update the model using the optimizer                 #
          # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
          #################################################################################
          # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

          # Pass images to the model to compute predicted labels
          images = images.view(-1, input_size)
          pred_labels = model(images) 
          

          # Compute the loss using the predicted labels and the actual labels.
          loss = criterion(pred_labels, labels)

          # Compute gradients and update the model using the optimizer
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

          # Compute train accuracy
          predicted = torch.argmax(pred_labels, dim=1)
          total_train += labels.size(0)
          correct_train += (predicted == labels).sum().item()

          # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

          if (i+1) % 100 == 0:
              print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

      # Code to update the lr
      lr *= learning_rate_decay
      update_lr(optimizer, lr)
      with torch.no_grad():
          correct = 0
          total = 0
          model.eval()
          for images, labels in val_loader:
              images = images.to(device)
              labels = labels.to(device)
              ####################################################
              # TODO: Implement the evaluation code              #
              # 1. Pass the images to the model                  #
              # 2. Get the most confident predicted class        #
              ####################################################
              # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
              images = images.view(-1, input_size)
              predicted = torch.argmax(model(images), dim=1)


              # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

          print('Validataion accuracy is: {} %'.format(100 * correct / total))
  all_res.append({'N. layers': num_layers, 'Activation': str(activation_func), 'Dropout': flag, 'train_acc': (100 * correct_train / total), 'val_acc': (100 * correct / total)})

# Save result in a dataframe
out_rs = pd.DataFrame(all_res)
out_rs.to_csv('/content/drive/My Drive/MLP_grid_search.csv', index=False)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=60), HTML(value='')))





Hyperparameters: num_layers = 2, activation_func = ReLU(), flag = True
Epoch [1/10], Step [100/245], Loss: 1.9975
Epoch [1/10], Step [200/245], Loss: 1.9415
Validataion accuracy is: 36.5 %
Epoch [2/10], Step [100/245], Loss: 1.6881
Epoch [2/10], Step [200/245], Loss: 1.7245
Validataion accuracy is: 40.7 %
Epoch [3/10], Step [100/245], Loss: 1.6677
Epoch [3/10], Step [200/245], Loss: 1.8874
Validataion accuracy is: 42.4 %
Epoch [4/10], Step [100/245], Loss: 1.7384
Epoch [4/10], Step [200/245], Loss: 1.6402
Validataion accuracy is: 43.8 %
Epoch [5/10], Step [100/245], Loss: 1.6779
Epoch [5/10], Step [200/245], Loss: 1.7089
Validataion accuracy is: 46.0 %
Epoch [6/10], Step [100/245], Loss: 1.7210
Epoch [6/10], Step [200/245], Loss: 1.6175
Validataion accuracy is: 46.7 %
Epoch [7/10], Step [100/245], Loss: 1.5776
Epoch [7/10], Step [200/245], Loss: 1.5729
Validataion accuracy is: 47.6 %
Epoch [8/10], Step [100/245], Loss: 1.5659
Epoch [8/10], Step [200/245], Loss: 1.6870
Validataion a

In [9]:
import pandas as pd
df = pd.read_csv('/content/drive/My Drive/MLP_grid_search.csv').sort_values('val_acc', ascending=False)
del df['train_acc']
df

Unnamed: 0,N. layers,Activation,Dropout,val_acc
6,2,ReLU(),False,51.8
18,2,PReLU(num_parameters=1),False,51.3
30,2,LeakyReLU(negative_slope=0.01),False,50.4
0,2,ReLU(),True,48.9
12,2,PReLU(num_parameters=1),True,48.5
24,2,LeakyReLU(negative_slope=0.01),True,47.6
54,2,Tanh(),False,46.0
48,2,Tanh(),True,45.8
25,3,LeakyReLU(negative_slope=0.01),True,43.8
49,3,Tanh(),True,41.3
