In [1]:
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import pickle
import platform
import os

In [28]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!git clone https://github.com/vita-epfl/DLAV-2022.git
path = os.getcwd() + '/DLAV-2022/homeworks/hw2/test_batch'

Cloning into 'DLAV-2022'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 68 (delta 22), reused 54 (delta 14), pack-reused 0[K
Unpacking objects: 100% (68/68), done.


In [24]:
# Write the location of the saved weight relative to this notebook. Assume that they are in the same directory
### Path to Model Weights 
softmax_weights = 'drive/MyDrive/Colab Notebooks/softmax_weights.pkl' 
pytorch_weights = 'drive/MyDrive/Colab Notebooks/linearClassifier_pytorch.ckpt'

**TODO:** Copy your code from the Softmax Notebook to their corresponding function

In [5]:

def softmax_loss_vectorized(W, X, y):
    """
  Softmax loss function, vectorized version.
  Inputs and outputs are the same as softmax_loss_naive.
  """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    C, D, N = W.shape[1], W.shape[0], X.shape[0] # Nb. of classes
    f_linear = W.T@X.T # CxD x DxN => CxN
    f_linear = f_linear - np.max(f_linear,axis=0) # for more numerically stable solution (exp can overshoot max representable number)
    S = np.divide(np.exp(f_linear),np.sum(np.exp(f_linear),axis=0)) # Softmax, CxN
    y_one_hot = np.eye(C)[y].T # CxN

    loss = np.mean(np.sum(S - y_one_hot,axis=0).T) # Vectorized, Nx1

    # The grad of the cross-entropy softmax of each element in the batch w.r.t. w should be 
    # a matrix of size : DxC
    dW = ((S - y_one_hot) @ X).T
    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################
    
    return loss, dW

class LinearClassifier(object):

    def __init__(self):
        self.W = None


    def train(self, X, y, learning_rate=1e-3, num_iters=30000,
                batch_size=200, verbose=False):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        
        num_train, dim = X.shape
        num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
        
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #########################################################################
            # TODO:                                                                 #
            # Sample batch_size elements from the training data and their           #
            # corresponding labels to use in this round of gradient descent.        #
            # Store the data in X_batch and their corresponding labels in           #
            # y_batch; after sampling X_batch should have shape (dim, batch_size)   #
            # and y_batch should have shape (batch_size,)                           #
            #                                                                       #
            # Hint: Use np.random.choice to generate indices. Sampling with         #
            # replacement is faster than sampling without replacement.              #
            #########################################################################
            choice = np.random.choice(num_train,batch_size)
            X_batch = X[choice]
            y_batch = y[choice]
            #########################################################################
            #                       END OF YOUR CODE                                #
            #########################################################################

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            #########################################################################
            # TODO:                                                                 #
            # Update the weights using the gradient and the learning rate.          #
            #########################################################################
            self.W -= learning_rate * grad
            #########################################################################
            #                       END OF YOUR CODE                                #
            #########################################################################

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))


        return loss_history
    


    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """

        ###########################################################################
        # TODO:                                                                   #
        # Implement this method. Store the predicted labels in y_pred.            #
        ###########################################################################
        f_linear = self.W.T@X.T # CxD x DxN => CxN
        f_linear = f_linear - np.max(f_linear,axis=0) # To avoid numerical instabilities
        y_pred = np.argmax(np.divide(np.exp(f_linear),np.sum(np.exp(f_linear),axis=0)),axis=0)
        ###########################################################################
        #                           END OF YOUR CODE                              #
        ###########################################################################
        return y_pred

    def loss(self, X_batch, y_batch):
        """
        Compute the loss function and its derivative. 
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.


        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        
         e = y_batch - np.dot(X_batch, self.W) 
        
        loss = np.dot(e.T, e)
        grad = -np.dot(x_batch.T,e) / x_batch.shape[0]
  
        return loss, grad

        """

        return softmax_loss_vectorized(self.W, X_batch, y_batch)
        
class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch):
        return softmax_loss_vectorized(self.W, X_batch, y_batch)

**TODO:** Copy the model you created from the Pytorch Notebook

In [34]:
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, hidden_size, n_output):
        super(Net, self).__init__()
        
        ################################################################################
        # TODO:                                                                        #
        # Define 2 or more different layers of the neural network                      #
        ################################################################################

        # Input : n_features, output : n_output
        self.layers = []
        self.layers_params = []
        # dimensions = np.linspace(n_feature,n_output,n_hidden + 3).astype(int)
        dimensions = hidden_size * np.ones([n_hidden + 3]).astype(int)
        dimensions[0], dimensions[-1] = n_feature, n_output
        # Define as many hidden layers as required with same dimensions
        for i in range(n_hidden + 2):
          self.layers.append(torch.nn.Linear(dimensions[i],dimensions[i+1]))
          self.layers_params += list(self.layers[i].parameters())

        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################


    def forward(self, x):
        x = x.view(x.size(0),-1)
        ################################################################################
        # TODO:                                                                        #
        # Set up the forward pass that the input data will go through.                 #
        # A good activation function betweent the layers is a ReLu function.           #
        ################################################################################
        for _, layer in enumerate(self.layers):
          x = torch.nn.functional.relu(layer(x))
        ################################################################################
        #                              END OF YOUR CODE                                #
        ################################################################################
        return x

**TODO**: Follow the instructions in each of the following methods. **Note that these methods should return a 1-D array of size N where N is the number of data samples. The values should be the predicted classes [0,...,9].**



In [35]:
def predict_usingPytorch(X):
    #########################################################################
    # TODO:                                                                 #
    # - Create your model                                                   #
    # - Load your saved model                                               #
    # - Do the operation required to get the predictions                    #
    # - Return predictions in a numpy array (hint: return "argmax")         #
    #########################################################################
    # Load network
    net = Net(n_feature=3072, n_hidden=3, hidden_size=1000, n_output=10)     # define the network
    checkpoint = torch.load("drive/MyDrive/Colab Notebooks/linearClassifier_pytorch.ckpt")
    net.load_state_dict(checkpoint)
    
    # Make predictions
    y_pred = torch.argmax(net(X))
    #########################################################################
    #                       END OF YOUR CODE                                #
    #########################################################################
    return y_pred.numpy()

def predict_usingSoftmax(X):
    #########################################################################
    # TODO:                                                                 #
    # - Load your saved model into the weights of Softmax                   #
    # - Do the operation required to get the predictions                    #
    # - Return predictions in a numpy array                                 #
    #########################################################################
    # Load model
    with open('drive/MyDrive/Colab Notebooks/softmax_weights.pkl', 'rb') as f:
      W = pickle.load(f)
    loaded_softmax = Softmax()
    loaded_softmax.W = W.copy()

    # Make predictions
    y_pred = loaded_softmax.predict(X)
    #########################################################################
    #                       END OF YOUR CODE                                #
    #########################################################################
    return y_pred

This method loads the test dataset to evaluate the model.

In [31]:
## Read DATA
def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return  pickle.load(f)
    elif version[0] == '3':
        return  pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))

def load_CIFAR_batch(filename):
  """ load single batch of cifar """
  with open(filename, 'rb') as f:
    datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
    Y = np.array(Y)
    return X, Y
test_filename = path
X,Y = load_CIFAR_batch(test_filename)

This code snippet prepares the data for the different models. If you modify data manipulation in your notebooks, make sure to include them here. 

In [32]:
## Data Manipulation

mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])
X = np.divide(np.subtract( X/255 , mean[np.newaxis,np.newaxis,:]), std[np.newaxis,np.newaxis,:])

X_pytorch = torch.Tensor(np.moveaxis(X,-1,1))
X_softmax = np.reshape(X, (X.shape[0], -1))
X_softmax = np.hstack([X_softmax, np.ones((X_softmax.shape[0], 1))])


Runs evaluation on the Pytorch and softmax model. **Be careful that *prediction_pytorch* and *prediction_softmax* are 1-D array of size N where N is the number of data samples. The values should be the predicted class [0,...,9]**

---



In [36]:
## Run Prediction
prediction_pytorch = predict_usingPytorch(X_pytorch)
prediction_softmax = predict_usingSoftmax(X_softmax)

## Run Evaluation
acc_softmax = sum(prediction_softmax == Y)/len(X)
acc_pytorch = sum(prediction_pytorch == Y)/len(X)
print("Softmax= %f ... Pytorch= %f"%(acc_softmax, acc_pytorch))

Softmax= 0.379500 ... Pytorch= 0.000000
