In [None]:
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import pickle
import platform
import os

In [None]:
# Mount Google Drive at /content/drive; later cells read the saved weight files
# from paths under 'drive/MyDrive/...'.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!git clone https://github.com/vita-epfl/DLAV-2024.git
path = os.getcwd() + '/DLAV-2024/homeworks/hw2/test_batch'

In [None]:
# Location of the saved softmax weights. The path below is relative to the current
# working directory and points into the mounted Google Drive folder, so the Drive
# mount cell must have been run first.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
### Path to Model Weights 
with open('drive/MyDrive/Colab Notebooks/softmax_weights.pkl', 'rb') as f:
    softmax_weights = pickle.load(f)

# NOTE(review): in the cells visible here `softmax_weights` is not read again;
# predict_usingSoftmax reloads the same file itself.

#pytorch_weights = ...

**TODO:** Copy your code from the Softmax notebook into the corresponding functions below.

In [None]:
def softmax_loss_vectorized(W, X, y):
    """
    Softmax (cross-entropy) loss and its gradient, vectorized version.

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing integer training labels;
      y[i] = c means that X[i] has label c, where 0 <= c < C.

    Returns a tuple of:
    - loss as single float (mean cross-entropy over the minibatch)
    - gradient with respect to weights W; an array of same shape as W
    """
    num_samples = X.shape[0]
    sample_idx = np.arange(num_samples)

    # Class scores, shifted so the largest score of every row is zero. The shift
    # leaves the softmax unchanged and keeps np.exp from overflowing.
    shifted_scores = X.dot(W)
    shifted_scores = shifted_scores - np.max(shifted_scores, axis=1, keepdims=True)

    # Exponentiate once and reuse the result for both the loss and the gradient.
    exp_scores = np.exp(shifted_scores)
    row_sums = np.sum(exp_scores, axis=1, keepdims=True)

    # Loss in log-sum-exp form: -log p_y = log(sum_j e^{z_j}) - z_y.
    # Taking log(probabilities) directly returns inf as soon as e^{z_y}
    # underflows to 0; here row_sums >= 1, so the log is always finite.
    neg_log_likelihood = np.log(row_sums[:, 0]) - shifted_scores[sample_idx, y]
    loss = np.sum(neg_log_likelihood) / num_samples

    # Gradient w.r.t. the scores: (softmax - one_hot(y)) / N.
    dscores = exp_scores / row_sums
    dscores[sample_idx, y] -= 1
    dscores /= num_samples

    # Backpropagate the score gradient to the weights.
    dW = X.T.dot(dscores)

    return loss, dW

class LinearClassifier(object):
    """Linear classifier trained with minibatch SGD; the weights live in ``self.W``."""

    def __init__(self):
        # Weights are created lazily by train() (or assigned by the caller).
        self.W = None

    def train(self, X, y, learning_rate=1e-3, num_iters=100,
            batch_size=200, verbose=False):
        """
        Fit the classifier with stochastic gradient descent.

        Inputs:
        - X: numpy array of shape (N, D); N training samples of dimension D.
        - y: numpy array of shape (N,) with labels; y[i] = c means X[i] has
          label c, with 0 <= c < C for C classes.
        - learning_rate: (float) step size of each update.
        - num_iters: (integer) number of SGD steps.
        - batch_size: (integer) number of samples drawn for each step.
        - verbose: (boolean) print the loss every 100 iterations when true.

        Outputs:
        A list with the loss value observed at every iteration.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1  # labels are assumed to be 0...K-1
        if self.W is None:
            # First call: start from small random weights.
            self.W = 0.001 * np.random.randn(dim, num_classes)

        loss_history = []
        for step in range(num_iters):
            # Draw the minibatch with replacement; X[batch_idx] has shape
            # (batch_size, dim) and y[batch_idx] has shape (batch_size,).
            batch_idx = np.random.choice(num_train, batch_size, replace=True)

            loss, grad = self.loss(X[batch_idx], y[batch_idx])
            loss_history.append(loss)

            # Plain gradient-descent step, applied in place.
            self.W -= learning_rate * grad

            if verbose and step % 100 == 0:
                print('iteration %d / %d: loss %f' % (step, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
        Predict a label for every row of X with the current weights.

        Inputs:
        - X: numpy array of shape (N, D); N samples of dimension D.

        Returns:
        - y_pred: 1-dimensional array of length N; each element is the integer
          index of the highest-scoring class for that sample.
        """
        return np.argmax(X.dot(self.W), axis=1)

    def loss(self, X_batch, y_batch):
        """
        Evaluate the loss and its gradient on one minibatch.
        Subclasses may override this.

        Inputs:
        - X_batch: numpy array of shape (N, D); a minibatch of N data points.
        - y_batch: numpy array of shape (N,) with the minibatch labels.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        return softmax_loss_vectorized(self.W, X_batch, y_batch)
        


class Softmax(LinearClassifier):
    """Linear classifier whose loss is softmax followed by cross-entropy."""

    def loss(self, X_batch, y_batch):
        """Return ``(loss, dW)`` for the minibatch under the current weights ``self.W``."""
        loss_value, weight_grad = softmax_loss_vectorized(self.W, X_batch, y_batch)
        return loss_value, weight_grad

**TODO:** Copy the model you created in the PyTorch notebook.

In [None]:
class Net(torch.nn.Module):
    """Small CNN for 3x32x32 images.

    Three conv + ReLU + 2x2 max-pool stages are followed by two fully connected
    layers that output ``n_output`` class logits.

    Args:
        n_feature: accepted for interface compatibility; not used by the layers.
        n_hidden: accepted for interface compatibility; not used (the hidden
            fully connected layer is fixed at 128 units).
        n_output: number of output classes (size of the last layer).
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()

        # The attribute names below become the state_dict keys, so they (and the
        # layer sizes) must stay as they are for saved checkpoints to keep loading.
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # Halves the spatial resolution after every conv stage.
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.fc1 = torch.nn.Linear(64 * 4 * 4, 128)
        self.fc2 = torch.nn.Linear(128, n_output)

        # Dropout layer (to prevent overfitting); inactive in eval() mode.
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits of shape (N, n_output).

        ``x`` may be given as (N, 3, 32, 32) or flattened as (N, 3072).
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. an
        # NHWC batch that was permuted to NCHW without calling .contiguous().
        x = x.reshape(-1, 3, 32, 32)

        # conv -> ReLU -> pool; the padding keeps the size, the pool halves it.
        x = self.pool(F.relu(self.conv1(x)))  # (N, 16, 16, 16)
        x = self.pool(F.relu(self.conv2(x)))  # (N, 32, 8, 8)
        x = self.pool(F.relu(self.conv3(x)))  # (N, 64, 4, 4)

        x = x.reshape(-1, 64 * 4 * 4)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # No activation on the output: the loss function is expected to apply
        # the softmax for multi-class classification.
        x = self.fc2(x)
        return x



**TODO**: Follow the instructions in each of the following methods. **Note that these methods should return a 1-D array of size N where N is the number of data samples. The values should be the predicted classes [0,...,9].**



In [None]:
def predict_usingPytorch(
        X,
        weights_path="drive/MyDrive/Colab Notebooks/linearClassifier_pytorch.ckpt"):
    """Predict class indices for ``X`` with the saved PyTorch model.

    Inputs:
    - X: batch of images accepted by ``Net.forward``, as a torch tensor or a
      numpy array (converted to a float32 tensor).
    - weights_path: location of the saved ``state_dict``; defaults to the
      checkpoint in the mounted Google Drive folder.

    Returns:
    - 1-D numpy array of length N holding the predicted class of every sample.
    """
    if isinstance(X, np.ndarray):
        X = torch.from_numpy(X).float()

    net = Net(n_feature=3072, n_hidden=100, n_output=10)

    # map_location makes a checkpoint that was saved from a GPU load on a
    # CPU-only runtime as well.
    # NOTE(security): torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(weights_path, map_location=torch.device("cpu"))
    net.load_state_dict(checkpoint)

    # Evaluation mode disables dropout so the predictions are deterministic.
    net.eval()

    with torch.no_grad():
        logits = net(X)  # raw, unnormalized class scores
        y_pred = torch.argmax(logits, dim=1)

    return y_pred.numpy()

def predict_usingSoftmax(
        X,
        weights_path='drive/MyDrive/Colab Notebooks/softmax_weights.pkl'):
    """Predict class indices for ``X`` with the saved softmax (linear) weights.

    Inputs:
    - X: array of shape (N, D) (numpy array or torch tensor); D must match the
      first dimension of the saved weight matrix.
    - weights_path: location of the pickled weight matrix; defaults to the
      file in the mounted Google Drive folder.

    Returns:
    - 1-D numpy array of length N holding the predicted class of every sample.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load files you trust.
    with open(weights_path, 'rb') as f:
        W = pickle.load(f)

    # Work in NumPy at the precision the weights were saved with. Casting to
    # float32 can merge nearly equal scores and change the predicted class
    # relative to LinearClassifier.predict.
    if isinstance(X, torch.Tensor):
        X = X.detach().cpu().numpy()
    if isinstance(W, torch.Tensor):
        W = W.detach().cpu().numpy()

    logits = np.asarray(X).dot(np.asarray(W))

    # Softmax is monotonic, so the argmax of the logits already is the most
    # probable class; no need to normalize first.
    return np.argmax(logits, axis=1)

This cell loads the test dataset used to evaluate the models.

In [None]:
## Read DATA
def load_pickle(f):
    """Unpickle the open binary file ``f`` with the call matching the running Python.

    On Python 3 the data is decoded with ``encoding='latin1'`` (presumably so
    pickles written by Python 2 can be read); on Python 2 a plain ``pickle.load``
    is used. Any other major version raises ``ValueError``.
    """
    version = platform.python_version_tuple()
    major = version[0]
    if major == '3':
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)
    raise ValueError("invalid python version: {}".format(version))

def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    Inputs:
    - filename: path of the pickled batch; the pickle must hold a dict with
      the keys 'data' and 'labels'.

    Returns a tuple of:
    - X: float array of shape (N, 32, 32, 3) (channels last).
    - Y: numpy array with the N labels.
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)

    # -1 lets NumPy infer the number of samples, so batches that do not hold
    # exactly 10000 images load as well.
    X = datadict['data'].reshape(-1, 3, 32, 32)
    # Reorder from (N, C, H, W) to (N, H, W, C) and convert to float.
    X = X.transpose(0, 2, 3, 1).astype("float")
    Y = np.array(datadict['labels'])
    return X, Y
# Load the test batch (located at `path` inside the cloned repository) into the
# notebook-level variables X (images) and Y (labels).
test_filename = path
X,Y = load_CIFAR_batch(test_filename)

This code snippet prepares the data for the different models. If you modified the data manipulation in your notebooks, make sure to include the same changes here. 

In [None]:
## Data Manipulation

# Per-channel normalization constants (one entry per colour channel).
mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])

# Divide by 255, then standardize; mean/std broadcast over the last (channel) axis.
# NOTE: X is overwritten here, so running this cell twice normalizes twice.
# Re-run the data loading cell before re-running this one.
X = (X / 255 - mean) / std

# PyTorch model input: channels moved from the last axis to axis 1.
X_pytorch = torch.Tensor(np.moveaxis(X, -1, 1))

# Softmax model input: one flattened row per image plus a trailing column of ones.
X_softmax = np.concatenate(
    [X.reshape(X.shape[0], -1), np.ones((X.shape[0], 1))],
    axis=1,
)


Runs the evaluation of the PyTorch and softmax models. **Make sure that *prediction_pytorch* and *prediction_softmax* are 1-D arrays of size N, where N is the number of data samples. The values should be the predicted classes [0,...,9].**

---



In [None]:
## Run Prediction
# Each call returns one predicted class index per test sample.
prediction_pytorch = predict_usingPytorch(X_pytorch)
prediction_softmax = predict_usingSoftmax(X_softmax)

## Run Evaluation
# Accuracy = number of predictions equal to the label in Y, divided by the sample count.
acc_softmax = (prediction_softmax == Y).sum() / len(X)
acc_pytorch = (prediction_pytorch == Y).sum() / len(X)
print("Softmax= %f ... Pytorch= %f"%(acc_softmax, acc_pytorch))