
## <font color=red> You should not import any new libraries. Your code should run with python=3.x</font>

#### <font color=red>For lab assignment, you will work with two datasets. The trained weights need to be saved and shared with us in a folder called models with the name ./models/{dataset_name}_weights.pkl. Your predict function should load these weights, initialize the DNN and predict the labels.</font>

- Your solutions will be auto-graded. Hence we request you to follow the instructions.
- Modify the code only between 
```
## TODO
## END TODO
```
- In addition to above changes, you can play with arguments to the functions for generating plots
- We will run the auto grading scripts with private test data

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import math
import pickle as pkl

Preprocessing

In [2]:
def preprocessing(X):
  """
  Implement Normalization for input image features

  Args:
  X : numpy array of shape (n_samples, 784)
   
  Returns:
  X_out: numpy array of shape (n_samples, 784) after normalization
  """
  X_out = None
  
  ## TODO
  X_mean = X.mean(axis=0)
  X_std = X.std(axis=0)
  X_out = (X-X_mean)/X_std
  ## END TODO

  assert X_out.shape == X.shape

  return X_out

### Split data into train/val

In [3]:
def split_data(X, Y, train_ratio=0.8):
    '''
    Split data into train and validation sets
    The first floor(train_ratio*n_sample) samples form the train set
    and the remaining the test set

    Args:
    X - numpy array of shape (n_samples, n_features)
    Y - numpy array of shape (n_samples, 1)
    train_ratio - fraction of samples to be used as training data

    Returns:
    X_train, Y_train, X_val, Y_val
    '''
    # Try Normalization and scaling and store it in X_transformed
    X_transformed = X

    ## TODO
    mx= X.max(axis=1).reshape(-1,1)
    mn = X.min(axis=1).reshape(-1,1)
    X_transformed = (X-mn)/(mx-mn)
    ## END TODO

    assert X_transformed.shape == X.shape

    num_samples = len(X)
    indices = np.arange(num_samples)
    num_train_samples = math.floor(num_samples * train_ratio)
    train_indices = np.random.choice(indices, num_train_samples, replace=False)
    val_indices = list(set(indices) - set(train_indices))
    X_train, Y_train, X_val, Y_val = X_transformed[train_indices], Y[train_indices], X_transformed[val_indices], Y[val_indices]
  
    return X_train, Y_train, X_val, Y_val

#Flatten the input

In [4]:
class FlattenLayer:
    '''
    This class converts a multi-dimensional into 1-d vector
    '''
    def __init__(self, input_shape):
        '''
         Args:
          input_shape : Original shape, tuple of ints
        '''
        self.input_shape = input_shape

    def forward(self, input):
        '''
        Converts a multi-dimensional into 1-d vector
        Args:
          input : training data, numpy array of shape (n_samples , self.input_shape)

        Returns:
          input: training data, numpy array of shape (n_samples , -1)
        '''
        ## TODO
        nsamps= input.shape[0]
        inp = input.reshape(nsamps,-1)
        #print(inp.shape)
        #Modify the return statement to return flattened input
        return inp
        ## END TODO
        
    
    def backward(self, output_error, learning_rate):
        '''
        Converts back the passed array to original dimention 
        Args:
        output_error :  numpy array 
        learning_rate: float

        Returns:
        output_error: A reshaped numpy array to allow backward pass
        '''
        ## TODO
        nsamps = output_error.shape[0]
        out_sh = [nsamps]+ list(self.input_shape)
        output_err = output_error.reshape(out_sh)

        #Modify the return statement to return reshaped array
        return output_err
        ## END TODO
        

#Fully Connected Layer

In [5]:
class FCLayer:
    '''
    Implements a fully connected layer  
    '''
    def __init__(self, input_size, output_size):
        '''
        Args:
         input_size : Input shape, int
         output_size: Output shape, int 
        '''
        self.input_size = input_size
        self.output_size = output_size
        ## TODO
        self.weights = np.random.randn(self.input_size, self.output_size)*(1/self.input_size) #initilaise weights for this layer
        self.bias = np.random.randn(self.output_size) #initilaise bias for this layer
        ## END TODO

    def forward(self, input):
        '''
        Performs a forward pass of a fully connected network
        Args:
          input : training data, numpy array of shape (n_samples , self.input_size)

        Returns:
           numpy array of shape (n_samples , self.output_size)
        '''
        ## TODO

        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        self.oldZ= np.copy(input)
        Zval = np.dot(input, self.weights) + self.bias
        return Zval
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        #out_error is nsamps , output_size
        #res should nsamps, input_size
        nsamps = output_error.shape[0]
        dW = (1/nsamps)*np.matmul(self.oldZ.T,output_error)
        db = np.mean(output_error, axis=0)
        dA = np.matmul(output_error, self.weights.T)
        
        self.weights -= learning_rate*dW
        self.bias -= learning_rate*db
        

        #Modify the return statement to return numpy array resulting from backward pass
        return dA
        ## END TODO

In [6]:
class ActivationLayer:
    '''
    Implements a Activation layer which applies activation function on the inputs. 
    '''
    def __init__(self, activation, activation_prime):
        '''
          Args:
          activation : Name of the activation function (sigmoid,tanh or relu)
          activation_prime: Name of the corresponding function to be used during backpropagation (sigmoid_prime,tanh_prime or relu_prime)
        '''
        self.activation = activation
        self.activation_prime = activation_prime
    
    def forward(self, input):
        '''
        Applies the activation function 
        Args:
          input : numpy array on which activation function is to be applied

        Returns:
           numpy array output from the activation function
        '''
        ## TODO
        self.Zv = self.activation(input)
        
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        return self.Zv
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        outss = (learning_rate*output_error)*self.activation_prime(self.Zv)
        #Modify the return statement to return numpy array resulting from backward pass
        return outss
        ## END TODO

In [7]:

class SoftmaxLayer:
    '''
      Implements a Softmax layer which applies softmax function on the inputs. 
    '''
    def __init__(self, input_size):
        self.input_size = input_size
    
    def forward(self, input):
        '''
        Applies the softmax function 
        Args:
          input : numpy array on which softmax function is to be applied

        Returns:
           numpy array output from the softmax function
        '''
        ## TODO
        exps = np.exp(input)
        sums = np.sum(exps, axis =1).reshape(-1,1)
        self.res = exps/sums
        
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        return self.res
        ## END TODO
        
    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a Softmax layer
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO

        #Modify the return statement to return numpy array resulting from backward pass
        return self.res * (output_error -(output_error * self.res).sum(axis=1)[:,None])
        ## END TODO

In [8]:
def sigmoid(x):
    '''
    Sigmoid function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying simoid function
    '''
    ## TODO
    exps = np.exp(x)
    res = exps/(1. + exps)
    #Modify the return statement to return numpy array resulting from backward pass
    return res
    ## END TODO

def sigmoid_prime(x):
    '''
     Implements derivative of Sigmoid function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Sigmoid function
    '''
    ## TODO
    
    #Modify the return statement to return numpy array resulting from backward pass
    return x*(1-x)
    ## END TODO

def tanh(x):
    '''
    Tanh function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying tanh function
    '''
    ## TODO
    tanhx=(np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
    #Modify the return statement to return numpy array resulting from backward pass
    return tanhx
    ## END TODO

def tanh_prime(x):
    '''
     Implements derivative of Tanh function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Tanh function
    '''
    ## TODO
    tanhx=(np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
    #Modify the return statement to return numpy array resulting from backward pass
    return 1 - tanhx**2
    ## END TODO

def relu(x):
    '''
    ReLU function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying ReLU function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.clip(x,0, None)
    ## END TODO

def relu_prime(x):
    '''
     Implements derivative of ReLU function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of ReLU function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.where(x>0,1,0)
    ## END TODO

In [9]:
def mse(y_true, y_pred):
    '''
    MSE loss
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.mean(np.square(y_pred - y_true), axis=1)
    ## END TODO

def mse_prime(y_true, y_pred):
    '''
    Implements derivative of MSE function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of MSE function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    #print(y_true.shape)
    return 2*(1/y_true.shape[1])*(y_pred-y_true)
    ## END TODO

def cross_entropy(y_true, y_pred):
    '''
    Cross entropy loss 
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    
    return (np.where(y_true==1, -np.log(np.clip(y_pred, 1e-8, None)), 0)).sum(axis=1)
    ## END TODO

def cross_entropy_prime(y_true, y_pred):
    '''
    Implements derivative of cross entropy function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of cross entropy function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.where(y_true==1, -1/np.clip(y_pred, 1e-8, None), 0)
    ## END TODO

Fit function

In [10]:
# def fit(X_train, Y_train,dataset_name):

#     '''
#     Create and trains a feedforward network

#     Do not forget to save the final weights of the feed forward network to a file. Use these weights in the `predict` function 
#     Args:
#         X_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
#         Y_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
#         dataset_name -- name of the dataset (flowers or mnist)
    
#     '''
     
#     #Note that this just a template to help you create your own feed forward network 
#     ## TODO

#     #define your network
#     #This network would work only for mnist
#     network = [
#         FlattenLayer(input_shape=(28, 28)),
#         FCLayer(28 * 28, 12),
#         ActivationLayer(sigmoid, sigmoid_prime),
#         FCLayer(12, 10),
#         SoftmaxLayer(10)
#     ] # This creates feed forward 


#     # Choose appropriate learning rate and no. of epoch
#     epochs = 40
#     learning_rate = 0.01

#     # Change training loop as you see fit
#     for epoch in range(epochs):
#         error = 0
#         i=0
#         for x, y_true in zip(X_train, Y_train):
#             # forward
            
#             output = x.reshape(1,28,28)
#             for layer in network:
#                 output = layer.forward(output)
            
#             # error (display purpose only)
#             #print(y_true)
#             y_true = np.array([y_true])
#             y_tru = np.zeros((y_true.size, 10))
#             y_tru[np.arange(y_true.size), y_true]=1
#             error += mse(y_tru, output)

#             # backward
#             output_error = mse_prime(y_tru, output)
#             for layer in reversed(network):
#                 output_error = layer.backward(output_error, learning_rate)
#             i+=1
        
#         error /= len(X_train)
#         print('%d/%d, error=%f' % (epoch + 1, epochs, error))

#     #Save you model weights
    
#     ## END TODO


In [13]:
def fit(X_train, Y_train,dataset_name):

    '''
    Create and trains a feedforward network

    Do not forget to save the final weights of the feed forward network to a file. Use these weights in the `predict` function 
    Args:
        X_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        Y_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        dataset_name -- name of the dataset (flowers or mnist)
    
    '''
     
    #Note that this just a template to help you create your own feed forward network 
    ## TODO
    
    # Choose appropriate learning rate and no. of epoch
    index = ['mnist', 'flowers'].index(dataset_name)

    input_shape = X_train.shape[1:]
    epochs = [100, 500][index]
    learning_rate = 0.01
    batch_size = [20, 8][index]
    n_labels = [10, 5][index]
    
    # nonrmalize and store in X_norm
    mu, sigma = np.mean(X_train, axis=0), np.std(X_train, axis=0)
    norm_pms = [mu, sigma]
    sigma[sigma == 0] = 1
    X_norm = (X_train - mu)/(sigma)
    
    # scale X_norm and store in X_scaled
    diff = (np.max(X_norm, axis=0) - np.min(X_norm, axis=0))
    diff[diff == 0] = 1
    X_scaled = (X_norm - np.min(X_norm, axis=0)) / (diff)
    
    # update X_train 
    X_train = X_scaled   
    
    #define your network
    #This network would work only for mnist
    network = [
        FlattenLayer(input_shape=input_shape),
        FCLayer(np.prod(input_shape), 12),
        ActivationLayer(sigmoid, sigmoid_prime),
        FCLayer(12, n_labels),
        SoftmaxLayer(n_labels)
    ] # This creates feed forward 


    # Change training loop as you see fit
    ls = []
    for epoch in range(epochs):
        error = 0
        for i in range(0, X_train.shape[0], batch_size):

            output = X_train[i:i + batch_size]
            for layer in network:
                output = layer.forward(output)

            y_true = Y_train[i:i + batch_size]

            y_vec = np.zeros((batch_size, n_labels))
            for j in range(batch_size):
                y_vec[j, y_true[j]] = 1

            error += cross_entropy(y_vec, output).sum()
#             print(y_vec.shape, output.shape)

            output_error = cross_entropy_prime(y_vec, output)
            for layer in reversed(network):
                output_error = layer.backward(output_error, learning_rate)

        error /= len(X_train)
        ls.append(error)
        print('%d/%d, error=%f' % (epoch + 1, epochs, error))

    # Save you model weights
    pkl.dump([norm_pms, network], open(f"./models/{dataset_name}_weights.pkl", "wb"))
    return ls
    
    ## END TODO


Loading datasets

In [14]:
dataset = "mnist" 
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")

fit(train_mnist[0],train_mnist[1],'mnist')

# dataset = "flowers" # "mnist"/"flowers"
# with open(f"./data/{dataset}_train.pkl", "rb") as file:
#     train_flowers = pkl.load(file)
#     print(f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")

# fit(train_flowers[0],train_flowers[1],'flowers')

train_x -- (60000, 28, 28); train_y -- (60000,)
1/100, error=2.307008
2/100, error=2.297431
3/100, error=2.292846
4/100, error=2.284933
5/100, error=2.270323
6/100, error=2.243177
7/100, error=2.195428
8/100, error=2.120664
9/100, error=2.019711
10/100, error=1.899549
11/100, error=1.768259
12/100, error=1.634811
13/100, error=1.508413
14/100, error=1.395429
15/100, error=1.298377
16/100, error=1.216613
17/100, error=1.147528
18/100, error=1.087945
19/100, error=1.035065
20/100, error=0.986830
21/100, error=0.941932
22/100, error=0.899699
23/100, error=0.859931
24/100, error=0.822714
25/100, error=0.788235
26/100, error=0.756637
27/100, error=0.727934
28/100, error=0.702005
29/100, error=0.678629
30/100, error=0.657538
31/100, error=0.638452
32/100, error=0.621110
33/100, error=0.605279
34/100, error=0.590757
35/100, error=0.577375
36/100, error=0.564989
37/100, error=0.553481
38/100, error=0.542751
39/100, error=0.532716
40/100, error=0.523306
41/100, error=0.514460
42/100, error=0.50

[2.3070080309604943,
 2.2974313098162193,
 2.2928459238856918,
 2.2849330502865928,
 2.270322876380689,
 2.243176798647577,
 2.195428306081371,
 2.1206636134988917,
 2.0197112195345266,
 1.8995491292742166,
 1.7682586235581144,
 1.634810668205768,
 1.5084132321412707,
 1.3954286931356703,
 1.298376859311883,
 1.2166128774175098,
 1.1475284761902245,
 1.0879449659790308,
 1.035065003785546,
 0.9868297252003673,
 0.941932031698256,
 0.8996994751420686,
 0.8599313347852371,
 0.8227138010027113,
 0.7882351486414082,
 0.7566370144133316,
 0.7279339583606007,
 0.7020047324731621,
 0.6786292223395467,
 0.6575378567772124,
 0.6384522114756729,
 0.6211103557350387,
 0.6052790322460266,
 0.590757170334133,
 0.5773746484951725,
 0.5649889095512087,
 0.5534809169293716,
 0.5427512064341374,
 0.5327163626425203,
 0.5233060232962942,
 0.5144604048854041,
 0.5061282951062037,
 0.4982654425591059,
 0.490833274237129,
 0.4837978782465611,
 0.4771291983233866,
 0.4708003959162591,
 0.46478734396280313,


In [18]:
def predict(X_test, dataset_name):
  """

  X_test -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.

   

  This is the only function that we will call from the auto grader. 

  This function should only perform inference, please donot train your models here.
  
  Steps to be done here:
  1. Load your trained weights from ./models/{dataset_name}_weights.pkl
  2. Ensure that you read weights using only the libraries we have given above.
  3. Initialize your model with your trained weights
  4. Compute the predicted labels and return it

  Please provide us the complete code you used for training including any techniques
  like data augmentation etc. that you have tried out. 

  Return:
  Y_test - nparray of shape (num_test,)
  """
  Y_test = np.zeros(X_test.shape[0],)

  ## TODO
    
  norm_pms, network = pkl.load(open(f'./models/{dataset_name}_weights.pkl', 'rb'))
    
  # nonrmalize and store in X_norm
  mu, sigma = norm_pms
  sigma[sigma == 0] = 1
  X_norm = (X_test - mu)/(sigma)
    
  # scale X_norm and store in X_scaled
  diff = (np.max(X_norm, axis=0) - np.min(X_norm, axis=0))
  diff[diff == 0] = 1
  X_scaled = (X_norm - np.min(X_norm, axis=0)) / (diff)
    
  # update X_train 
  output = X_scaled
    
  for layer in network:
    output = layer.forward(output)
    
  Y_test = np.argmax(output, axis=1)
  ## END TODO
  assert Y_test.shape == (X_test.shape[0],) and type(Y_test) == type(X_test), "Check what you return"
  return Y_test


In [19]:
dataset = "mnist" 
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")
    X_test = train_mnist[0]
    Y_test = train_mnist[1]
    
Y_pred = predict(X_test, dataset)
acc = np.sum(Y_pred == Y_test)/Y_test.shape[0]

from sklearn.metrics import precision_recall_fscore_support
import pandas as pd

p, r, f, _ =precision_recall_fscore_support(Y_test, Y_pred, average='macro')
report={'Precision':p, 'Recall':r, 'F1-score':f, "Accuracy":acc}

print("\n\nMNIST Dataset - Training Results\n")
for i in report:
    print("{}: {}".format(i, report[i]))
    
print("\n\n\n")
# df = pd.DataFrame(report)
# df

train_x -- (60000, 28, 28); train_y -- (60000,)


MNIST Dataset - Training Results

Precision: 0.9060170347851025
Recall: 0.9059270037374121
F1-score: 0.9058814718458276
Accuracy: 0.9071




