
## <font color=red> You should not import any new libraries. Your code should run with python=3.x</font>

#### <font color=red>For lab assignment, you will work with two datasets. The trained weights need to be saved and shared with us in a folder called models with the name ./models/{dataset_name}_weights.pkl. Your predict function should load these weights, initialize the DNN and predict the labels.</font>

- Your solutions will be auto-graded. Hence we request you to follow the instructions.
- Modify the code only between 
```
## TODO
## END TODO
```
- In addition to above changes, you can play with arguments to the functions for generating plots
- We will run the auto grading scripts with private test data

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import math
import pickle as pkl

Preprocessing

In [2]:
def preprocessing(X):
  """
  Implement Normalization for input image features

  Args:
  X : numpy array of shape (n_samples, 784)
   
  Returns:
  X_out: numpy array of shape (n_samples, 784) after normalization
  """
  X_out = None
  
  ## TODO
    
  mu = np.mean(X, axis=0)
  sigma = np.std(axis=0)
  X_out = (X-mu)/sigma
  
  ## END TODO

  assert X_out.shape == X.shape

  return X_out

### Split data into train/val

In [3]:
def split_data(X, Y, train_ratio=0.8):
    '''
    Split data into train and validation sets
    The first floor(train_ratio*n_sample) samples form the train set
    and the remaining the test set

    Args:
    X - numpy array of shape (n_samples, n_features)
    Y - numpy array of shape (n_samples, 1)
    train_ratio - fraction of samples to be used as training data

    Returns:
    X_train, Y_train, X_val, Y_val
    '''
    # Try Normalization and scaling and store it in X_transformed
    X_transformed = X

    ## TODO
    
    M = np.max(X, axis=1).reshape(-1,1)
    m = np.min(X, axis=1).reshape(-1,1)
    X_transformed = (X-m)/(M-m)
    
    ## END TODO

    assert X_transformed.shape == X.shape

    num_samples = len(X)
    indices = np.arange(num_samples)
    num_train_samples = math.floor(num_samples * train_ratio)
    train_indices = np.random.choice(indices, num_train_samples, replace=False)
    val_indices = list(set(indices) - set(train_indices))
    X_train, Y_train, X_val, Y_val = X_transformed[train_indices], Y[train_indices], X_transformed[val_indices], Y[val_indices]
  
    return X_train, Y_train, X_val, Y_val

#Flatten the input

In [4]:
class FlattenLayer:
    '''
    This class converts a multi-dimensional into 1-d vector
    '''
    def __init__(self, input_shape):
        '''
         Args:
          input_shape : Original shape, tuple of ints
        '''
        self.input_shape = input_shape

    def forward(self, input):
        '''
        Converts a multi-dimensional into 1-d vector
        Args:
          input : training data, numpy array of shape (n_samples , self.input_shape)

        Returns:
          input: training data, numpy array of shape (n_samples , -1)
        '''
        ## TODO
        n_samp = input.shape[0]
        inp = input.reshape(n_samp, -1)
        #Modify the return statement to return flattened input
        return inp
        ## END TODO
        
    
    def backward(self, output_error, learning_rate):
        '''
        Converts back the passed array to original dimention 
        Args:
        output_error :  numpy array 
        learning_rate: float

        Returns:
        output_error: A reshaped numpy array to allow backward pass
        '''
        ## TODO
        nsamps = output_error.shape[0]
        out_sh = [nsamps]+ list(self.input_shape)
        output_err = output_error.reshape(out_sh)
        #Modify the return statement to return reshaped array
        return output_error
        ## END TODO
        

#Fully Connected Layer

In [5]:
class FCLayer:
    '''
    Implements a fully connected layer  
    '''
    def __init__(self, input_size, output_size):
        '''
        Args:
         input_size : Input shape, int
         output_size: Output shape, int 
        '''
        self.input_size = input_size
        self.output_size = output_size
        ## TODO
        self.weights = np.random.randn(self.input_size, self.output_size)*(1/self.input_size) #initilaise weights for this layer
        self.bias = np.random.randn(self.output_size) #initilaise bias for this layer
        ## END TODO

    def forward(self, input):
        '''
        Performs a forward pass of a fully connected network
        Args:
          input : training data, numpy array of shape (n_samples , self.input_size)

        Returns:
           numpy array of shape (n_samples , self.output_size)
        '''
        ## TODO
        self.Z0 = np.copy(input)
        Z1 = np.dot(input, self.weights) + self.bias
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        return Z1
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        
        nsamps = output_error.shape[0]
        dW = (1/nsamps)*np.matmul(self.Z0.T, output_error)
        db = np.mean(output_error, axis=0)
        dA = np.matmul(output_error, self.weights.T)
        
        self.weights -= learning_rate*dW
        self.bias -= learning_rate*db

        #Modify the return statement to return numpy array resulting from backward pass
        return dA
        ## END TODO

In [6]:
class ActivationLayer:
    '''
    Implements a Activation layer which applies activation function on the inputs. 
    '''
    def __init__(self, activation, activation_prime):
        '''
          Args:
          activation : Name of the activation function (sigmoid,tanh or relu)
          activation_prime: Name of the corresponding function to be used during backpropagation (sigmoid_prime,tanh_prime or relu_prime)
        '''
        self.activation = activation
        self.activation_prime = activation_prime
    
    def forward(self, input):
        '''
        Applies the activation function 
        Args:
          input : numpy array on which activation function is to be applied

        Returns:
           numpy array output from the activation function
        '''
        ## TODO
        self.Z = self.activation(input)
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        return self.Z
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        out = (learning_rate * output_error) * self.activation_prime(self.Z)
        #Modify the return statement to return numpy array resulting from backward pass
        return out
        ## END TODO

In [7]:

class SoftmaxLayer:
    '''
      Implements a Softmax layer which applies softmax function on the inputs. 
    '''
    def __init__(self, input_size):
        self.input_size = input_size
    
    def forward(self, input):
        '''
        Applies the softmax function 
        Args:
          input : numpy array on which softmax function is to be applied

        Returns:
           numpy array output from the softmax function
        '''
        ## TODO
        e1 = np.exp(input)
        esum = np.sum(e1, axis =1).reshape(-1,1)
        self.out = e1/esum
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        return self.out
        ## END TODO
        
    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a Softmax layer
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        do = self.out * (output_error -(output_error * self.out).sum(axis=1)[:,None])
        #Modify the return statement to return numpy array resulting from backward pass
        return do
        ## END TODO

In [8]:
def sigmoid(x):
    '''
    Sigmoid function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying simoid function
    '''
    ## TODO
    z = 1./(1. + np.exp(-x))
    #Modify the return statement to return numpy array resulting from backward pass
    return z
    ## END TODO

def sigmoid_prime(x):
    '''
     Implements derivative of Sigmoid function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Sigmoid function
    '''
    ## TODO
    dz = x*(1. -x)
    #Modify the return statement to return numpy array resulting from backward pass
    return dz
    ## END TODO

def tanh(x):
    '''
    Tanh function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying tanh function
    '''
    ## TODO
    z = (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))
    #Modify the return statement to return numpy array resulting from backward pass
    return z
    ## END TODO

def tanh_prime(x):
    '''
     Implements derivative of Tanh function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Tanh function
    '''
    ## TODO
    dz = (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))
    #Modify the return statement to return numpy array resulting from backward pass
    return dz
    ## END TODO

def relu(x):
    '''
    ReLU function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying ReLU function
    '''
    ## TODO
    z = x * (x > 0)
    #Modify the return statement to return numpy array resulting from backward pass
    return z
    ## END TODO

def relu_prime(x):
    '''
     Implements derivative of ReLU function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of ReLU function
    '''
    ## TODO
    dz = 1. * (x > 0)
    #Modify the return statement to return numpy array resulting from backward pass
    return dz
    ## END TODO

In [9]:
def mse(y_true, y_pred):
    '''
    MSE loss
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO
    loss = np.mean(np.square(y_pred - y_true), axis=1)
    #Modify the return statement to return numpy array resulting from backward pass
    return loss
    ## END TODO

def mse_prime(y_true, y_pred):
    '''
    Implements derivative of MSE function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of MSE function
    '''
    ## TODO
    dl = 2*(1/y_true.shape[1])*(y_pred-y_true)
    #Modify the return statement to return numpy array resulting from backward pass
    return dl
    ## END TODO

def cross_entropy(y_true, y_pred):
    '''
    Cross entropy loss 
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO
    loss = (np.where(y_true==1, -np.log(np.clip(y_pred, 1e-8, None)), 0)).sum(axis=1)
    #Modify the return statement to return numpy array resulting from backward pass
    return loss
    ## END TODO

def cross_entropy_prime(y_true, y_pred):
    '''
    Implements derivative of cross entropy function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of cross entropy function
    '''
    ## TODO
    dl = np.where(y_true==1, -1/np.clip(y_pred, 1e-8, None), 0)
    #Modify the return statement to return numpy array resulting from backward pass
    return dl
    ## END TODO

Fit function

In [21]:
def fit(X_train, Y_train,dataset_name):

    '''
    Create and trains a feedforward network

    Do not forget to save the final weights of the feed forward network to a file. Use these weights in the `predict` function 
    Args:
        X_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        Y_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        dataset_name -- name of the dataset (flowers or mnist)
    
    '''
     
    #Note that this just a template to help you create your own feed forward network 
    ## TODO
    
    # Choose appropriate learning rate and no. of epoch
    index = ['mnist', 'flowers'].index(dataset_name)

    input_shape = X_train.shape[1:]
    epochs = [100, 100][index]
    learning_rate = 0.025
    batch_size = [16, 8][index]
    n_labels = [10, 5][index]
    
    # nonrmalize and store in X_norm
    mu, sigma = np.mean(X_train, axis=0), np.std(X_train, axis=0)
    norm_pms = [mu, sigma]
    sigma[sigma == 0] = 1
    X_norm = (X_train - mu)/(sigma)
    
    # scale X_norm and store in X_scaled
    diff = (np.max(X_norm, axis=0) - np.min(X_norm, axis=0))
    diff[diff == 0] = 1
    X_scaled = (X_norm - np.min(X_norm, axis=0)) / (diff)
    
    # update X_train 
    X_train = X_scaled   
    
    #define your network
    #This network would work only for mnist
    
    hidden_layer = 12
    network = [
        FlattenLayer(input_shape=input_shape),
        FCLayer(np.prod(input_shape), hidden_layer),
        ActivationLayer(sigmoid, sigmoid_prime),
        FCLayer(hidden_layer, n_labels),
        SoftmaxLayer(n_labels)
    ] # This creates feed forward 


    # Change training loop as you see fit
    ls = []
    for epoch in range(epochs):
        error = 0
        for i in range(0, X_train.shape[0], batch_size):

            output = X_train[i:i + batch_size]
            for layer in network:
                output = layer.forward(output)

            y_true = Y_train[i:i + batch_size]

            y_vec = np.zeros((batch_size, n_labels))
            for j in range(batch_size):
                y_vec[j, y_true[j]] = 1

            error += cross_entropy(y_vec, output).sum()
#             print(y_vec.shape, output.shape)

            output_error = cross_entropy_prime(y_vec, output)
            for layer in reversed(network):
                output_error = layer.backward(output_error, learning_rate)

        error /= len(X_train)
        ls.append(error)
        print('%d/%d, error=%f' % (epoch + 1, epochs, error))

    # Save you model weights
    pkl.dump([norm_pms, network], open(f"./models/{dataset_name}_weights.pkl", "wb"))
    return ls
    
    ## END TODO


Loading datasets

In [22]:
dataset = "mnist" 
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")

fit(train_mnist[0],train_mnist[1],'mnist')

dataset = "flowers" # "mnist"/"flowers"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_flowers = pkl.load(file)
    print(f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")

fit(train_flowers[0],train_flowers[1],'flowers')

train_x -- (60000, 28, 28); train_y -- (60000,)
1/100, error=2.290907
2/100, error=2.069346
3/100, error=1.423590
4/100, error=0.986878
5/100, error=0.764528
6/100, error=0.639199
7/100, error=0.563119
8/100, error=0.512448
9/100, error=0.476148
10/100, error=0.448838
11/100, error=0.427544
12/100, error=0.410441
13/100, error=0.396342
14/100, error=0.384451
15/100, error=0.374225
16/100, error=0.365283
17/100, error=0.357360
18/100, error=0.350261
19/100, error=0.343844
20/100, error=0.338002
21/100, error=0.332651
22/100, error=0.327728
23/100, error=0.323179
24/100, error=0.318961
25/100, error=0.315037
26/100, error=0.311376
27/100, error=0.307951
28/100, error=0.304739
29/100, error=0.301719
30/100, error=0.298873
31/100, error=0.296184
32/100, error=0.293639
33/100, error=0.291225
34/100, error=0.288930
35/100, error=0.286744
36/100, error=0.284659
37/100, error=0.282666
38/100, error=0.280757
39/100, error=0.278926
40/100, error=0.277167
41/100, error=0.275475
42/100, error=0.27

[1.6233542928941298,
 1.602432367164825,
 1.6014886543167723,
 1.600435688979745,
 1.5992171533208994,
 1.5977641529042552,
 1.5959902155133383,
 1.5937849011340026,
 1.5910059280528106,
 1.5874696215740571,
 1.5829396923668795,
 1.57711474763127,
 1.5696156502345195,
 1.5599750629893017,
 1.547633574158827,
 1.5319501835736558,
 1.5122400323394452,
 1.4878582465019594,
 1.4583506045449892,
 1.4236755259788096,
 1.384446207391988,
 1.342050150904955,
 1.2984699224142435,
 1.255793028289091,
 1.215673821257149,
 1.1790577864075686,
 1.1462300984168512,
 1.1170308136104394,
 1.0910723470999004,
 1.0678896027510012,
 1.0470225900586905,
 1.0280536256294017,
 1.010619470468825,
 0.9944114984344239,
 0.979171047242201,
 0.9646834607243931,
 0.9507723654805678,
 0.9372947354405012,
 0.92413682210399,
 0.9112108113036003,
 0.8984519816514543,
 0.8858161263188088,
 0.8732770323020043,
 0.8608238760894817,
 0.8484584790331585,
 0.8361924536820594,
 0.8240443453704095,
 0.8120369154574738,
 0.80

In [23]:
def predict(X_test, dataset_name):
  """

  X_test -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.

   

  This is the only function that we will call from the auto grader. 

  This function should only perform inference, please donot train your models here.
  
  Steps to be done here:
  1. Load your trained weights from ./models/{dataset_name}_weights.pkl
  2. Ensure that you read weights using only the libraries we have given above.
  3. Initialize your model with your trained weights
  4. Compute the predicted labels and return it

  Please provide us the complete code you used for training including any techniques
  like data augmentation etc. that you have tried out. 

  Return:
  Y_test - nparray of shape (num_test,)
  """
  Y_test = np.zeros(X_test.shape[0],)

  ## TODO
    
  norm_pms, network = pkl.load(open(f'./models/{dataset_name}_weights.pkl', 'rb'))
    
  # nonrmalize and store in X_norm
  mu, sigma = norm_pms
  sigma[sigma == 0] = 1
  X_norm = (X_test - mu)/(sigma)
    
  # scale X_norm and store in X_scaled
  diff = (np.max(X_norm, axis=0) - np.min(X_norm, axis=0))
  diff[diff == 0] = 1
  X_scaled = (X_norm - np.min(X_norm, axis=0)) / (diff)
    
  # update X_train 
  output = X_scaled
    
  for layer in network:
    output = layer.forward(output)
    
  Y_test = np.argmax(output, axis=1)
  ## END TODO
  assert Y_test.shape == (X_test.shape[0],) and type(Y_test) == type(X_test), "Check what you return"
  return Y_test
