
## <font color=red> You should not import any new libraries. Your code should run with python=3.x</font>

#### <font color=red>For lab assignment, you will work with two datasets. The trained weights need to be saved and shared with us in a folder called models with the name ./models/{dataset_name}_weights.pkl. Your predict function should load these weights, initialize the DNN and predict the labels.</font>

- Your solutions will be auto-graded. Hence we request you to follow the instructions.
- Modify the code only between 
```
## TODO
## END TODO
```
- In addition to the above changes, you can experiment with the arguments of the plot-generating functions
- We will run the auto grading scripts with private test data

In [38]:
import numpy as np
from matplotlib import pyplot as plt
import math
import pickle as pkl

Preprocessing

In [39]:
def preprocessing(X):
  """
  Standardize every feature to zero mean and unit standard deviation.

  Statistics are computed across the sample axis (axis 0). A feature whose
  standard deviation is exactly zero is divided by 1 instead, so constant
  features come out as all zeros rather than NaN.

  Args:
  X : numpy array of shape (n_samples, 784)

  Returns:
  X_out: numpy array of the same shape as X after normalization
  """
  X_out = None

  ## TODO

  mu = np.mean(X, axis=0)
  sigma = np.std(X, axis=0)
  # Constant features: avoid 0/0 by using a unit divisor.
  sigma[np.equal(sigma, 0)] = 1
  X_out = (X - mu) / sigma

  ## END TODO

  assert X_out.shape == X.shape

  return X_out


def scaling(X):
  """
  Min-max scale every feature to the range [0, 1].

  The minimum and maximum are taken across the sample axis (axis 0).
  Features that are constant across all samples (max == min) are mapped
  to 0 instead of being left undefined.

  Args:
  X : numpy array of shape (n_samples, 784)

  Returns:
  X_scaled : numpy array of the same shape as X
  """
  X_scaled = None

  ##TODO
  # we will scale to 0-1
  X_max = np.max(X, axis=0)
  X_min = np.min(X, axis=0)
  X_range = X_max - X_min
  # For constant features the numerator is already 0, so dividing by 1
  # yields a well-defined 0. (np.divide(..., where=...) without `out`
  # would leave those entries uninitialized.)
  safe_range = np.where(X_range == 0, 1, X_range)
  X_scaled = np.divide(X - X_min, safe_range)

  ##END TODO

  assert X_scaled.shape == X.shape

  return X_scaled


### Split data into train/val

In [40]:
def split_data(X, Y, train_ratio=0.8):
    '''
    Transform the features and split the data into train and validation sets.

    The features are first standardized and then min-max scaled. After that,
    floor(train_ratio * n_samples) sample indices are drawn at random without
    replacement (via np.random.choice) to form the train set; all remaining
    indices form the validation set.

    Args:
    X - numpy array of shape (n_samples, n_features)
    Y - numpy array of shape (n_samples, 1)
    train_ratio - fraction of samples to be used as training data

    Returns:
    X_train, Y_train, X_val, Y_val
    '''
    # Try Normalization and scaling and store it in X_transformed
    X_transformed = X

    ## TODO
    X_transformed = scaling(preprocessing(X))

    ## END TODO

    assert X_transformed.shape == X.shape

    total = len(X)
    all_idx = np.arange(total)
    n_train = math.floor(total * train_ratio)

    # Random subset for training; the complement goes to validation.
    train_idx = np.random.choice(all_idx, n_train, replace=False)
    val_idx = list(set(all_idx) - set(train_idx))

    X_train = X_transformed[train_idx]
    Y_train = Y[train_idx]
    X_val = X_transformed[val_idx]
    Y_val = Y[val_idx]

    return X_train, Y_train, X_val, Y_val

#Flatten the input

In [41]:
class FlattenLayer:
    '''
    Layer that flattens each multi-dimensional sample into a 1-d vector
    on the forward pass and restores the original shape on the backward pass.
    '''

    def __init__(self, input_shape):
        '''
        Args:
         input_shape : per-sample shape before flattening, tuple of ints
        '''
        self.input_shape = input_shape

    def forward(self, input):
        '''
        Flatten every sample, keeping the leading sample axis.
        Args:
          input : training data, numpy array of shape (n_samples , self.input_shape)

        Returns:
          numpy array of shape (n_samples , -1)
        '''
        # TODO

        batch = input.shape[0]
        flattened = np.reshape(input, (batch, -1))

        return flattened

        # END TODO

    def backward(self, output_error, learning_rate):
        '''
        Reshape the incoming error back to (n_samples, *self.input_shape).
        This layer has no parameters, so learning_rate is unused.
        Args:
        output_error :  numpy array
        learning_rate: float

        Returns:
        output_error: A reshaped numpy array to allow backward pass
        '''
        # TODO

        batch = output_error.shape[0]
        restored = np.reshape(output_error, (batch, *self.input_shape))

        return restored

        # END TODO


#Fully Connected Layer

In [42]:
class FCLayer:
    '''
    Fully connected (dense) layer: output = input @ weights + bias.
    '''
    def __init__(self, input_size, output_size):
        '''
        Args:
         input_size : Input shape, int
         output_size: Output shape, int
        '''
        self.input_size = input_size
        self.output_size = output_size
        ## TODO

        # weights[i][j] connects the i-th input to the j-th output.
        # They start at zero; only the biases are randomly initialised.
        self.weights = np.zeros(shape=(input_size, output_size))

        # bias[j] is the bias of the j-th output, drawn uniformly from [0, 1).
        self.bias = np.random.rand(output_size)

        ## END TODO

    def forward(self, input):
        '''
        Forward pass of the fully connected layer.
        Args:
          input : training data, numpy array of shape (n_samples , self.input_size)

        Returns:
           numpy array of shape (n_samples , self.output_size)
        '''
        ## TODO
        # Keep a private copy of the input; backward() needs it for the
        # weight gradient.
        self.input = np.array(input, copy=True)

        bias_row = np.reshape(self.bias, (1, self.output_size))
        return np.matmul(input, self.weights) + bias_row

        ## END TODO


    def backward(self, output_error, learning_rate):
        '''
        Backward pass: returns the error w.r.t. the layer input and applies
        one gradient-descent step to the weights and biases.

        The weight step also contains an L2 decay term
        (lamda / n_samples) * weights with lamda fixed to 1.

        Args:
          output_error :  numpy array of shape (n_samples, self.output_size)
          learning_rate: float

        Returns:
          Numpy array of shape (n_samples, self.input_size)
        '''
        ## TODO
        batch = output_error.shape[0]
        lamda = 1

        # Error to propagate; must use the weights *before* they are updated.
        assert output_error.shape[1] == self.output_size
        input_error = np.matmul(output_error, self.weights.T)
        assert input_error.shape[1] == self.input_size

        # Parameter update over the whole batch.
        assert self.weights.shape[0] == self.input.shape[1]
        weight_grad = np.matmul(self.input.T, output_error)
        self.weights -= learning_rate*(weight_grad + (lamda/batch)*self.weights)

        bias_grad = output_error.sum(axis=0).reshape(-1)
        self.bias -= learning_rate*bias_grad

        self.bias = self.bias.reshape(-1)
        assert self.bias.shape[0] == self.output_size, "self.bias.shape[0] == {} != self.output_size == {}".format(self.bias.shape[0], self.output_size)
        return input_error
        ## END TODO


In [43]:
class ActivationLayer:
    '''
    Element-wise activation layer. It has no trainable parameters.
    '''
    def __init__(self, activation, activation_prime):
        '''
          Args:
          activation : activation function (sigmoid, tanh or relu)
          activation_prime: its derivative, used during backpropagation (sigmoid_prime, tanh_prime or relu_prime)
        '''
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        '''
        Apply the activation function and cache its derivative at `input`.
        Args:
          input : numpy array on which activation function is to be applied

        Returns:
           numpy array output from the activation function
        '''
        ## TODO
        width = input.shape[1]
        self.input_size = width
        self.output_size = width

        # The derivative is evaluated now so backward() only has to multiply.
        self.input_diff = self.activation_prime(input)

        activated = self.activation(input)
        return activated

        ## END TODO


    def backward(self, output_error, learning_rate):
        '''
        Multiply the incoming error element-wise by the cached derivative.
        learning_rate is unused because there is nothing to update.
        Args:
          output_error :  numpy array
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        local_grad = self.input_diff
        assert output_error.shape == local_grad.shape

        input_error = local_grad * output_error
        assert input_error.shape == output_error.shape

        return input_error
        ## END TODO


In [44]:

class SoftmaxLayer:
    '''
      Softmax layer: turns each row of scores into a probability distribution.
      It has no trainable parameters.
    '''
    def __init__(self, input_size):
        self.input_size = input_size

    def forward(self, input):
        '''
        Apply the softmax function row-wise.

        The row maximum is subtracted before exponentiation. This leaves the
        result mathematically unchanged but prevents np.exp from overflowing
        (which would produce inf/inf = NaN) for large scores.

        Args:
          input : numpy array of shape (n_samples, n_classes)

        Returns:
           numpy array of the same shape whose rows sum to 1
        '''
        ## TODO

        shifted = input - np.max(input, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        self.output = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        assert self.output.shape == input.shape
        return self.output

        ## END TODO

    def backward(self, output_error, learning_rate):
        '''
        Backward pass of the softmax layer.

        For every sample this applies the softmax Jacobian to the incoming
        error: s * (e - sum(s * e)), where s is the cached forward output
        and e is output_error. learning_rate is unused.

        Args:
          output_error :  numpy array of the same shape as the forward output
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO

        assert output_error.shape == self.output.shape

        weighted = np.multiply(self.output, output_error)
        weighted_sum = np.sum(weighted, axis=1, keepdims=True)

        backpass = weighted - np.multiply(self.output, weighted_sum)

        assert backpass.shape == output_error.shape

        # Nothing to update in this layer.
        return backpass
        ## END TODO


In [45]:
def sigmoid(x):
    '''
    Sigmoid function 1 / (1 + exp(-x)), evaluated without overflow.

    Only exp(-|x|) is ever computed, which lies in (0, 1], so large negative
    inputs no longer trigger an overflow in np.exp.

    Args:
        x :  numpy array
    Returns:
        Numpy array of the same shape after applying the sigmoid function
    '''
    ## TODO
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    ans = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    assert x.shape == ans.shape
    return ans

    ## END TODO

def sigmoid_prime(x):
    '''
    Derivative of the sigmoid function, for the backward pass.

    The derivative e^x / (1 + e^x)^2 is an even function of x, so it is
    evaluated as e^-|x| / (1 + e^-|x|)^2. This never overflows, whereas
    the direct formula yields inf/inf = NaN for large positive x.

    Args:
        x :  numpy array
    Returns:
        Numpy array of the same shape after applying the derivative of Sigmoid
    '''
    ## TODO
    decay = np.exp(-np.abs(x))
    ans = np.divide(decay, np.square(1 + decay))

    assert x.shape == ans.shape
    return ans

    ## END TODO

def tanh(x):
    '''
    Hyperbolic tangent function.

    Delegates to np.tanh, which saturates cleanly at +/-1. The hand-written
    (e^x - e^-x) / (e^x + e^-x) overflows to inf/inf = NaN for large |x|.

    Args:
        x :  numpy array
    Returns:
        Numpy array of the same shape after applying the tanh function
    '''
    ## TODO
    ans = np.tanh(x)

    assert x.shape == ans.shape
    return ans

    ## END TODO

def tanh_prime(x):
    '''
    Derivative of the tanh function, for the backward pass.

    The derivative 4 / (e^x + e^-x)^2 is rewritten as
    4 * e^-2|x| / (1 + e^-2|x|)^2. Only exponentials of non-positive numbers
    are taken, so no intermediate value can overflow.

    Args:
        x :  numpy array
    Returns:
        Numpy array of the same shape after applying the derivative of Tanh
    '''
    ## TODO

    decay = np.exp(-2 * np.abs(x))

    ans = (4 * decay) / np.square(1 + decay)

    assert x.shape == ans.shape
    return ans
    ## END TODO

def relu(x):
    '''
    ReLU function: negative entries become 0, all others pass through.
    The input array is left untouched; a new array is returned.
    Args:
        x :  numpy array
    Returns:
        Numpy array after applying ReLU function
    '''
    ## TODO

    ans = np.where(x < 0, np.zeros_like(x), x)

    assert x.shape == ans.shape
    return ans
    ## END TODO

def relu_prime(x):
    '''
    Derivative of the ReLU function, for the backward pass:
    0 where x is negative and 1 everywhere else (including x == 0).
    Args:
        x :  numpy array
    Returns:
        Numpy array after applying derivative of ReLU function
    '''
    ## TODO

    ans = np.where(x < 0, 0.0, 1.0)

    assert x.shape == ans.shape
    return ans


    ## END TODO

In [46]:
def mse(y_true, output):
    '''
    Squared-error loss against the one-hot encoding of the labels.

    For every sample the loss is sum_k (output_k - onehot_k)^2, expanded as
    sum_k output_k^2 + 1 - 2 * output[true_class]; the result is the mean of
    these per-sample losses.

    Args:
        y_true :  Ground truth class indices (integers), numpy array with one
                  label per sample
        output :  Network outputs (class scores, not predicted labels),
                  numpy array of shape (n_samples, n_classes)
    Returns:
       loss : float
    '''
    ## TODO

    assert y_true.shape[0] == output.shape[0]

    n_samples = output.shape[0]
    labels = y_true.reshape(n_samples)

    # Score the network assigned to the correct class of each sample.
    true_scores = output[np.arange(n_samples), labels]

    per_sample = np.sum(np.square(output), axis=1) + 1 - 2*true_scores

    return np.mean(per_sample)
    ## END TODO

def mse_prime(y_true, output):
    '''
    Derivative of the MSE loss w.r.t. the network output, for the backward
    pass: (2 / n_samples) * (output - onehot(y_true)).

    Args:
        y_true :  Ground truth class indices (integers), numpy array with one
                  label per sample
        output :  Network outputs, numpy array of shape (n_samples, n_classes)
    Returns:
        Numpy array of the same shape as output
    '''
    ## TODO
    assert y_true.shape[0] == output.shape[0]

    n_samples = y_true.shape[0]
    labels = y_true.reshape(n_samples)

    grad = output.copy() * 2
    # Subtracting 2 at the true class is the "- 2 * onehot" term.
    grad[np.arange(n_samples), labels] -= 2
    grad /= n_samples

    assert grad.shape == output.shape
    return grad
    ## END TODO

def cross_entropy(y_true, output):
    '''
    Cross entropy loss.

    Every class is treated as its own binary problem: the true class
    contributes -log(p) and every other class contributes -log(1 - p).
    The per-sample sums are averaged over the batch.

    y_true holds integer class indices (one per sample), matching
    cross_entropy_prime; only the number of samples has to agree with
    `output`. Probabilities are clipped to [1e-12, 1 - 1e-12] so that a
    saturated prediction gives a large finite loss instead of log(0).

    Args:
        y_true :  Ground truth class indices, numpy array of shape
                  (n_samples,) or (n_samples, 1)
        output :  Predicted class probabilities, numpy array of shape
                  (n_samples, n_classes)
    Returns:
       loss : float
    '''
    ## TODO
    assert y_true.shape[0] == output.shape[0]

    n_samples = y_true.shape[0]
    eps = 1e-12

    probs = np.clip(np.asarray(output, dtype=float), eps, 1.0 - eps)
    rows = np.arange(n_samples)
    labels = y_true.reshape(n_samples)

    losses = -1 * np.log(1 - probs)
    losses[rows, labels] = -1 * np.log(probs[rows, labels])

    ans = np.mean(np.sum(losses, axis=1))

    return ans
    ## END TODO

def cross_entropy_prime(y_true, output):
    '''
    Derivative of the cross entropy loss w.r.t. the predicted probabilities,
    for the backward pass.

    The gradient is -1 / p for the true class and 1 / (1 - p) for every
    other class, divided by the number of samples. Probabilities are clipped
    to [1e-12, 1 - 1e-12] first, so saturated predictions produce large but
    finite gradients instead of +/-inf (which would turn the weights into
    NaN).

    Args:
        y_true :  Ground truth class indices, numpy array of shape
                  (n_samples,) or (n_samples, 1)
        output :  Predicted class probabilities, numpy array of shape
                  (n_samples, n_classes)
    Returns:
        Numpy array of the same shape as output
    '''
    ## TODO
    assert y_true.shape[0] == output.shape[0]

    n_samples = y_true.shape[0]
    eps = 1e-12

    probs = np.clip(np.asarray(output, dtype=float), eps, 1.0 - eps)
    rows = np.arange(n_samples)
    labels = y_true.reshape(n_samples)

    ans = 1 / (1 - probs)
    ans[rows, labels] = -1 / probs[rows, labels]

    ans /= n_samples

    assert ans.shape == output.shape
    return ans
    ## END TODO


## Fit function

In [47]:
def fit(X_train, Y_train,dataset_name):

    '''
    Create and train a feed-forward network for the given dataset, then save
    the parameters of its two fully connected layers.

    The inputs are standardized and min-max scaled first. Training uses
    mini-batch gradient descent with the cross-entropy gradient; the MSE is
    only printed once per epoch as a progress indicator. The parameters are
    written to ./models/{dataset_name}_FC{1,2}_{weights,bias}.npy, which is
    where `predict` reads them from. Any other dataset_name trains nothing
    and saves nothing.

    Args:
        X_train -- np array of shape (num_train, 2048) for flowers and (num_train, 28, 28) for mnist.
        Y_train -- np array of shape (num_train,) holding integer class labels.
        dataset_name -- name of the dataset (flowers or mnist)

    '''

    #Note that this just a template to help you create your own feed forward network 
    ## TODO

    X_train = scaling(preprocessing(X_train))

    def _train(network, epochs, learning_rate, batch_size):
        # Run mini-batch gradient descent over X_train / Y_train in place.
        n_samples = X_train.shape[0]

        for epoch in range(epochs):
            error = 0.0
            for start in range(0, n_samples, batch_size):
                stop = min(n_samples, start + batch_size)

                x = X_train[start:stop]
                y_true = Y_train[start:stop].reshape(-1, 1)

                # forward
                output = x
                for layer in network:
                    output = layer.forward(output)

                # error (display purpose only). mse() is a per-batch mean, so
                # weight it by the batch size: a smaller last batch must not
                # skew the epoch average.
                error += mse(y_true, output) * (stop - start)

                # backward
                output_error = cross_entropy_prime(y_true, output)
                for layer in reversed(network):
                    output_error = layer.backward(output_error, learning_rate)

            error /= n_samples
            print('%d/%d, error=%f' % (epoch + 1, epochs, error))

    def _save(first_fc, second_fc):
        # Persist both dense layers where predict() expects to find them.
        np.save("./models/{}_FC1_weights".format(dataset_name), first_fc.weights)
        np.save("./models/{}_FC1_bias".format(dataset_name), first_fc.bias)
        np.save("./models/{}_FC2_weights".format(dataset_name), second_fc.weights)
        np.save("./models/{}_FC2_bias".format(dataset_name), second_fc.bias)

    if dataset_name == "mnist":
        #This network would work only for mnist
        network = [
            FlattenLayer(input_shape=(28, 28)),
            FCLayer(28 * 28, 20),
            ActivationLayer(sigmoid, sigmoid_prime),
            FCLayer(20, 10),
            SoftmaxLayer(10)
        ] # This creates feed forward 

        _train(network, epochs=60, learning_rate=0.8, batch_size=2000)
        _save(network[1], network[3])

    elif dataset_name == "flowers":
        #This network would work only for flowers
        network = [
            FCLayer(2048, 20),
            ActivationLayer(tanh, tanh_prime),
            FCLayer(20, 5),
            SoftmaxLayer(5)
        ] # This creates feed forward 

        _train(network, epochs=100, learning_rate=0.1, batch_size=200)
        _save(network[0], network[2])

    ## END TODO


# Predict Function

In [48]:
def predict(X_test, dataset_name):
  """
  Predict class labels with the network trained by `fit`.

  X_test -- np array of shape (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.

  This is the only function that will be called from the auto grader.
  It only performs inference; no training happens here.

  Steps:
  1. Standardize and min-max scale X_test (statistics are computed from
     X_test itself).
  2. Rebuild the architecture for `dataset_name` ('mnist' or 'flowers').
  3. Load the parameters of both fully connected layers from
     ./models/{dataset_name}_FC{1,2}_{weights,bias}.npy
  4. Run a forward pass and take the arg-max class per sample.

  For any other dataset_name an all-zeros array is returned.

  Return:
  Y_test - nparray of shape (num_test,)
  """
  Y_test = np.zeros(X_test.shape[0],)

  ## TODO
  X_test = scaling(preprocessing(X_test))

  network = None
  if dataset_name == 'mnist':
    #This network would work only for mnist
    network = [
        FlattenLayer(input_shape=(28, 28)),
        FCLayer(28 * 28, 20),
        ActivationLayer(sigmoid, sigmoid_prime),
        FCLayer(20, 10),
        SoftmaxLayer(10)
    ]  # This creates feed forward
  elif dataset_name == 'flowers':
    #This network would work only for flowers
    network = [
        FCLayer(2048, 20),
        ActivationLayer(tanh, tanh_prime),
        FCLayer(20, 5),
        SoftmaxLayer(5)
    ]  # This creates feed forward

  if network is not None:
    # Both architectures contain exactly two dense layers, in this order.
    first_fc, second_fc = [layer for layer in network if isinstance(layer, FCLayer)]

    # Replace the freshly initialised parameters with the trained ones.
    first_fc.weights = np.load("./models/{}_FC1_weights.npy".format(dataset_name))
    first_fc.bias = np.load("./models/{}_FC1_bias.npy".format(dataset_name) )
    second_fc.weights = np.load("./models/{}_FC2_weights.npy".format(dataset_name))
    second_fc.bias = np.load("./models/{}_FC2_bias.npy".format(dataset_name))

    # forward
    activations = X_test
    for layer in network:
        activations = layer.forward(activations)

    Y_test = np.argmax(activations, axis = 1)

  ## END TODO
  assert Y_test.shape == (X_test.shape[0],) and type(Y_test) == type(X_test), "Check what you return"
  return Y_test


### Loading MNIST dataset

In [49]:
# ## Loading MNIST dataset

# dataset = "mnist" 
# with open(f"./data/{dataset}_train.pkl", "rb") as file:
#     train_mnist = pkl.load(file)
#     print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")


In [50]:
# ## Splitting MNIST dataset

# X_train, Y_train, X_val, Y_val = split_data(train_mnist[0], train_mnist[1], train_ratio=0.8)


In [51]:
# ## Predict for MNIST

# # X_train, Y_train, X_val, Y_val = split_data(train_mnist[0], train_mnist[1], train_ratio=0.2)


# Y_test = predict(X_val, 'mnist')
# accuracy = np.sum(Y_test == Y_val)/len(Y_test)
# print("Predicting percentage accuracy :", accuracy*100, "%")

### Loading FLOWERS dataset

In [52]:
# ## Loading FLOWERS dataset

# dataset = "flowers"
# with open(f"./data/{dataset}_train.pkl", "rb") as file:
#     train_flowers = pkl.load(file)
#     print(f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")


In [53]:
# ## Splitting FLOWERS dataset

# X_train, Y_train, X_val, Y_val = split_data(train_flowers[0], train_flowers[1], train_ratio=0.8)


In [54]:
# ## Fitting flowers dataset

# fit(X_train, Y_train, 'flowers')

# # Accuracy for fitting
# Y_test = predict(X_train, 'flowers')
# accuracy = np.sum(Y_test == Y_train)/len(Y_test)
# print("Training percentage accuracy:", accuracy * 100, "%")


In [55]:
# ## Predict for FLOWERS

# Y_test = predict(X_val, 'flowers')
# accuracy = np.sum(Y_test == Y_val)/len(Y_test)
# print("Predicting percentage accuracy :", accuracy*100, "%")


In [None]:
# Load the MNIST training split. The pickle is indexed as [0] = features and
# [1] = labels below.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on data files from a trusted source.
dataset = "mnist"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    print(
        f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")

# Uncomment to train on the full MNIST training set and save the weights.
# fit(train_mnist[0], train_mnist[1], 'mnist')

# Load the flowers training split the same way; `dataset` is rebound here.
dataset = "flowers"  # "mnist"/"flowers"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_flowers = pkl.load(file)
    print(
        f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")

# Uncomment to train on the full flowers training set and save the weights.
# fit(train_flowers[0], train_flowers[1], 'flowers')


In [None]:
# ## Predict for mnist and flowers

# Y_test = predict(train_mnist[0], 'mnist')
# accuracy = np.sum(Y_test == train_mnist[1])/len(Y_test)
# print("Predicting percentage accuracy :", accuracy*100, "%")



# Y_test = predict(train_flowers[0], 'flowers')
# accuracy = np.sum(Y_test == train_flowers[1])/len(Y_test)
# print("Predicting percentage accuracy :", accuracy*100, "%")
