
## <font color=red> You should not import any new libraries. Your code should run with python=3.x</font>

#### <font color=red>For this lab assignment, you will work with two datasets. The trained weights must be saved and shared with us in a folder called models, at the path ./models/{dataset_name}_weights.pkl. Your predict function should load these weights, initialize the DNN with them, and predict the labels.</font>

- Your solutions will be auto-graded. Hence we request you to follow the instructions.
- Modify the code only between 
```
## TODO
## END TODO
```
- In addition to the above changes, you may adjust the arguments passed to the plotting functions
- We will run the auto grading scripts with private test data

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import math
import pickle as pkl


### Preprocessing and Normalizing


In [None]:
def preprocessing(X):
    """
    Scale raw image features by a factor of 1/255

    Args:
        X: input features - numpy array (documented shape: (n_samples, 784))

    Returns:
        X_out: float32 copy of X divided by 255 - numpy array with the same shape as X
    """

    # Cast first so the division is carried out in float32
    as_float = X.astype(np.float32)
    X_out = as_float / 255
    return X_out


In [None]:
def normalizing(X, mean=None, std=None):
    """
    Standardize input features column-wise: (X - mean) / std

    Statistics that are not supplied are computed from X along axis 0. Pass the
    mean/std returned for the training data to apply the same transform to
    validation or test data.

    Args:
        X: input features - numpy array of shape (n_samples, 2048)
        mean: optional per-feature mean; computed from X when None
        std: optional per-feature standard deviation; computed from X when None

    Returns:
        X_out: normalized features - numpy array of shape (n_samples, 2048)
        mean: the per-feature mean that was used
        std: the per-feature standard deviation that was used (returned
             unmodified, zeros included)
    """

    # Fill in only the statistic that is missing, so a caller-supplied value
    # is never silently replaced
    if mean is None:
        mean = X.mean(axis=0)
    if std is None:
        std = X.std(axis=0)

    # A zero-variance feature would give 0/0 = NaN; divide it by 1 instead so
    # the centred value (0 for the data the statistics came from) is kept
    safe_std = np.where(std == 0, np.ones_like(std), std)

    return (X - mean) / safe_std, mean, std


### Split data into train/val


In [None]:
def split_data(X, Y, train_ratio=0.8):
    '''
    Randomly split data into train and validation sets
    floor(train_ratio*n_samples) samples form the train set and the remaining
    samples form the validation set

    Args:
        X: data - numpy array of shape (n_samples, n_features)
        Y: labels - numpy array of shape (n_samples, 1)
        train_ratio: fraction of samples to be used as training data

    Returns:
        X_train: train data - numpy array of shape (floor(train_ratio*n_samples), n_features)
        Y_train: train labels - numpy array of shape (floor(train_ratio*n_samples), 1)
        X_val: validation data - numpy array of shape (n_samples - floor(train_ratio*n_samples), n_features)
        Y_val: validation labels - numpy array of shape (n_samples - floor(train_ratio*n_samples), 1)
    '''

    num_samples = len(X)
    num_train_samples = math.floor(num_samples * train_ratio)
    indices = np.arange(num_samples)

    # Sample the training rows without replacement (order is random)
    train_indices = np.random.choice(indices, num_train_samples, replace=False)

    # Everything not drawn for training goes to validation. A boolean mask
    # keeps the validation rows in ascending index order, independent of
    # Python set iteration order, and avoids building per-element Python sets
    is_val = np.ones(num_samples, dtype=bool)
    is_val[train_indices] = False
    val_indices = indices[is_val]

    X_train, Y_train = X[train_indices], Y[train_indices]
    X_val, Y_val = X[val_indices], Y[val_indices]

    return X_train, Y_train, X_val, Y_val


### Flatten the input


In [None]:
class FlattenLayer:
    """
    Layer that collapses every non-batch dimension of its input into one axis
    """

    def __init__(self, input_shape):
        """
        Args:
            input_shape : per-sample shape before flattening, tuple of ints
        """

        self.input_shape = input_shape
        # Number of elements in one flattened sample
        self.new_shape = np.prod(input_shape)

    def forward(self, input):
        """
        Flattens each sample of the batch into a 1-d vector

        Args:
            input: training data, numpy array of shape (n_samples, *self.input_shape)

        Returns:
            output: numpy array of shape (n_samples, self.new_shape)
        """

        n_samples = input.shape[0]
        flat_shape = (n_samples, self.new_shape)
        return np.reshape(input, flat_shape)

    def backward(self, output_error, learning_rate):
        """
        Restores the per-sample shape of the incoming error; there are no
        parameters, so learning_rate is unused

        Args:
            output_error: numpy array of shape (n_samples, self.new_shape)
            learning_rate: float

        Returns:
            output_error: numpy array of shape (n_samples, *self.input_shape)
        """

        n_samples = output_error.shape[0]
        restored_shape = (n_samples,) + tuple(self.input_shape)
        return np.reshape(output_error, restored_shape)

### Fully Connected Layer


In [None]:
class FCLayer:
    """
    Fully connected (dense) layer: output = input . weights + bias
    """

    def __init__(self, input_size, output_size):
        """
        Args:
            input_size: Input shape, int
            output_size: Output shape, int
        """

        self.input_size = input_size
        self.output_size = output_size

        # Standard-normal draws scaled by 1/sqrt(fan_in + fan_out);
        # weights are drawn before the bias
        scale = np.sqrt(input_size + output_size)
        self.weights = np.random.randn(input_size, output_size) / scale
        self.bias = np.random.randn(1, output_size) / scale

    def forward(self, input):
        """
        Forward pass of the fully connected layer

        Args:
            input: training data, numpy array of shape (n_samples, self.input_size)

        Returns:
            output: numpy array of shape (n_samples, self.output_size)
        """

        # Remember the input; backward needs it for the weight gradient
        self.input = input
        projected = np.dot(input, self.weights)
        return projected + self.bias

    def backward(self, output_error, learning_rate):
        """
        Backward pass of the fully connected layer; also applies one
        gradient-descent step to the weights and bias in place

        Args:
            output_error: numpy array of shape (n_samples, self.output_size)
            learning_rate: float

        Returns:
            input_error: numpy array of shape (n_samples, self.input_size)
        """

        # Error for the previous layer uses the weights from before the update
        input_error = np.dot(output_error, self.weights.T)  # del E / del X

        # Parameter gradients, summed over the samples in the batch
        grad_weights = np.dot(self.input.T, output_error)  # del E / del W
        grad_bias = np.sum(output_error, axis=0)  # del E / del B

        # In-place gradient-descent step
        self.weights -= learning_rate * grad_weights
        self.bias -= learning_rate * grad_bias

        return input_error


In [None]:
class ActivationLayer:
    """
    Layer that applies an element-wise activation function to its input.
    """

    def __init__(self, activation, activation_prime):
        """
        Args:
            activation : activation function (sigmoid, tanh or relu)
            activation_prime: derivative of activation function (sigmoid_prime,tanh_prime or relu_prime)
        """

        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """
        Applies the activation function

        Args:
            input: numpy array of shape (n_samples, input_size)

        Returns:
            output: numpy array of shape (n_samples, input_size)
        """

        # Keep the pre-activation values; backward evaluates the derivative there
        self.input = input
        activated = self.activation(input)
        return activated

    def backward(self, output_error, learning_rate):
        """
        Backward pass of the activation layer: scales the incoming error by the
        activation derivative at the saved input. The layer has no parameters,
        so learning_rate is unused

        Args:
            output_error: numpy array of shape (n_samples, input_size)
            learning_rate: float

        Returns:
            input_error: numpy array of shape (n_samples, input_size)
        """

        local_slope = self.activation_prime(self.input)
        return output_error * local_slope


In [None]:
class SoftmaxLayer:
    """
    Implements a Softmax layer which applies the softmax function row-wise.
    """

    def __init__(self, input_size):
        """
        Args:
            input_size: Input shape, int
        """

        self.input_size = input_size

    def forward(self, input):
        """
        Applies the softmax function to every row

        Args:
            input: numpy array of shape (n_samples, self.input_size)

        Returns:
            output: numpy array of shape (n_samples, self.input_size); each row sums to 1
        """

        self.input = input
        # Softmax is invariant to adding a constant per row; subtracting the
        # row maximum keeps np.exp from overflowing to inf (which gave NaN)
        shifted = input - np.max(input, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        self.output = exponentials / np.sum(exponentials, axis=1, keepdims=True)
        return self.output

    def backward(self, output_error, learning_rate):
        """
        Performs a backward pass of a Softmax layer. The layer has no
        parameters, so learning_rate is unused

        With s = softmax(x) and g = dE/ds, the Jacobian ds_j/dx_k = s_j * (delta_jk - s_k)
        gives dE/dx_k = s_k * (g_k - sum_j g_j * s_j)

        Args:
            output_error: numpy array of shape (n_samples, self.input_size)
            learning_rate: float

        Returns:
            input_error: numpy array of shape (n_samples, self.input_size)
        """

        # Per-sample inner product <g, s>, kept as a column for broadcasting
        weighted_error = np.sum(output_error * self.output, axis=1, keepdims=True)
        return self.output * (output_error - weighted_error)


In [None]:
def sigmoid(x):
    """
    Sigmoid function, evaluated without overflow for large |x|

    Args:
        x: numpy array

    Returns:
        sig(x) = 1 / (1 + exp(-x))
    """

    # exp(-|x|) is always in (0, 1], so it can never overflow.
    #   x >= 0: sig(x) = 1        / (1 + exp(-|x|))
    #   x <  0: sig(x) = exp(-|x|) / (1 + exp(-|x|))
    decay = np.exp(-np.abs(x))
    numerator = np.where(np.asarray(x) >= 0, np.ones_like(decay), decay)
    return numerator / (1 + decay)


def sigmoid_prime(x):
    """
    Derivative of Sigmoid function, evaluated without overflow for large |x|

    Args:
        x: numpy array

    Returns:
        sig'(x) = exp(-x) / (1 + exp(-x))**2
    """

    # The derivative is an even function, so it can be evaluated at |x|.
    # exp(-|x|) stays in (0, 1], which avoids the inf/inf = NaN that
    # exp(-x) produces for large negative x
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay)**2


def tanh(x):
    """
    Hyperbolic tangent activation

    Args:
        x: numpy array

    Returns:
        tanh(x), element-wise
    """

    squashed = np.tanh(x)
    return squashed


def tanh_prime(x):
    """
    Derivative of the hyperbolic tangent activation

    Args:
        x: numpy array

    Returns:
        tanh'(x) = 1 - tanh(x)^2, element-wise
    """

    squashed = np.tanh(x)
    return 1 - squashed**2


def relu(x):
    """
    Rectified linear unit activation

    Args:
        x: numpy array

    Returns:
        relu(x) = max(x, 0), element-wise
    """

    rectified = np.maximum(x, 0)
    return rectified


def relu_prime(x):
    """
    Derivative of the rectified linear unit activation

    Args:
        x: numpy array

    Returns:
        relu'(x): float32 array that is 1 where x >= 0 and 0 elsewhere
    """

    # The comparison is >=, so the derivative at exactly 0 is taken as 1
    non_negative = np.array(x >= 0)
    return non_negative.astype('float32')


In [None]:
def mse(y_true, y_pred):
    """
    MSE loss: squared error averaged over axis 0, then summed over the
    remaining entries

    Args:
        y_true: Ground truth labels - numpy array of shape (n_samples, self.input_size)
        y_pred: Predicted labels - numpy array of shape (n_samples, self.input_size)

    Returns:
        loss : float
    """

    squared_error = (y_true - y_pred)**2
    mean_over_samples = np.mean(squared_error, axis=0)
    return np.sum(mean_over_samples)


def mse_prime(y_true, y_pred):
    """
    Derivative of MSE loss with respect to the predictions

    Args:
        y_true: Ground truth labels - numpy array of shape (n_samples, self.input_size)
        y_pred: Predicted labels - numpy array of shape (n_samples, self.input_size)

    Returns:
        derivatives: numpy array of shape (n_samples, self.input_size)
    """

    # NOTE(review): the scale factor is the number of columns (axis 1), whereas
    # the mse loss in this file averages over axis 0 - confirm this is intended
    n_outputs = y_pred.shape[1]
    residual = y_pred - y_true
    return 2 * residual / n_outputs


def cross_entropy(y_true, y_pred):
    """
    Cross entropy loss, averaged over the samples

    If y_true has the same shape as y_pred it is used directly as per-class
    target weights (e.g. one-hot rows). Otherwise it is flattened and
    interpreted as one integer class index per sample.

    Args:
        y_true :  Ground truth labels, numpy array
        y_pred :  Predicted class probabilities, numpy array of shape (n_samples, n_classes)
    Returns:
       loss : float
    """

    ## TODO

    probs = np.atleast_2d(np.asarray(y_pred, dtype=float))
    targets = np.asarray(y_true)
    if targets.shape == np.shape(y_pred):
        targets = np.atleast_2d(targets).astype(float)
    else:
        # Integer class indices -> one-hot rows
        labels = targets.reshape(-1).astype(int)
        targets = np.zeros_like(probs)
        targets[np.arange(probs.shape[0]), labels] = 1.0

    # Clip away exact zeros so log() stays finite
    clipped = np.clip(probs, np.finfo(float).eps, 1.0)
    return float(-np.sum(targets * np.log(clipped)) / probs.shape[0])
    ## END TODO


def cross_entropy_prime(y_true, y_pred):
    """
    Implements derivative of the sample-averaged cross entropy loss with
    respect to the predicted probabilities, for the backward pass

    If y_true has the same shape as y_pred it is used directly as per-class
    target weights (e.g. one-hot rows). Otherwise it is flattened and
    interpreted as one integer class index per sample.

    Args:
        y_true :  Ground truth labels, numpy array
        y_pred :  Predicted class probabilities, numpy array of shape (n_samples, n_classes)
    Returns:
        Numpy array with the shape of y_pred: -y_true / y_pred / n_samples
    """

    ## TODO

    probs = np.atleast_2d(np.asarray(y_pred, dtype=float))
    targets = np.asarray(y_true)
    if targets.shape == np.shape(y_pred):
        targets = np.atleast_2d(targets).astype(float)
    else:
        # Integer class indices -> one-hot rows
        labels = targets.reshape(-1).astype(int)
        targets = np.zeros_like(probs)
        targets[np.arange(probs.shape[0]), labels] = 1.0

    # Clip away exact zeros so the division stays finite
    clipped = np.clip(probs, np.finfo(float).eps, 1.0)
    gradient = -targets / clipped / probs.shape[0]
    return gradient.reshape(np.shape(y_pred))
    ## END TODO

### Fit function

In [None]:
def fit(X_train, Y_train, dataset_name, epochs=40, learning_rate=0.1, batch_size=32, hidden_size=12):

    """
    Creates and trains a feedforward network, then saves its weights

    The network is Flatten -> FC(hidden_size) -> sigmoid -> FC(n_classes) -> softmax,
    sized from the data, and is trained with mini-batch gradient descent on the MSE loss.

    The trained parameters are pickled to ./models/{dataset_name}_weights.pkl as a dict with keys
        "input_shape": per-sample input shape, tuple of ints
        "layers": list of (weights, bias) tuples, one per FCLayer, in network order
        "mean", "std": feature statistics used for normalization (None for mnist)
    The ./models folder must already exist (no filesystem library is imported in this file).
    Use these weights in the `predict` function.

    Args:
        X_train -- np array of shape (num_train, 2048) for flowers and (num_train, 28, 28) for mnist.
        Y_train -- training labels: either one integer class index per sample
                   (any shape with num_train elements) or one row of per-class targets per sample.
        dataset_name -- name of the dataset (flowers or mnist)
        epochs -- number of passes over the training data
        learning_rate -- step size passed to every layer's backward pass
        batch_size -- number of samples per gradient step
        hidden_size -- number of units in the hidden layer

    Raises:
        ValueError: if the data is empty or X_train and Y_train differ in length
    """

    # Note that this just a template to help you create your own feed forward network
    ## TODO

    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    num_samples = len(X_train)
    if num_samples == 0 or num_samples != len(Y_train):
        raise ValueError("X_train and Y_train must be non-empty and have the same number of samples")

    # Dataset-specific input scaling
    mean, std = None, None
    if dataset_name == "mnist":
        inputs = preprocessing(X_train)
    else:
        inputs, mean, std = normalizing(X_train)
        # A constant feature has zero std and can normalize to NaN; treat it as 0
        inputs = np.nan_to_num(inputs)

    # The softmax output is compared against one row of per-class targets per sample
    if Y_train.ndim == 2 and Y_train.shape[1] > 1:
        targets = Y_train.astype(np.float64)
    else:
        labels = Y_train.reshape(-1).astype(int)
        targets = np.eye(int(labels.max()) + 1)[labels]
    num_classes = targets.shape[1]

    # define your network (sized from the data so it works for both datasets)
    input_shape = tuple(inputs.shape[1:])
    num_features = int(np.prod(input_shape))
    network = [
        FlattenLayer(input_shape=input_shape),
        FCLayer(num_features, hidden_size),
        ActivationLayer(sigmoid, sigmoid_prime),
        FCLayer(hidden_size, num_classes),
        SoftmaxLayer(num_classes),
    ]  # This creates feed forward

    for epoch in range(epochs):
        error = 0
        # Visit the samples in a fresh random order every epoch
        order = np.random.permutation(num_samples)
        for start in range(0, num_samples, batch_size):
            batch = order[start:start + batch_size]
            x, y_true = inputs[batch], targets[batch]

            # forward
            output = x
            for layer in network:
                output = layer.forward(output)

            # error (display purpose only); mse is a per-batch mean, so weight
            # it by the batch length to get a per-sample average over the epoch
            error += mse(y_true, output) * len(batch)

            # backward (FCLayer sums the parameter gradients over the batch)
            output_error = mse_prime(y_true, output)
            for layer in reversed(network):
                output_error = layer.backward(output_error, learning_rate)

        error /= num_samples
        print("%d/%d, error=%f" % (epoch + 1, epochs, error))

    # Save you model weights
    trained = {
        "input_shape": input_shape,
        "layers": [(layer.weights, layer.bias) for layer in network if isinstance(layer, FCLayer)],
        "mean": mean,
        "std": std,
    }
    with open(f"./models/{dataset_name}_weights.pkl", "wb") as file:
        pkl.dump(trained, file)

    ## END TODO

### Loading datasets

In [None]:
# Load the pickled MNIST training set and train on it.
# NOTE: pickle can execute arbitrary code while loading - only open trusted files.
dataset = "mnist"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    # Element 0 is reported as train_x and element 1 as train_y
    print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")

# Runs at module level: training starts as soon as this cell is executed
fit(train_mnist[0], train_mnist[1], "mnist")

# Same steps for the flowers training set
dataset = "flowers"  # "mnist"/"flowers"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_flowers = pkl.load(file)
    # Element 0 is reported as train_x and element 1 as train_y
    print(f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")

fit(train_flowers[0], train_flowers[1], "flowers")

In [None]:
def predict(X_test, dataset_name):
    """
    X_test -- np array of shape (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.

    This is the only function that we will call from the auto grader.

    This function should only perform inference, please do not train your models here.

    Steps to be done here:
    1. Load your trained weights from ./models/{dataset_name}_weights.pkl
    2. Ensure that you read weights using only the libraries we have given above.
    3. Initialize your model with your trained weights
    4. Compute the predicted labels and return it

    Please provide us the complete code you used for training including any techniques
    like data augmentation etc. that you have tried out.

    Until the TODO section below is filled in, the returned labels are all zeros.

    Return:
        Y_test - nparray of shape (num_test,)
    """

    # One label per test sample: only the leading (sample) axis of X_test counts
    num_test = X_test.shape[0]
    Y_test = np.zeros((num_test,))

    ## TODO

    ## END TODO
    # Sanity check on the value handed back to the auto grader
    assert Y_test.shape == (num_test,) and isinstance(Y_test, np.ndarray), "Check what you return"
    return Y_test
