# Feed Forward Neural Network with Back Propagation

The objective of this notebook is to implement, in a clean and scalable way, a fully connected neural network that is trained with a gradient descent back propagation algorithm. Afterwards I will build a neural net with a few layers to classify gas guzzlers from the mtcars data set and another network to predict mpg.

* Get the mtcars data, build features and also divide into train and test
* Create classes and methods for particular modules - linear and activation of the neural network
* Create a neural network class which is built out of modules and can be trained. 
* Train the network on mtcars and see if we are getting any reasonable results.


### Data preparation

In [37]:
import numpy as np
import pandas as pd
import csv
import itertools
from matplotlib import pyplot as plt

def load_data(path):
    '''
    Read the tab-separated file stored under ``path`` into a pandas data frame.

    The file is parsed with csv.DictReader, so its first line supplies the column
    names and every following line becomes one row of the frame. Values are kept
    exactly as the csv module yields them (no numeric conversion is done here).

    @param path : location of the tab-separated file
    returns a pandas data frame with one row per record in the file
    '''
    with open(path) as handle:
        records = list(csv.DictReader(handle, delimiter='\t'))
    return pd.DataFrame(records)


def one_hot(x):
    '''
    One-hot encode a pandas series.

    @param x : pandas series object; it needs a string name, and every distinct value
               must be accepted by int() because the new column names are built as
               <series name><int(value)>
    returns a data frame, indexed like x, that holds ONLY the 0/1 indicator columns
    (one per distinct value, in order of first appearance). The original series is
    not part of the result.
    '''
    # A vectorised comparison per distinct value replaces the former row-by-row
    # DataFrame.apply, which called a Python lambda once per row per value.
    indicators = {
        x.name + str(int(val)): (x == val).to_numpy().astype('int64')
        for val in x.unique()
    }
    return pd.DataFrame(indicators, index=x.index)
        
    
def standardize(x):
    '''
    Rescale a series to zero mean and unit spread.

    @param x : pandas series object whose values can be cast to float
    returns a single-column data frame named after the series. The values are
    (x - mean) / std, with mean and std taken via np.mean / np.std. The result is
    built from a plain array, so it carries a fresh default index rather than the
    index of x.
    '''
    as_float = x.astype('float')
    centre = np.mean(as_float)
    spread = np.std(as_float)
    scaled = (as_float - centre) / spread

    return pd.DataFrame(np.array(scaled), columns=[x.name])


def make_features(feature_list):
    '''
    Build the feature frame from a list of (series, encoding) pairs.

    @param feature_list: a list of tuples; the first entry is a pandas series and the
    second one names its encoding: 'onehot' sends the series through one_hot,
    'standardize' sends it through standardize, and anything else keeps the series
    untouched.
    returns all encoded pieces glued together column-wise, in the order given.
    '''
    def _encode(series, kind):
        # Pick the transformation for a single (series, encoding) pair.
        if kind == 'standardize':
            return standardize(series)
        if kind == 'onehot':
            return one_hot(series)
        return series

    encoded = [_encode(series, kind) for series, kind in feature_list]
    return pd.concat(encoded, axis=1)



auto_data = load_data('../code_and_data_for_hw05/auto-mpg-regression.tsv')


# Keep this for future reference (e.g. to map standardized mpg back to real mpg).
# The statistics have to be taken from the RAW column, i.e. before make_features()
# replaces mpg with its standardized version; taken afterwards they are just ~0 and ~1.
# ddof=0 matches the np.std call used inside standardize().
mean_mpg, sigma_mpg = auto_data['mpg'].astype('float').mean(), auto_data['mpg'].astype('float').std(ddof=0)


# Each entry pairs a raw column with the encoding make_features() should apply to it.
# mpg goes last on purpose: the split below takes the last row of the transposed
# frame as the target.
features = [
            (auto_data.cylinders, 'onehot'),
            (auto_data.displacement, 'standardize'),
            (auto_data.horsepower, 'standardize'),
            (auto_data.weight, 'standardize'),
            (auto_data.acceleration, 'standardize'),
            (auto_data.origin, 'onehot'),
            (auto_data.mpg, 'standardize')
            ]


auto_data = make_features(features)


# Transpose data so that every column is one sample, then split off the target row:
X = (auto_data.T).iloc[:-1, :]
Y = (auto_data.T).iloc[[-1], :]

# Divide at random into test and train. The test data set will be approximately 20% of the entire data set
test_ind = np.random.choice(range(X.shape[1]), int(0.2 * X.shape[1]), replace = False)
# Everything that was not drawn for testing is used for training
train_ind = np.array(list(set(range(X.shape[1]))- set(test_ind)))

X_test, Y_test = np.array(X.iloc[:, test_ind]), np.array(Y.iloc[:, test_ind])
X_train, Y_train = np.array(X.iloc[:, train_ind]), np.array(Y.iloc[:, train_ind])




### Neural Network Implementation

In [75]:
class Linear(object):
    '''
        Linear (weights only, no bias term) module of the network: Z = W A.
        Samples are stored column-wise, so an input batch has shape (m, batch) and
        the output has shape (n, batch).
    '''
    def __init__(self, m,n):
        '''
            This is the linear part that takes in m inputs, multiplies by weights and produces n outputs
            m - number of inputs
            n - number of outputs
            The weights W have shape (n, m) and are drawn from a standard normal distribution.
        '''
        self.m = m
        self.n = n
        self.W = np.random.normal(loc=0.0, scale=1.0, size=(self.n, self.m))

    def forward(self, X):
        '''
            X are the inputs, shape (m, batch). The forward method stores the inputs
            (needed later by backward) and the pre-activations Z = W X, and returns Z.
        '''
        assert X.shape[0] == self.m, 'expected %d input rows, got %d' % (self.m, X.shape[0])
        self.A = X  # if self is layer l, then self.A is l-1 layer, the inputs to this module
        self.Z = np.matmul(self.W, X)
        return self.Z

    def backward(self, dLdZ):
        '''
            dLdZ is passed from the activation layer and has the shape of Z, (n, batch).
            dLdA = W^T dLdZ (because dZdA = W) is returned so it can be passed to the
            previous activation layer.
            dLdW = dLdZ A(l-1)^T, shape (n, m) like W, is stored for the weight update.
            forward has to be called first, since it records the inputs A.
        '''
        self.dLdA = np.matmul(self.W.T, dLdZ)
        self.dLdW = np.matmul(dLdZ, self.A.T)
        return self.dLdA

    def update(self, lrate):
        '''
            Gradient descent step: move the weights against the stored gradient dLdW,
            scaled by the learning rate lrate. backward has to be called first.
        '''
        self.W = self.W - lrate * self.dLdW

    # The method used to be spelled "udpdate"; keep that name working for existing callers.
    udpdate = update
    


    
# Shape smoke test for Linear: a layer with 3 inputs and 4 outputs is run forward and
# backward on a batch of 2 column vectors, printing the shape of every tensor involved.
# NOTE(review): this rebinds the module-level name X, which the data preparation cell
# used for the feature frame - confirm nothing further down expects the original X.
X = np.array([[2,3],[2,3],[1,1]])    
L1 = Linear(3,4)
print('W ', L1.W.shape)
print('input ', X.shape)
L1.forward(X)

print('input A ',L1.A.shape)

print('output Z ', L1.Z.shape)
# Hand-written upstream gradient with the same shape as Z
dLdZ = np.array([[1,2],[3,4],[5,6],[7,8]])

print('dLdZ ', dLdZ.shape)
L1.backward(dLdZ)

print('dLdA ', L1.dLdA.shape)
print('dLdW ', L1.dLdW.shape)



W  (4, 3)
input  (3, 2)
input A  (3, 2)
output Z  (4, 2)
dLdZ  (4, 2)
dLdA  (3, 2)
dLdW  (4, 3)


In [78]:
class ReLU(object):
    '''
        Elementwise ReLU activation module.
        Naming inside this class: self.A holds the INPUT that forward received and
        self.Z holds the OUTPUT (the rectified values).
    '''
    def __init__(self, m):
        '''
            the activation module will take an input of m from the previous linear module and also return m 
            elements being the images of ReLU function applied on those elements
            (m is only stored; the computations below work for any input shape)
        '''
        self.m = m

    def forward(self, X):
        '''
            Take an X as input and apply ReLU elementwise: entries >= 0 are kept,
            everything else becomes 0. The input is remembered for backward and the
            result, which keeps the dtype of X, is stored and returned.
        '''
        self.A = X
        values = np.asarray(X)
        # np.where instead of np.vectorize: vectorize guesses the output dtype from the
        # FIRST element only, so a negative first entry (-> int 0) used to truncate every
        # float activation to an integer. It also could not handle empty input.
        self.Z = np.where(values >= 0, values, 0)
        return self.Z

    def backward(self, dLdA):
        '''
            Takes dLdA, the gradient of the loss with respect to the OUTPUT of this module.
            Returns the gradient with respect to the INPUT of this module: dLdA times the
            ReLU derivative, which is 1 where the stored input is >= 0 and 0 elsewhere.
            forward has to be called first, since it records the input.
        '''
        relu_grad = np.where(np.asarray(self.A) >= 0, 1, 0)
        return dLdA * relu_grad
    
    
# Smoke test for ReLU: take the pre-activations of the Linear demo layer L1, rectify
# them, then push a hand-written upstream gradient back through the activation.
R = ReLU(4)  
a = L1.forward(np.array([[2,3],[2,3],[1,1]]))
print(a)
R.forward(a)
# One gradient entry per element of a (4 outputs x 2 samples)
dLdA = np.array([[0.6, 0.3], [0.9,1],[3,3],[1.2, 3.4]])
R.backward(dLdA)

In [None]:
class quadratic_loss():
    '''
        Quadratic (squared error) loss module that closes the network.
        Both methods divide by Ypred.shape[0], the length of the first axis of the
        predictions, so the value and its gradient stay consistent with each other.
    '''
    def __init__(self, m):
        '''
            m is the number of inputs which is the number of outputs of the last module
        '''
        self.m = m

    def q_loss(self, Ypred, Y):
        '''
            simple quadratic loss: sum of squared differences between targets Y and
            predictions Ypred, divided by Ypred.shape[0]. Returns a scalar.
        '''
        return ((Y - Ypred)**2).sum()/(Ypred.shape[0])

    def q_loss_grad(self, Ypred, Y):
        '''
            quadratic loss gradient with respect to the predictions.
            Stores the predictions in self.A and the gradient in self.dLdA, and returns
            the gradient, which has the same shape as Ypred so that it can be handed
            to the backward method of the last module.
        '''
        self.A = Ypred
        # d/dYpred (Y - Ypred)^2 = 2 (Ypred - Y): one entry per prediction and pointing
        # uphill. The previous version summed everything into one scalar and had the
        # opposite sign, so it could neither be back-propagated nor descend the loss.
        self.dLdA = 2*(np.asarray(Ypred) - np.asarray(Y))/(Ypred.shape[0])
        return self.dLdA
    


In [None]:
class nn():
    def __init__(self, modules, loss_module):
        self.modules = modules
        self.loss_module = loss_module
        
    def forward(X):
        