# <span style="color: #8e44ad">Challenge →</span>

- Modify this Neural Network to be within a Python Class. This is so you can instantiate this Neural Network whenever you'd like.

    * The class should have a train function which is essentially the code given above. It should take an epoch paramater which is the number of epochs the network will train for (it is 10,000 in the example above). 
    * The class should have a test function, which will run an example through the network and give an output. Remember, the testing of the function is just the forward pass of the network. You will have to code some logic to give you a digestable output that makes sense. 
    * The neural network should take a parameter upon instantiation that dictates the number of hidden layers. This portion of the challenge will require the most thought and work. Feel free to work with partners on this part as it is very important you understand how this is done. 

**Hint:** Adding multiple layers to the network will be tough. Just remember that your input layer goes to your hidden layer and your hidden layer goes to your output layer. You will have to inject the capability to modularly make more layers using lists of values. All the math for these layers can be reused from the code above.

**Hint:** Draw out your architecture on a piece of paper to visualize it before actually jumping into it. This will help.

**Hint:** You can verify your network is working properly by training it and observing that your loss is indeed going down instead of up.

In [25]:
import random
import math
import time
import numpy as np
import pandas as pd

In [26]:
# transfer functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# derivative of sigmoid
def dsigmoid(y):
    return y * (1.0 - y)

# using softmax as output layer is recommended for classification where outputs are mutually exclusive
def softmax(w):
    exp_scores = np.exp(w)
    # axis is 1 to sum across rows
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# using tanh over logistic sigmoid for the hidden layer is recommended   
def tanh(x):
    return np.tanh(x)
    
# derivative for tanh sigmoid
def dtanh(y):
    return 1 - y*y

# Generate some labels for out data - this will be the true data
def generate_labels(samples, classes):
    y = np.array([])

    for i in range(classes):
        y = np.append(y, [i]*samples)

    np.random.shuffle(y)
    return np.vstack(y[:samples]).astype(int)

# Column normalize with negatives
def normalize_with_negatives(x):
    x = np.array(x)
    # ptp(0) returns the "peak-to-peak" (i.e. the range, max - min) along axis 0.
    # this is necessary to handle negative values and guaranteed the minimum value in each column is 0
    x_normed = (x - x.min(0)) / x.ptp(0)
    return x_normed

def normalize(x):
    x = np.array(x)
    x_normed = x / x.max(axis=0)
    return x_normed

In [27]:
class NeuralNet(object):
    
    # define the class constructor
    def __init__(self,
                 X,
                 y,
                 input_dim,
                 hidden_dims,
                 output_dim,
                 learning_rate,
                 regularization
                 ):
        """
        :param X: training samples
        :param y: training labels
        
        :param input_dim:   number of input neurons
        :param hidden_dims: number of hidden neurons in an array
        :param output_dim:  number of output neurons
        
        """
        
        # initialize parameters
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        self.output_dim = output_dim
        self.samples = X.shape[0]
        self.regularization = regularization;
        self.learning_rate = learning_rate;
        self.print_loss=True;
        self.print_loss_iteration=1000;
        self.reg_lambda = 0.01;

        # training data and labels
        self.X = X
        self.y = y

        # +1 as we need n layers - 1 weights and biases
        self.weights = [[] for i in range(len(self.hidden_dims)+1)]        
        self.biases = [[] for i in range(len(self.hidden_dims)+1)]    

        # preactivation layer arrays
        self.z = [[] for i in range(len(self.hidden_dims)+1)] 
        # activation layer arrays
        self.a = [[] for i in range(len(self.hidden_dims)+1)]
        # deltas arrays
        self.deltas = [[] for i in range(len(self.hidden_dims)+1)]
        self.deltaWeights = [[] for i in range(len(self.hidden_dims)+1)]
        self.deltaBiases = [[] for i in range(len(self.hidden_dims)+1)]

        # Initialize the parameters to random values. We need to learn these.
        np.random.seed(0)

        # the reason we are going to the size od hidden_dims + 1 is because we need 
        # weights and biases for the total number of layers - 1
        for i in range(len(self.hidden_dims)+1):
            np.random.seed(np.random.randint(99))
            if i == 0:
                # do stuff for first layer
                self.weights[i] = np.random.randn(self.input_dim, self.hidden_dims[i])# / np.sqrt(self.input_dim)
                self.biases[i] = np.zeros((1, self.hidden_dims[i]))

            elif i == len(self.hidden_dims):
                # do stuff for last layer
                self.weights[i] = np.random.randn(self.hidden_dims[i-1], self.output_dim)# / np.sqrt(self.hidden_dims[i-1])
                self.biases[i] = np.zeros((1, self.output_dim))

            else:
                # do stuff for every other middle juicy deep layer
                self.weights[i] = np.random.randn(self.hidden_dims[i-1], self.hidden_dims[i])# / np.sqrt(self.hidden_dims[i-1])
                self.biases[i] = np.zeros((1, self.hidden_dims[i]))


        # This is what we return at the end
        self.model = {}

        
    # calculate the accuracy of your training data
    # :param predictions: your actual predictions (1d array)
    # :param labels: real labels (1d array)
    def calculate_accuracy(self, predictions, labels):
        
        accuracy = 0
        for i in range(len(predictions)):
            
            if predictions[i] == labels[i]:
                accuracy += 1
                
        return accuracy / len(predictions)
        
        
    # feed forward function since it is being called a lot
    def feedforward(self, X):
        
        # Forward propagation to calculate our predictions
        probs = []
        
        for i in range(len(self.hidden_dims) + 1):
            if i == 0:
                # do stuff for first layer
                self.z[i] = X.dot(self.weights[i]) + self.biases[i]
                self.a[i] = np.tanh(self.z[i])
                
            elif i == len(self.hidden_dims):
                # do stuff for last layer
                self.z[i] = self.a[i-1].dot(self.weights[i]) + self.biases[i]
                # calculate the probabilities of the output layer
                probs = softmax(self.z[i])
                
            else:
                # do stuff for every other middle juicy deep layer
                self.z[i] = self.a[i-1].dot(self.weights[i]) + self.biases[i]
                self.a[i] = np.tanh(self.z[i])
                
        return probs
    
    # Helpers function to evaluate the total loss on the dataset
    def calculate_loss(self, model):
        
        # feed forward to get our predictions
        probs = self.feedforward(self.X)            
        
        correct_logprobs = -np.log(probs[range(self.samples), np.matrix(self.y).A1.astype(int)])
        data_loss = np.sum(correct_logprobs)
        
        # Add regulatization term to loss (optional)
        if self.regularization == True:
            sum = 0
            for i in range(len(self.weights)):
                # add the sum to a cumulative variable for use later to calculate data loss
                sum += np.sum(np.square(self.weights[i]))

            data_loss += self.reg_lambda/2 * sum
        
        return 1./self.samples*data_loss
    
    
     # Helper function to predict an output (0 or 1)
    def predict(self, X):
        
        # feed forward to get our predictions
        probs = self.feedforward(X)
        
        return np.argmax(probs, axis=1)
        
    # Define a training method that accepts a number of epoch iterations to run
    def train(self, epoch_iterations):
        
        self.samples = self.X.shape[0] # 1500 samples
        self.features = self.X.shape[1] # 2 features
        classes = 3

        alpha = 10e-6
        costs = []
        
        self.ei = epoch_iterations
        
        for j in range(self.ei + 1):
            # forward pass
            probs = self.feedforward(self.X)

            # BACKPROPAGATION

            # let's go in the opposite direction this time ;)
            for i in reversed(range(len(self.hidden_dims) + 1)):
                if i == len(self.hidden_dims):
                    # do stuff for first delta (which is actually the last in the array)
                    self.deltas[i] = probs

                    self.deltas[i][range(len(self.y)), np.matrix(self.y).A1.astype(int)] -= 1 
                    self.deltaWeights[i] = (self.a[i-1].T).dot(self.deltas[i])
                    self.deltaBiases[i] = np.sum(self.deltas[i], axis=0, keepdims=True)

                elif i == 0:
                    # do stuff for first layer
                    self.deltas[i] = self.deltas[i+1].dot(self.weights[i+1].T) * (1 - np.power(self.a[i], 2))
                    self.deltaWeights[i] = np.dot(self.X.T, self.deltas[i])
                    self.deltaBiases[i] = np.sum(self.deltas[i], axis=0, keepdims=True)

    #                 elif i == 0:
    #                     # do nothing
    #                     print("end of backprop loop",i)

                elif i != 0:
                    # do stuff for every other middle juicy deep layer
                    self.deltas[i] = self.deltas[i+1].dot(self.weights[i+1].T) * (1 - np.power(self.a[i], 2))
                    self.deltaWeights[i] = (self.a[i-1].T).dot(self.deltas[i])
                    self.deltaBiases[i] = np.sum(self.deltas[i], axis=0, keepdims=True)


            if self.regularization == True:
                for i in range(len(self.deltaWeights)):
                    self.deltaWeights[i] += self.reg_lambda * self.weights[i]

            for i in range(len(self.weights)):
                self.weights[i] += -self.learning_rate * self.deltaWeights[i]
                self.biases[i] += -self.learning_rate * self.deltaBiases[i]

            # Assign new parameters to the model
            model = { 'weights': self.weights, 'biases': self.biases}

            # Optionally print the loss.
            # This is expensive because it uses the whole dataset, so we don't want to do it too often.
            if self.print_loss and j % self.print_loss_iteration == 0:
                print ("Loss after iteration %i: %f" %(j, self.calculate_loss(model)))

        return model

    # DISPLAY THE COST        
    #plt.plot(costs)
    #plt.show()
        
    # Define a testing method that accepts an array of test data
    def test(self, X_test, y_test):
        
        # calculate loss
        nn.calculate_loss({})
        
        # forward pass (over pre-split test data)
        probs = self.feedforward(X_test)
        
        # magic
        predictions = nn.predict(X_test)
        np.set_printoptions(threshold=np.inf)
        print(np.array(predictions))
        
        labels = y_test.astype(int).squeeze()
        labels
        accuracy = nn.calculate_accuracy(predictions, labels)
        accuracy = accuracy * 100
        # output accuracy
        return accuracy

In [28]:
# generate dynamic Gaussian clouds each holding 500 points
X1 = np.random.randn(200, 2) + np.array([0, -2])
X2 = np.random.randn(200, 2) + np.array([2, 2])
X3 = np.random.randn(200, 2) + np.array([-2, 2])

# put them all in a big matrix
X = np.vstack([X1, X2, X3])
data = X
X = np.array(data[:, :1])
y = np.array(data[:, [1]])

# generate the one-hot-encodings
# remember: these labels will be the corresponding classes to the data we generated above.
labels = np.array([0]*200 + [1]*200 + [2]*200)
T = np.zeros((600, 3))
for i in range(600):
    T[i, labels[i]] = 1

test_train_split = 0.7
y_train = y[:int(np.rint(y.shape[0]*test_train_split))]
X_train = X[:int(np.rint(X.shape[0]*test_train_split))]
y_test = y[int(np.rint(y.shape[0]*test_train_split)):]
X_test = X[int(np.rint(X.shape[0]*test_train_split)):]

nn = NeuralNet(X_train, 
               y_train, 
               X_train.shape[1], 
               [1, 8, 1], 
               len(np.unique(y)),
               learning_rate = 0.001,
               regularization = True)

In [None]:
# START TRAINING HERE
nn.train(10000)

Loss after iteration 0: 1.681307
Loss after iteration 1000: 1.681203
Loss after iteration 2000: 1.681104
Loss after iteration 3000: 1.681009


In [47]:
# START TESTING HERE
nn.test(X_test, y_test)

[  0 598   0   2 598   2   2   0 598 598   0   2 599   2   2   2   2   2
   2   2   2 598   0   0 598   0   0   2   2 599   2   0   2   0 599 598
   2 598 599   2 598   2   0   0   0   0   0   2   2   2 599   2   0   0
   2   2 599   2   0   2 598   0   2   0   2 599 598   0 598   2   2   2
   0   0 598   2 598 599 599 598   2   2 598   0   2   0   2 599   2 598
   0 598 599 598   0 598   0 599 599 598   2 598 598   0 598   0   2   0
   2   2 599   0 598   2 598   2   2   2 598   2   2   0   2   2   2   0
   2   2 599 599   2 599   2 598   2   2 598   2 599   2   2 599   2   0
 599   2   2   0   2   0   2   0 598   2 599   2   0   2   0 599   2   2
   0 598   2   2   0   2 598   2   2 598   2   2 599   2   0 599   0   2]


21.666666666666668