In [2]:
import pandas as pd
import numpy as np

## MNIST Data Set
MNIST Data Set contains data for 28x28 pixel gray scale images of handwritten numbers from 0-9. Specifically, the data represents 784 (28*28) pixels per image that can take on values from 0-255. Zero being the color black and 255 being white. 

In [30]:
MNIST_train = pd.read_csv("mnist_train.csv")
print(MNIST_train.shape)
MNIST_train = pd.concat([MNIST_train,pd.get_dummies(MNIST_train.label,prefix = "label")],axis=1)
MNIST_train.drop(columns=['label'], axis =1, inplace = True)
MNIST_test = pd.read_csv("mnist_test.csv")
print(MNIST_test.shape)
MNIST_test = pd.concat([MNIST_test,pd.get_dummies(MNIST_test.label,prefix = "label")],axis=1)
MNIST_test.drop(columns=['label'], axis =1, inplace = True)

# convert to np array for the NN Class
MNIST_test = np.array(MNIST_test)
MNIST_train = np.array(MNIST_train)
MNIST_test = MNIST_test.T
X_test = MNIST_test[0:784]/255
Y_test = MNIST_test[784:795]
MNIST_train = MNIST_train.T
X_train = MNIST_train[0:784]/255
Y_train = MNIST_train[784:795]



#print(MNIST_train.head())
#print(MNIST_test.head())

(60000, 785)
(10000, 785)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
(794, 10000)


## Layers of NN
My Hidden and Output layers will both be fully connected for simplicity.
### Input Layer
Input layer # of nodes = # of features in the data set. For our data set there are 784 features representing each of the pixels
### Hidden Layers
I am going to choose to do a single hidden layer conisisting of ten nodes. I will use ReLU as the activation function for the hidden layer.
### Output Layers
Output layer # of nodes = 1 for regression, and for multiclass problems it is dependent on the number of classes you are trying to predict. In this case, I am trying to predict the numeral an image represents (0-9). Therefore, my output layer will have 10 nodes representing the digits 0-9. I will use ReLU as the activation function for the output layer.
![NN_Diagram](NN_Diagram.png)



## Training
Once the nodes and layers are setup, it is time to think about how we train the weights and biasis corresponding to each layer. This is a 3 part process. Weights and Biasis are called the learning parameters. 

### 1. Forward Propogation

### 2. Backwards Propogation

### 3. Parameter Updates

In [120]:
# class for NN with one hidden layer
class NN:
    def __init__(self,numInputNodes,numHiddenNodes,numOutputNodes,learningRate,iterations):
        self.numInputNodes = numInputNodes
        self.numHiddenNodes = numHiddenNodes
        self.numOutputNodes = numOutputNodes
        self.learningRate = learningRate
        self.iterations = iterations
    
    # initializes a weight and biases as np arrays
    def Init_Learning_Params(self):
        # gives a value between 0 and 1
        # want a value between -.5 and .5
        self.W1 = np.random.rand(self.numHiddenNodes,self.numInputNodes) -0.5
        self.B1 = np.random.rand(self.numHiddenNodes, 1)
        self.W2 = np.random.rand(self.numOutputNodes, self.numHiddenNodes) -0.5
        self.B2 = np.random.rand(self.numOutputNodes, 1)
        
        
        #return W1,W2,B1,B2
    
    # TODO make sure this works with np array
    def Relu(self,value):
        return np.maximum(value,0)
    # TODO make sure this works with np array
    def Relu_Prime(self,value):
        return value>0
    
    # performs forward propogation through the hidden layer and output layer
    # returns the result after being passed through the activation function ReLU
    # 
    def Forward_Propogate(self,Input):
        # finds the sum of the inputs to the hidden nodes from the input nodes
        self.Hidden = self.W1.dot(Input)+self.B1
        # runs relu on the sum array
        self.Hidden_Activated = self.Relu(self.Hidden)
        # uses hidden_activated as the input to multiply by the weights
        self.Output = self.W2.dot(self.Hidden_Activated)+self.B2
        self.Output_Activated = self.Relu(self.Output)
        
    def Forward_Propogate_Predict(self,Input):
        # finds the sum of the inputs to the hidden nodes from the input nodes
        Hidden = self.W1.dot(Input)+self.B1
        # runs relu on the sum array
        Hidden_Activated = self.Relu(self.Hidden)
        # uses hidden_activated as the input to multiply by the weights
        Output = self.W2.dot(self.Hidden_Activated)+self.B2
        Output_Activated = self.Relu(self.Output)
        return Hidden, Hidden_Activated, Output, Output_Activated
        
    def Backward_Propogation(self,Input, Expected_Output):
        # Output to Hidden layer
        Hidden_Prime = self.Hidden_Activated - Expected_Output
        self.W2_Prime = 1/ Expected_Output.size * Hidden_Prime.dot(self.Output_Activated.T)
        self.B2_Prime = 1/ Expected_Output.size * np.sum(Hidden_Prime)
        # Hidden layer to Input
        Output_Prime = self.W2.T.dot(Hidden_Prime)*self.Relu_Prime(self.Output)
        self.W1_Prime = 1/ Expected_Output.size * Hidden_Prime.dot(Input.T)
        self.B1_Prime = 1/Expected_Output.size * np.sum(Output_Prime)
    
    def Train(self,Input, Expected_Output):
        self.Init_Learning_Params()
        for x in range(self.iterations):
            self.Forward_Propogate(Input)
            self.Backward_Propogation(Input, Expected_Output)
            self.W1 = self.W1-self.learningRate * self.W1_Prime
            self.W2 = self.W2-self.learningRate * self.W2_Prime
            self.B1 = self.B1-self.learningRate * self.B1_Prime
            self.B2 = self.B2-self.learningRate * self.B2_Prime
            print("Iteration ", x)
            print("Accuracy = ", np.sum(np.argmax(self.Output_Activated,0)== Expected_Output)/Expected_Output.size)

    
    def Test(self,Input,Expected_Output):
        _,_,_,Output_Activated = self.Forward_Propogate_Predict(Input)
        result = np.argmax(Output_Activated,0)
        print(result)
        print("Accuracy = ", np.sum(np.argmax(result,0)== Expected_Output)/Expected_Output.size)
        
    
    

In [119]:
myNet = NN(784,10,10,0.15,150)
myNet.Train(X_train,Y_train)

Iteration  0
Accuracy =  0.0008616666666666666
Iteration  1
Accuracy =  0.000375
Iteration  2
Accuracy =  0.000215
Iteration  3
Accuracy =  0.00015333333333333334
Iteration  4
Accuracy =  0.00013333333333333334
Iteration  5
Accuracy =  0.00010333333333333333
Iteration  6
Accuracy =  9.166666666666667e-05
Iteration  7
Accuracy =  9.333333333333333e-05
Iteration  8
Accuracy =  8.5e-05
Iteration  9
Accuracy =  8.666666666666667e-05
Iteration  10
Accuracy =  8e-05
Iteration  11
Accuracy =  8.333333333333333e-05
Iteration  12
Accuracy =  8.333333333333333e-05
Iteration  13
Accuracy =  8.666666666666667e-05
Iteration  14
Accuracy =  8.833333333333333e-05
Iteration  15
Accuracy =  9.166666666666667e-05
Iteration  16
Accuracy =  9.166666666666667e-05
Iteration  17
Accuracy =  9.666666666666667e-05
Iteration  18
Accuracy =  9.5e-05
Iteration  19
Accuracy =  9.666666666666667e-05
Iteration  20
Accuracy =  0.0001
Iteration  21
Accuracy =  0.00010166666666666667
Iteration  22
Accuracy =  0.0001016

In [116]:
myNet.Test(X_test,Y_test)

Accuracy =  0.0
