In [1]:
import math
import numpy as np
import pandas as pd

In [42]:
def sigmoid(activation):
    return 1.0 / (1.0 + np.exp(-activation))

def sigmoid_derivative(output):
    return output * (1.0 - output)

class MLP:
    def __init__(self,X):
        self.lr = 1
        self.h_weights   = np.random.rand(X.shape[1],128)   # 784 input, 128 neurons
        self.o_weights   = np.random.rand(128,9)   # 9 signals of output
    
    
    def propagate_forwad(self, X):
        self.layer1 = sigmoid(np.dot(X, self.h_weights))
        self.output = sigmoid(np.dot(self.layer1, self.o_weights))
        return self.output
    
    def propagate_backwards(self,X,d,activations):
        ### get the error and delta of outputlayer
        E = d - self.output
        delta_output = E * sigmoid_derivative(self.output)
        
        ### apply chain rule to find the derivative of loss function regarding each weights 
        error_at_hidden = np.dot(delta_output, self.o_weights.T)
        delta_hidden = error_at_hidden * sigmoid_derivative(self.layer1)
                                 
        d_weights2 = np.dot(self.layer1.T, delta_output)
        d_weights1 = np.dot(X.T,   delta_hidden)

        # update the weights with the derivative (slope) of the loss function
        self.h_weights  += d_weights1 * self.lr
        self.o_weights += d_weights2 * self.lr
        
        
        

if __name__ == "__main__":
    X = np.array([[0,0,1],
                  [0,1,1],
                  [1,0,1],
                  [1,1,1]])
    y = np.array([[0],[1],[1],[0]])
    
    mlp = MLP(X)
    
    for i in range(500):
        feed = mlp.propagate_forwad(X)
        mlp.propagate_backwards(X,y,feed)

In [43]:
df_test = pd.read_csv('mnist_test.csv')
df_train = pd.read_csv('mnist_train.csv')

y_train,X_train = np.split(df_train,[1], axis=1)  ## Splits DataFrame into Labels // Training sets
y_test,X_test = np.split(df_test,[1], axis=1)  ## Splits DataFrame into Labels // Test sets

tmp = []
for i in y_train['5']:    ##convert labels to array
    if i == 0:
        tmp.append([0,0,0,0,0,0,0,0,0])
    elif i == 1:
        tmp.append([0,0,0,0,0,0,0,0,1])
    elif i == 2:
        tmp.append([0,0,0,0,0,0,0,1,0])
    elif i == 3:
        tmp.append([0,0,0,0,0,0,1,0,0])
    elif i == 4:
        tmp.append([0,0,0,0,0,1,0,0,0])
    elif i == 5:
        tmp.append([0,0,0,0,1,0,0,0,0])
    elif i == 6:
        tmp.append([0,0,0,1,0,0,0,0,0])
    elif i == 7:
        tmp.append([0,0,1,0,0,0,0,0,0])
    elif i == 8:
        tmp.append([0,1,0,1,0,0,0,0,0])
    elif i == 9:
        tmp.append([1,0,0,0,0,0,0,0,0])
        
y_trainR = np.array(tmp)   

mnist_mlp = MLP(X_train)  #creates MLP

In [38]:
for i in range(5):
    feed = mnist_mlp.propagate_forwad(X_train)
    mnist_mlp.propagate_backwards(X_train,y_trainR,feed)
print(feed)

In [47]:
y_train

Unnamed: 0,5
0,0
1,4
2,1
3,9
4,2
5,1
6,3
7,1
8,4
9,3


In [50]:
y_trainR[:10]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0]])

In [52]:
feed #results

array([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1.]])