In [3]:
'''Load Libs'''
import numpy as np
import pandas as pd
import os.path
from pathlib import Path
from collections import Counter
import random
from sklearn.utils import shuffle
from time import time

In [4]:
'''Load Data And Identify Classes'''
PathTrain =  os.getcwd() + "/data/train.csv"
PathTest =  os.getcwd() + "/data/test.csv"
DFTrain = pd.read_csv(PathTrain, header=None)
DFTest = pd.read_csv(PathTest, header=None)
totalClasses = len(Counter(DFTrain.iloc[:,-1]))

In [5]:
'''One Hot Encoding for Train and Test Labels'''
oneHotTrainLables = np.zeros((len(DFTrain), totalClasses))
oneHotTestLables = np.zeros((len(DFTest), totalClasses))

for i in range(len(DFTrain)):
    oneHotTrainLables[i][DFTrain.iloc[i, -1]] = 1
    
for i in range(len(DFTest)):
    oneHotTestLables[i][DFTest.iloc[i, -1]] = 1

In [14]:
def sigmoid(z):
    #Sigmoid Function
    return 1/(1+np.exp(-z))

def ReLU(z):
    #Rectified Linear Unit
    return np.maximum(0,z)

def delSigmoid(oj):
    #oj = sigmoid(z)
    return np.multiply(oj, (1-oj))

def delReLU(z):
    z = np.matrix(z)
    z[z > 0] = 1
    z[z <= 0] = 0    
    return z
    

class Layer:
    
    def __init__(self, perceptron_units, n_inputs, activation_func, layer_type):
        self.type = layer_type
        self.activation = activation_func
        self.perceptron_units = perceptron_units
        self.bias = np.random.normal(0,0.005, perceptron_units)
        self.weights = np.random.normal(0,0.005, perceptron_units * (n_inputs)).reshape(perceptron_units, n_inputs)
        self.inputs = None
        self.output = None
        self.grad = None
        
    def __repr__(self):
        representation = (self.type, self.perceptron_units, self.weights.shape, self.bias.shape, self.activation)
        return "<%s Layer | Num_Perceptrons_Units = %d, Weights = %s, Bias = %s, Activation = %s>" % representation

        
class NeuralNetwork:
    
    def __init__(self, list_hidden_layers, op_layers_activation, hidden_layers_activation):
        np.random.seed(1000)
        self.total_layers = len(list_hidden_layers) + 1
        self.nodes_hidden_layers = list_hidden_layers
        self.layers = []
        
        for i in range(len(list_hidden_layers)):
            if i == 0:
                layer = Layer(list_hidden_layers[i], n, hidden_layers_activation, "Hidden")
                self.layers.append(layer)
            else:
                layer = Layer(list_hidden_layers[i], list_hidden_layers[i-1], hidden_layers_activation, "Hidden")                
                self.layers.append(layer)
        
        layer = Layer(r, list_hidden_layers[-1], op_layers_activation, "Output")
        self.layers.append(layer) 
        
    def __repr__(self):
        layers = self.layers
        rep = ""
        print("Neural Network:")
        for i in range(len(layers)):
            rep += "Layer %d: %s\n" % (i, layers[i])
        return rep
            
    def forwardFeed(self, ip_data):
        layer = self.layers[0]
        layer.inputs = np.matrix(ip_data)
        layer.netj = np.matmul(layer.inputs, np.matrix(layer.weights).T) + layer.bias
        layer.output = self.activation(layer.netj, layer.activation)
        #print("hidden", layer.output.shape)
        for i in range(1, len(self.layers)):
            layer = self.layers[i]
            layer.inputs = self.layers[i-1].output
            layer.netj = np.matmul(layer.inputs, np.matrix(layer.weights).T) + layer.bias
            layer.output = self.activation(layer.netj, layer.activation)
            #print("op", layer.output.shape)

    '''       
    def backPropagation(self, labels):
        labels = np.matrix(labels)
        op_layer = self.layers[-1]
        diff = labels - op_layer.output
        deloj = self.delActivation(op_layer.output, op_layer.activation)
        op_layer.delnetj = -1*np.multiply(diff, deloj)
        #print(op_layer.delnetj.shape, op_layer.inputs.shape)
        op_layer.grad = [np.matmul(op_layer.delnetj.T, op_layer.inputs), op_layer.delnetj]#changed this
        
        for j in range(self.total_layers -2, -1, -1):
            layer_j = self.layers[j]
            deloj = self.delActivation(layer_j.output, layer_j.activation)#changed this
            layer_l = self.layers[j + 1]
            #print(layer_l.weights.shape, layer_l.delnetj.shape)
            delL_thetaLJ = np.matmul(layer_l.delnetj, layer_l.weights)
            layer_j.delnetj = np.multiply(delL_thetaLJ, deloj)
            #print("bkPrp",layer_j.delnetj.shape, layer_j.inputs.shape)
            layer_j.grad = [np.matmul(layer_j.delnetj.T, layer_j.inputs), layer_j.delnetj]#changed this
    '''

    def backwardPropagation(self, labels):
        output_layer = self.layers[-1]
        error = labels - output_layer.output
        delOj = self.delActivation(output_layer.output, output_layer.activation)
        output_layer.grad = -1*np.multiply(error, delOj)
        for i in reversed(range(self.total_layers-1)):
            current_layer = self.layers[i]
            next_layer = self.layers[i + 1]
            error = np.dot(next_layer.grad, next_layer.weights)
            delOj = self.delActivation(current_layer.output, current_layer.activation)
            current_layer.grad = np.multiply(error, delOj)
    
    def activation(self, x, activation):
        if activation == "Sigmoid":
            return sigmoid(x)
        elif activation == "ReLU":
            return ReLU(x)
    
    def delActivation(self, x, activation):
        if activation == "Sigmoid":            
            return delSigmoid(x)
        
        elif activation == "ReLU":
            return delReLU(x)
        
    def updateParams(self, lr, bSize):
        layers = self.layers 
        count = 0
        #print("updating")
        for layer in layers:
            gradient = layer.grad
            layer.weights = layer.weights - (lr/bSize)*np.dot(gradient.T, layer.inputs)
            #print("b",layer.bias.shape)
            layer.bias = layer.bias - (lr/bSize)*np.mean(gradient)
            #print("b'",layer.bias.shape)
            #count += 1
            
    def meanSquaredError(self, Y, avg=True):
        div = 1
        #if avg:
        div = len(Y)
        
        op_layer_labels = self.layers[-1].output
        error = Y - op_layer_labels
        error = np.square(error)
        error = np.sum(error)/(2*div)
        return error
    
    def trainNeuralNetwork(self, X, Labels, eta = 0.1, batch_size=100, max_epoch = 10000, epsilon=1e-10):
        lr = eta
        data = X
        labels = Labels
        
        epoch = 0
        self.forwardFeed(data)
        #self.forwardFeed(data[1:100])
        #self.forwardFeed(data)
        error_prev = self.meanSquaredError(labels)
        #return
        #error_prev = np.inf
        epoch_error_list = [error_prev]
        t0 = time()
        while epoch < max_epoch:
            t1 = time()
            epoch += 1
            
            data, labels = shuffle(data, labels)
            
            if eta == 0:
                lr = 0.5/np.sqrt(epoch)
                
            for batch_start in range(0, len(data), batch_size):
                batch_end = batch_start + batch_size
                Xb = data[batch_start : batch_end]
                Yb = labels[batch_start : batch_end]
                #print(Xb.shape, Yb.shape)
                self.forwardFeed(Xb)
                self.backwardPropagation(Yb)
                
                self.updateParams(lr, batch_size)
                #error = self.meanSquaredError(Yb)
                #return
                
            t2 = time()   
            self.forwardFeed(data)
            error = self.meanSquaredError(labels)
            deltaError = error - error_prev
            epoch_error_list.append(error)            
            print("$$ Epoch: {} | Error = {} | DeltaError = {} | LR = {} | Epoch Train Time = {}Sec"
                  .format(epoch, round(error,6), round(deltaError,6), lr, round(t2-t1,2)))
            if np.abs(deltaError) < epsilon:
                break
            error_prev = error
        
        t4 = time()
        print("\n%% Total Epochs ={} | Epsilon = {} | Total Learning Time = {}Min"
              .format(epoch, epsilon, round((t4-t0)/60,2)))
        
            
            

In [15]:
'''Global Params'''
M = 100 #MiniBatch Size
n = len(DFTrain.columns)-1
nodesInHiddenLayers = [100]
r = totalClasses
neu_net = NeuralNetwork(nodesInHiddenLayers, "Sigmoid", "ReLU")
print(neu_net)
neu_net.trainNeuralNetwork(DTrain, oneHotTrainLables, eta=0.1)

Neural Network:
Layer 0: <Hidden Layer | Num_Perceptrons_Units = 100, Weights = (100, 784), Bias = (100,), Activation = ReLU>
Layer 1: <Output Layer | Num_Perceptrons_Units = 26, Weights = (26, 100), Bias = (26,), Activation = Sigmoid>

$$ Epoch: 1 | Error = 0.5 | DeltaError = -2.69027 | LR = 0.1 | Epoch Train Time = 0.36Sec
$$ Epoch: 2 | Error = 0.5 | DeltaError = 0.0 | LR = 0.1 | Epoch Train Time = 0.26Sec

%% Total Epochs =2 | Epsilon = 1e-10 | Total Learning Time = 0.01Min


In [9]:
DTrain = DFTrain.iloc[:,:-1].to_numpy()

In [244]:
#for layer in neu_net.layers:
neu_net.forwardFeed(DTrain)
layer = neu_net.layers[-1]
for x in layer.output:
    print(np.argmax(x))

0
5
24
24
8
24
16
11
15
5
22
10
22
1
9
25
12
2
12
10
10
5
7
2
3
23
14
1
19
23
7
8
6
4
8
24
5
20
7
3
25
15
19
1
1
13
14
15
17
1
15
1
24
7
0
14
14
14
21
13
1
4
4
10
19
1
2
4
4
4
10
23
6
23
10
13
1
15
12
16
15
18
12
13
12
16
17
1
14
21
25
12
8
19
14
5
10
5
25
9
3
12
13
3
13
1
9
13
14
21
14
4
6
23
21
7
0
15
0
15
25
14
12
25
4
12
5
24
21
10
13
5
8
7
22
16
17
3
13
13
7
4
9
11
15
15
5
17
15
21
13
17
13
12
18
1
8
6
25
20
0
24
1
19
11
11
4
23
14
18
16
13
15
11
15
18
23
9
5
12
11
7
21
11
6
8
13
3
2
5
7
13
22
11
20
12
13
21
6
11
22
0
10
0
2
19
18
13
9
22
19
19
3
25
6
11
23
25
16
11
3
12
14
2
8
3
23
3
19
19
23
14
25
12
23
1
19
1
10
0
10
14
8
13
7
16
16
17
21
19
18
23
13
13
10
20
5
1
1
0
24
3
21
21
17
19
19
2
4
21
5
2
12
0
1
23
18
2
7
25
25
14
18
24
23
7
8
5
21
15
13
23
1
3
2
20
2
20
9
6
15
1
22
24
17
25
14
2
19
13
18
4
1
5
14
21
1
5
10
17
20
12
16
9
19
12
5
21
20
0
21
5
23
18
8
1
19
6
8
21
5
19
14
3
7
15
2
7
14
10
23
19
15
23
1
25
25
25
16
7
19
15
3
15
0
13
3
10
16
2
20
11
11
15
18
19
8
1
2
11
21


14
2
7
16
0
2
7
22
1
13
8
19
23
18
23
12
22
11
16
22
3
24
22
5
10
24
25
21
22
11
6
24
7
13
12
0
2
6
0
1
13
25
0
22
16
13
3
14
23
10
4
8
16
16
0
17
4
23
23
10
21
0
23
0
2
14
15
5
0
1
6
13
10
1
11
8
14
13
22
13
11
10
5
0
24
25
6
1
0
7
22
8
23
1
23
1
7
1
20
14
24
0
11
5
5
7
5
4
14
1
18
1
23
0
0
5
0
0
7
25
2
14
19
23
8
19
21
7
3
14
14
18
7
13
12
23
14
3
21
7
1
4
20
0
0
2
18
10
19
21
11
2
18
14
5
11
12
2
11
19
13
5
19
2
21
7
18
12
2
23
11
25
0
4
19
5
10
1
10
12
0
3
19
6
7
11
14
13
4
13
21
19
16
3
25
7
21
3
7
5
9
1
23
0
11
24
22
20
17
24
5
4
14
14
0
12
12
15
1
22
15
4
7
2
25
4
1
11
16
24
2
14
8
16
20
20
21
12
24
10
17
19
15
7
24
1
17
14
17
15
19
14
20
22
20
5
13
6
4
21
7
17
20
23
24
13
7
4
20
2
1
15
23
25
17
16
8
10
14
23
1
14
20
5
24
24
21
5
6
19
8
14
23
7
10
15
20
20
7
11
18
12
20
10
10
8
20
2
15
11
15
9
2
24
13
17
12
11
7
2
21
14
24
7
13
24
14
23
8
3
2
4
21
19
17
14
22
23
2
22
2
0
7
2
20
19
25
4
15
8
25
0
18
14
12
1
7
20
20
20
25
2
15
9
13
7
20
18
14
0
12
1
16
10
3
0
2
1
1
24
1
25
18
18
1

12
2
12
15
1
17
23
6
24
23
21
23
2
19
8
17
6
1
22
1
7
8
7
4
13
22
25
1
20
11
4
14
19
1
20
6
10
22
7
10
8
14
21
9
2
1
19
13
1
13
24
25
23
14
2
1
1
12
21
0
2
23
7
25
2
14
12
8
0
7
22
1
7
14
13
5
7
10
24
8
19
12
11
15
6
24
8
4
17
20
6
8
8
13
0
21
4
7
1
1
20
0
20
5
8
21
16
1
18
21
8
10
6
0
14
0
13
3
24
14
4
18
21
0
5
11
19
18
23
4
3
1
0
23
6
4
19
1
23
22
8
1
20
6
14
11
18
12
23
11
23
17
6
17
4
7
10
16
5
19
10
13
8
13
16
0
2
1
21
9
19
12
3
0
4
10
10
8
6
19
11
2
4
15
1
19
10
18
19
7
7
15
12
22
16
25
1
12
18
25
0
13
22
8
13
14
16
11
7
9
0
18
14
1
1
16
4
23
7
21
19
12
19
23
7
22
10
1
21
23
1
14
7
13
15
14
0
15
6
8
14
7
13
14
21
16
12
23
8
10
14
1
24
5
6
4
14
18
11
4
17
5
14
11
19
14
20
13
17
11
1
11
6
12
19
13
10
20
14
3
19
12
3
2
12
0
17
15
0
17
0
25
12
1
21
1
1
14
6
5
21
23
0
13
2
22
7
9
2
10
14
10
2
2
15
13
9
15
4
1
18
18
3
14
19
2
1
23
5
0
5
20
6
9
13
14
12
3
25
25
0
10
4
7
7
11
14
6
18
12
19
4
10
7
19
24
0
18
25
3
7
13
2
9
15
0
24
23
14
15
9
19
25
0
15
11
17
7
18
12
25
18
0
14
22
19
1
1
7

5
7
8
18
10
1
23
3
6
12
8
21
18
22
0
9
20
5
22
24
6
10
10
14
25
18
21
15
0
24
8
25
12
10
12
23
18
5
22
8
10
23
4
21
21
8
23
20
9
18
1
2
18
10
3
24
21
16
17
13
5
3
2
24
4
19
1
20
18
18
18
1
9
14
18
8
12
20
18
13
15
22
22
18
25
11
2
23
1
7
6
24
8
19
10
5
15
15
17
13
12
15
1
1
18
17
19
3
23
1
19
0
2
19
5
18
5
10
6
2
10
5
18
21
19
5
1
0
15
25
19
0
7
24
18
23
14
10
5
21
11
1
7
5
13
3
18
12
2
22
17
2
17
3
1
22
23
17
20
10
5
24
5
9
19
4
4
21
15
20
19
7
10
2
1
8
4
14
13
13
22
11
6
15
21
3
22
23
2
15
6
12
23
21
1
0
13
13
11
22
2
23
24
17
4
22
5
12
3
2
12
25
19
1
18
25
3
12
17
14
22
14
0
7
9
24
20
25
20
23
1
16
6
16
14
1
6
7
18
14
9
10
6
11
11
1
1
15
10
13
17
17
18
3
9
8
14
23
7
10
13
13
2
11
19
24
1
12
10
14
19
14
25
7
11
22
16
0
6
22
14
3
23
16
20
25
13
2
19
17
20
8
2
2
16
24
24
17
1
17
5
11
20
3
4
23
22
15
12
2
1
9
9
4
13
13
3
8
5
16
18
5
19
14
15
21
0
11
2
7
21
16
9
23
8
15
5
18
10
14
25
6
15
14
1
19
7
21
19
8
17
4
7
1
17
2
20
13
22
1
18
9
24
13
10
23
3
2
25
12
13
15
15
4
6
18
25
19
10
11
5


In [9]:
DTrain = DFTrain.iloc[:,:-1].to_numpy()

In [247]:
DFTest.iloc[:20,-1]

0     25
1     13
2      6
3      6
4      7
5     20
6      5
7     17
8      1
9     14
10    11
11    23
12    11
13     6
14    18
15    21
16    17
17     8
18    14
19    15
Name: 784, dtype: int64

In [75]:
from glob import glob
files = glob(os.getcwd()+ "\\data\\*csv")
dataframes = [pd.read_csv(f, header = None) for f in files]
dataframes[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,25
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6495,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13
6496,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
6497,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
6498,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


In [154]:
np.random.seed(100)
np.random.random_integers(0,9, (5,5))

  


array([[8, 8, 3, 7, 7],
       [0, 4, 2, 5, 2],
       [2, 2, 1, 0, 8],
       [4, 0, 9, 6, 2],
       [4, 1, 5, 3, 4]])

In [160]:
np.random.seed(100)
p = np.random.random_integers(0,9, (5,20))
q = np.random.random_integers(0,9, (5,6))

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [161]:
p,q

(array([[8, 8, 3, 7, 7, 0, 4, 2, 5, 2, 2, 2, 1, 0, 8, 4, 0, 9, 6, 2],
        [4, 1, 5, 3, 4, 4, 3, 7, 1, 1, 7, 7, 0, 2, 9, 9, 3, 2, 5, 8],
        [1, 0, 7, 6, 2, 0, 8, 2, 5, 1, 8, 1, 5, 4, 2, 8, 3, 5, 0, 9],
        [3, 6, 3, 4, 7, 6, 3, 9, 0, 4, 4, 5, 7, 6, 6, 2, 4, 2, 7, 1],
        [6, 6, 0, 7, 2, 3, 5, 4, 2, 4, 3, 7, 9, 0, 0, 5, 9, 6, 6, 5]]),
 array([[6, 4, 7, 3, 9, 2],
        [3, 8, 7, 1, 5, 9],
        [3, 0, 6, 2, 3, 4],
        [8, 9, 8, 5, 2, 7],
        [5, 9, 0, 9, 8, 6]]))

In [162]:
from sklearn.utils import shuffle

pdash, qdash = shuffle(p,q)
pdash, qdash

(array([[3, 6, 3, 4, 7, 6, 3, 9, 0, 4, 4, 5, 7, 6, 6, 2, 4, 2, 7, 1],
        [4, 1, 5, 3, 4, 4, 3, 7, 1, 1, 7, 7, 0, 2, 9, 9, 3, 2, 5, 8],
        [8, 8, 3, 7, 7, 0, 4, 2, 5, 2, 2, 2, 1, 0, 8, 4, 0, 9, 6, 2],
        [1, 0, 7, 6, 2, 0, 8, 2, 5, 1, 8, 1, 5, 4, 2, 8, 3, 5, 0, 9],
        [6, 6, 0, 7, 2, 3, 5, 4, 2, 4, 3, 7, 9, 0, 0, 5, 9, 6, 6, 5]]),
 array([[8, 9, 8, 5, 2, 7],
        [3, 8, 7, 1, 5, 9],
        [6, 4, 7, 3, 9, 2],
        [3, 0, 6, 2, 3, 4],
        [5, 9, 0, 9, 8, 6]]))

In [122]:
sigmoid(np.random.rand(5,5))

array([[0.63260414, 0.56914641, 0.60456376, 0.69947017, 0.50117971],
       [0.5303549 , 0.66167087, 0.6954773 , 0.53412352, 0.6399376 ],
       [0.7091629 , 0.55211061, 0.5461999 , 0.52706773, 0.55470451],
       [0.72683506, 0.69246806, 0.54287967, 0.69343437, 0.56809273],
       [0.60628054, 0.71910568, 0.69373714, 0.58324576, 0.54374052]])

In [139]:

np.random.shuffle(p)
p

array([[0.97862378, 0.81168315, 0.17194101, 0.81622475, 0.27407375],
       [0.89132195, 0.20920212, 0.18532822, 0.10837689, 0.21969749],
       [0.43170418, 0.94002982, 0.81764938, 0.33611195, 0.17541045],
       [0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333],
       [0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886]])

(array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   1,   1,   1,   2,   2,   5,  23,  45,  60,  72,
         89, 136, 200, 223, 201,  74,   0,   0,   0,   0,   0,   0,   0,
          0,   1,   1,   2,   1,   1,   1,   3,  30, 109, 136, 177, 214,
        235, 244, 229, 210, 156, 114,  91,  27,   0,   0,   0,   0,   0,
          0,   0,   0,   3,  14,  27,  41,  72, 163