In [1]:
'''Load Libs'''
import numpy as np
import pandas as pd
import os.path
from pathlib import Path
from collections import Counter
import random
from sklearn.utils import shuffle
from time import time

In [2]:
'''Load Data And Identify Classes'''
PathTrain =  os.getcwd() + "/data/train.csv"
PathTest =  os.getcwd() + "/data/test.csv"
DFTrain = pd.read_csv(PathTrain, header=None)
DFTest = pd.read_csv(PathTest, header=None)
totalClasses = len(Counter(DFTrain.iloc[:,-1]))

In [3]:
'''One Hot Encoding for Train and Test Labels'''
oneHotTrainLables = np.zeros((len(DFTrain), totalClasses))
oneHotTestLables = np.zeros((len(DFTest), totalClasses))

for i in range(len(DFTrain)):
    oneHotTrainLables[i][DFTrain.iloc[i, -1]] = 1
    
for i in range(len(DFTest)):
    oneHotTestLables[i][DFTest.iloc[i, -1]] = 1

In [118]:
def sigmoid(z):
    #Sigmoid Function
    return 1/(1+np.exp(-z))

def ReLU(z):
    #Rectified Linear Unit
    return np.maximum(0,z)

def delSigmoid(z):
    oj = sigmoid(z)
    return np.multiply(oj, (1-oj))

def delReLU(z):
    z = np.matrix(z)
    z[z > 0] = 1
    z[z <= 0] = 0    
    return z
    

class Layer:
    
    def __init__(self, perceptron_units, perceptron_units_prev, activation_func, layer_type):
        self.type = layer_type
        self.activation = activation_func
        self.perceptron_units = perceptron_units
        self.bias = 0.001*np.random.rand(perceptron_units)
        self.weights = 0.001*np.random.rand(perceptron_units, perceptron_units_prev)
        self.inputs = None
        self.output = None
        self.grad = None
        
    def __repr__(self):
        representation = (self.type, self.perceptron_units, self.weights.shape, self.bias.shape, self.activation)
        return "<%s Layer | Num_Perceptrons_Units = %d, Weights = %s, Bias = %s, Activation = %s>" % representation

        
class NeuralNetwork:
    
    def __init__(self, list_hidden_layers, op_layers_activation, hidden_layers_activation):
        np.random.seed(100)
        self.total_layers = len(list_hidden_layers) + 1
        self.nodes_hidden_layers = list_hidden_layers
        self.layers = []
        
        for i in range(len(list_hidden_layers)):
            if i == 0:
                layer = Layer(list_hidden_layers[i], n, hidden_layers_activation, "Hidden")
                self.layers.append(layer)
            else:
                layer = Layer(list_hidden_layers[i], list_hidden_layers[i-1], hidden_layers_activation, "Hidden")
                self.layers.append(layer)
        
        layer = Layer(r, list_hidden_layers[-1], op_layers_activation, "Output")
        self.layers.append(layer) 
        
    def __repr__(self):
        layers = self.layers
        rep = ""
        print("Neural Network:")
        for i in range(len(layers)):
            rep += "Layer %d: %s\n" % (i, layers[i])
        return rep
            
    def forwardFeed(self, ip_data):
        layer = self.layers[0]
        layer.inputs = np.matrix(ip_data)
        layer.netj = np.matmul(layer.inputs, np.matrix(layer.weights).T) + layer.bias
        layer.output = self.activation(layer.netj, layer.activation)
        
        for i in range(1, len(self.layers)):
            layer = self.layers[i]
            layer.inputs = self.layers[i-1].output
            layer.netj = np.matmul(layer.inputs, np.matrix(layer.weights).T) + layer.bias
            layer.output = self.activation(layer.netj, layer.activation)        
        
    def backPropagation(self, labels):
        labels = np.matrix(labels)
        op_layer = self.layers[-1]
        diff = labels - op_layer.output
        deloj = self.delActivation(op_layer.output, op_layer.activation)
        op_layer.delnetj = -1*np.multiply(diff, deloj)
        #print(op_layer.delnetj.shape, op_layer.inputs.shape)
        op_layer.grad = [np.matmul(op_layer.delnetj.T, op_layer.inputs), op_layer.delnetj]#changed this
        
        for j in range(self.total_layers -2, -1, -1):
            layer_j = self.layers[j]
            deloj = self.delActivation(layer_j.output, layer_j.activation)#changed this
            layer_l = self.layers[j + 1]
            #print(layer_l.weights.shape, layer_l.delnetj.shape)
            delL_thetaLJ = np.matmul(layer_l.delnetj, layer_l.weights)
            layer_j.delnetj = np.multiply(delL_thetaLJ, deloj)
            #print(layer_j.delnetj.shape, layer_j.inputs.shape)
            layer_j.grad = [np.matmul(layer_j.delnetj.T, layer_j.inputs), layer_j.delnetj]#changed this
    
    def activation(self, x, activation):
        if activation == "Sigmoid":
            return sigmoid(x)
        elif activation == "ReLU":
            return ReLU(x)
    
    def delActivation(self, x, activation):
        if activation == "Sigmoid":            
            return delSigmoid(x)
        
        elif activation == "ReLU":
            return delReLU(x)
        
    def updateParams(self, lr, bSize):
        layers = self.layers 
        #print("updating")
        for layer in layers:
            gradient = layer.grad
            
            layer.weights = layer.weights - (lr/bSize)*gradient[0]
            layer.bias = layer.bias - (lr/bSize)*gradient[1]
            
    def meanSquaredError(self, Y, avg=True):
        div = 1
        if avg:
            div = len(Y)
        
        op_layer_labels = self.layers[-1].output
        error = Y - op_layer_labels
        error = np.square(error)
        error = np.sum(error)/(2*div)
        return error
    
    def trainNeuralNetwork(self, X, Labels, eta = 0.1, batch_size=100, max_epoch = 10000, epsilon=1e-10):
        lr = eta
        data = X
        labels = Labels
        
        epoch = 0
        self.forwardFeed(data)
        error_prev = self.meanSquaredError(labels)
        epoch_error_list = [error_prev]
        t0 = time()
        while epoch < max_epoch:
            t1 = time()
            epoch += 1
            #print("Epoch: ", epoch)
            
            data, labels = shuffle(data, labels)
            
            if eta == 0:
                lr = 0.5/np.sqrt(epoch)
                
            for batch_start in range(0, len(data), batch_size):
                batch_end = batch_start + batch_size
                Xb = data[batch_start : batch_end]
                Yb = labels[batch_start : batch_end]
                #print(Xb.shape, Yb.shape)
                self.forwardFeed(Xb)
                self.backPropagation(Yb)
                
                self.updateParams(lr, batch_size)
                error = self.meanSquaredError(Yb)
                
            t2 = time()   
            #self.forwardFeed(data)
            #error = self.meanSquaredError(labels)
            deltaError = np.abs(error - error_prev)
            epoch_error_list.append(error)            
            print("$$ Epoch: {} | Error = {} | DeltaError = {} | LR = {} | Epoch Train Time = {}Sec"
                  .format(epoch, round(error,6), round(deltaError,6), lr, round(t2-t1,2)))
            if deltaError < epsilon:
                break
            error_prev = error
            #epoch += 1
        
        t4 = time()
        print("\n%% Total Epochs ={} | Epsilon = {} | Total Learning Time = {}Min"
              .format(epoch, epsilon, round((t4-t0)/60,2)))
        
            
            

In [119]:
neu_net.trainNeuralNetwork(DTrain, oneHotTrainLables)

$$ Epoch: 1 | Error = 0.481202 | DeltaError = 2.921681 | LR = 0.1 | Epoch Train Time = 0.52Sec
$$ Epoch: 2 | Error = 0.480898 | DeltaError = 0.000304 | LR = 0.1 | Epoch Train Time = 0.47Sec
$$ Epoch: 3 | Error = 0.481046 | DeltaError = 0.000148 | LR = 0.1 | Epoch Train Time = 0.5Sec
$$ Epoch: 4 | Error = 0.481125 | DeltaError = 7.9e-05 | LR = 0.1 | Epoch Train Time = 0.47Sec
$$ Epoch: 5 | Error = 0.481019 | DeltaError = 0.000107 | LR = 0.1 | Epoch Train Time = 0.47Sec
$$ Epoch: 6 | Error = 0.480933 | DeltaError = 8.6e-05 | LR = 0.1 | Epoch Train Time = 0.46Sec
$$ Epoch: 7 | Error = 0.480707 | DeltaError = 0.000226 | LR = 0.1 | Epoch Train Time = 0.51Sec
$$ Epoch: 8 | Error = 0.480744 | DeltaError = 3.8e-05 | LR = 0.1 | Epoch Train Time = 0.45Sec
$$ Epoch: 9 | Error = 0.480874 | DeltaError = 0.00013 | LR = 0.1 | Epoch Train Time = 0.5Sec
$$ Epoch: 10 | Error = 0.480882 | DeltaError = 8e-06 | LR = 0.1 | Epoch Train Time = 0.48Sec
$$ Epoch: 11 | Error = 0.480999 | DeltaError = 0.000117 | 

$$ Epoch: 87 | Error = 0.242914 | DeltaError = 0.004313 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 88 | Error = 0.234438 | DeltaError = 0.008476 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 89 | Error = 0.242592 | DeltaError = 0.008154 | LR = 0.1 | Epoch Train Time = 0.74Sec
$$ Epoch: 90 | Error = 0.234065 | DeltaError = 0.008527 | LR = 0.1 | Epoch Train Time = 0.7Sec
$$ Epoch: 91 | Error = 0.245045 | DeltaError = 0.01098 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 92 | Error = 0.249016 | DeltaError = 0.003971 | LR = 0.1 | Epoch Train Time = 0.67Sec
$$ Epoch: 93 | Error = 0.241755 | DeltaError = 0.007261 | LR = 0.1 | Epoch Train Time = 0.7Sec
$$ Epoch: 94 | Error = 0.205937 | DeltaError = 0.035818 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 95 | Error = 0.234025 | DeltaError = 0.028089 | LR = 0.1 | Epoch Train Time = 0.71Sec
$$ Epoch: 96 | Error = 0.223029 | DeltaError = 0.010996 | LR = 0.1 | Epoch Train Time = 0.7Sec
$$ Epoch: 97 | Error = 0.230463 | DeltaError

$$ Epoch: 172 | Error = 0.205973 | DeltaError = 0.024323 | LR = 0.1 | Epoch Train Time = 0.89Sec
$$ Epoch: 173 | Error = 0.236905 | DeltaError = 0.030933 | LR = 0.1 | Epoch Train Time = 0.92Sec
$$ Epoch: 174 | Error = 0.206907 | DeltaError = 0.029998 | LR = 0.1 | Epoch Train Time = 0.9Sec
$$ Epoch: 175 | Error = 0.175692 | DeltaError = 0.031215 | LR = 0.1 | Epoch Train Time = 0.89Sec
$$ Epoch: 176 | Error = 0.213025 | DeltaError = 0.037333 | LR = 0.1 | Epoch Train Time = 0.86Sec
$$ Epoch: 177 | Error = 0.238488 | DeltaError = 0.025463 | LR = 0.1 | Epoch Train Time = 0.87Sec
$$ Epoch: 178 | Error = 0.207544 | DeltaError = 0.030944 | LR = 0.1 | Epoch Train Time = 0.84Sec
$$ Epoch: 179 | Error = 0.174216 | DeltaError = 0.033327 | LR = 0.1 | Epoch Train Time = 0.89Sec
$$ Epoch: 180 | Error = 0.19748 | DeltaError = 0.023263 | LR = 0.1 | Epoch Train Time = 0.88Sec
$$ Epoch: 181 | Error = 0.216903 | DeltaError = 0.019423 | LR = 0.1 | Epoch Train Time = 0.92Sec
$$ Epoch: 182 | Error = 0.22272 

$$ Epoch: 257 | Error = 0.224365 | DeltaError = 0.015047 | LR = 0.1 | Epoch Train Time = 0.65Sec
$$ Epoch: 258 | Error = 0.237136 | DeltaError = 0.012771 | LR = 0.1 | Epoch Train Time = 0.72Sec
$$ Epoch: 259 | Error = 0.219016 | DeltaError = 0.01812 | LR = 0.1 | Epoch Train Time = 0.72Sec
$$ Epoch: 260 | Error = 0.218269 | DeltaError = 0.000747 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 261 | Error = 0.235734 | DeltaError = 0.017465 | LR = 0.1 | Epoch Train Time = 0.67Sec
$$ Epoch: 262 | Error = 0.246659 | DeltaError = 0.010925 | LR = 0.1 | Epoch Train Time = 0.67Sec
$$ Epoch: 263 | Error = 0.238838 | DeltaError = 0.007821 | LR = 0.1 | Epoch Train Time = 0.82Sec
$$ Epoch: 264 | Error = 0.185142 | DeltaError = 0.053696 | LR = 0.1 | Epoch Train Time = 0.84Sec
$$ Epoch: 265 | Error = 0.252862 | DeltaError = 0.067721 | LR = 0.1 | Epoch Train Time = 0.89Sec
$$ Epoch: 266 | Error = 0.234315 | DeltaError = 0.018547 | LR = 0.1 | Epoch Train Time = 0.87Sec
$$ Epoch: 267 | Error = 0.19109

$$ Epoch: 342 | Error = 0.233871 | DeltaError = 0.002973 | LR = 0.1 | Epoch Train Time = 0.71Sec
$$ Epoch: 343 | Error = 0.232279 | DeltaError = 0.001591 | LR = 0.1 | Epoch Train Time = 0.69Sec
$$ Epoch: 344 | Error = 0.244982 | DeltaError = 0.012702 | LR = 0.1 | Epoch Train Time = 0.66Sec
$$ Epoch: 345 | Error = 0.206196 | DeltaError = 0.038786 | LR = 0.1 | Epoch Train Time = 0.66Sec
$$ Epoch: 346 | Error = 0.246289 | DeltaError = 0.040094 | LR = 0.1 | Epoch Train Time = 0.67Sec
$$ Epoch: 347 | Error = 0.235103 | DeltaError = 0.011186 | LR = 0.1 | Epoch Train Time = 0.65Sec
$$ Epoch: 348 | Error = 0.244397 | DeltaError = 0.009294 | LR = 0.1 | Epoch Train Time = 0.66Sec
$$ Epoch: 349 | Error = 0.234799 | DeltaError = 0.009599 | LR = 0.1 | Epoch Train Time = 0.68Sec
$$ Epoch: 350 | Error = 0.210159 | DeltaError = 0.024639 | LR = 0.1 | Epoch Train Time = 0.7Sec
$$ Epoch: 351 | Error = 0.225046 | DeltaError = 0.014887 | LR = 0.1 | Epoch Train Time = 0.69Sec
$$ Epoch: 352 | Error = 0.20283

KeyboardInterrupt: 

In [1]:
neu_net = NeuralNetwork(nodesInHiddenLayers, "Sigmoid", "ReLu")
neu_net

NameError: name 'NeuralNetwork' is not defined

In [116]:
'''Global Params'''
M = 100 #MiniBatch Size
n = len(DFTrain.columns)-1
nodesInHiddenLayers = [100]
r = totalClasses

In [124]:
#for layer in neu_net.layers:
layer = neu_net.layers[-1]
for x in layer.output:
    print(np.amax(x))

0.681159186392604
0.5827889421705694
0.26025569013431415
0.38946628022928104
0.14262154921532913
0.2917815823719552
0.5834496501560641
0.688159408747932
0.984204348025801
0.2194184703492886
0.5759445116655726
0.8907884870356644
0.5410640257738095
0.4116103498381027
0.16230006872594094
0.29516341557941056
0.2899631887022096
0.3191239784258544
0.8699641692786523
0.45651945609314276
0.5069547340918302
0.976643090748276
0.9866198691646545
0.43752611967247274
0.8439553414892211
0.6210068766678049
0.47690509333823516
0.3012727427062489
0.9752187520848697
0.7668832380161009
0.8125199991045112
0.2745564229449183
0.3499172946144694
0.3251440549094864
0.5241635288158171
0.33939220463237607
0.32666784492959444
0.33321499213119743
0.27641797013997177
0.8808599949277839
0.9854397296350409
0.7702989906156735
0.3941931772577447
0.9043715122923546
0.968076892347778
0.2198123509643054
0.7731018430416955
0.37230192177201976
0.8530828360220072
0.9995908606646203
0.26013360014313697
0.5942934145253023
0.7

In [9]:
DTrain = DFTrain.iloc[:,:-1].to_numpy()

Neural Network:


Layer 0: <Hidden Layer | Num_Perceptrons_Units = 10, Weights = (10, 784), Bias = (10,), Activation = Sigmoid>
Layer 1: <Output Layer | Num_Perceptrons_Units = 26, Weights = (26, 10), Bias = (26,), Activation = Sigmoid>

In [75]:
from glob import glob
files = glob(os.getcwd()+ "\\data\\*csv")
dataframes = [pd.read_csv(f, header = None) for f in files]
dataframes[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,25
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6495,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13
6496,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
6497,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
6498,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


In [154]:
np.random.seed(100)
np.random.random_integers(0,9, (5,5))

  


array([[8, 8, 3, 7, 7],
       [0, 4, 2, 5, 2],
       [2, 2, 1, 0, 8],
       [4, 0, 9, 6, 2],
       [4, 1, 5, 3, 4]])

In [160]:
np.random.seed(100)
p = np.random.random_integers(0,9, (5,20))
q = np.random.random_integers(0,9, (5,6))

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [161]:
p,q

(array([[8, 8, 3, 7, 7, 0, 4, 2, 5, 2, 2, 2, 1, 0, 8, 4, 0, 9, 6, 2],
        [4, 1, 5, 3, 4, 4, 3, 7, 1, 1, 7, 7, 0, 2, 9, 9, 3, 2, 5, 8],
        [1, 0, 7, 6, 2, 0, 8, 2, 5, 1, 8, 1, 5, 4, 2, 8, 3, 5, 0, 9],
        [3, 6, 3, 4, 7, 6, 3, 9, 0, 4, 4, 5, 7, 6, 6, 2, 4, 2, 7, 1],
        [6, 6, 0, 7, 2, 3, 5, 4, 2, 4, 3, 7, 9, 0, 0, 5, 9, 6, 6, 5]]),
 array([[6, 4, 7, 3, 9, 2],
        [3, 8, 7, 1, 5, 9],
        [3, 0, 6, 2, 3, 4],
        [8, 9, 8, 5, 2, 7],
        [5, 9, 0, 9, 8, 6]]))

In [162]:
from sklearn.utils import shuffle

pdash, qdash = shuffle(p,q)
pdash, qdash

(array([[3, 6, 3, 4, 7, 6, 3, 9, 0, 4, 4, 5, 7, 6, 6, 2, 4, 2, 7, 1],
        [4, 1, 5, 3, 4, 4, 3, 7, 1, 1, 7, 7, 0, 2, 9, 9, 3, 2, 5, 8],
        [8, 8, 3, 7, 7, 0, 4, 2, 5, 2, 2, 2, 1, 0, 8, 4, 0, 9, 6, 2],
        [1, 0, 7, 6, 2, 0, 8, 2, 5, 1, 8, 1, 5, 4, 2, 8, 3, 5, 0, 9],
        [6, 6, 0, 7, 2, 3, 5, 4, 2, 4, 3, 7, 9, 0, 0, 5, 9, 6, 6, 5]]),
 array([[8, 9, 8, 5, 2, 7],
        [3, 8, 7, 1, 5, 9],
        [6, 4, 7, 3, 9, 2],
        [3, 0, 6, 2, 3, 4],
        [5, 9, 0, 9, 8, 6]]))

In [122]:
sigmoid(np.random.rand(5,5))

array([[0.63260414, 0.56914641, 0.60456376, 0.69947017, 0.50117971],
       [0.5303549 , 0.66167087, 0.6954773 , 0.53412352, 0.6399376 ],
       [0.7091629 , 0.55211061, 0.5461999 , 0.52706773, 0.55470451],
       [0.72683506, 0.69246806, 0.54287967, 0.69343437, 0.56809273],
       [0.60628054, 0.71910568, 0.69373714, 0.58324576, 0.54374052]])

In [139]:

np.random.shuffle(p)
p

array([[0.97862378, 0.81168315, 0.17194101, 0.81622475, 0.27407375],
       [0.89132195, 0.20920212, 0.18532822, 0.10837689, 0.21969749],
       [0.43170418, 0.94002982, 0.81764938, 0.33611195, 0.17541045],
       [0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333],
       [0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886]])

(array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   1,   1,   1,   2,   2,   5,  23,  45,  60,  72,
         89, 136, 200, 223, 201,  74,   0,   0,   0,   0,   0,   0,   0,
          0,   1,   1,   2,   1,   1,   1,   3,  30, 109, 136, 177, 214,
        235, 244, 229, 210, 156, 114,  91,  27,   0,   0,   0,   0,   0,
          0,   0,   0,   3,  14,  27,  41,  72, 163