In [1]:
#0 OR 1 NUERAL NETWORK DETECTOR ~95% accuracy, although may vary from low to high 90's for accuracy

#BY: Rahul.Gudise
# https://github.com/Chipsandnonos

#Trained using the MNIST database, acessed here http://yann.lecun.com/exdb/mnist/
#Developed using NUMPY

#Feel free to fork, and send in suggestions for improvements

import numpy
from mnist import MNIST


In [2]:
ndata = MNIST('samples')
images_train, labels_train = ndata.load_training()
images_test, labels_test = ndata.load_testing()

In [3]:
#The "squish" function choosen for this particular Nueral Network is the sigmoid (logistic) curve
def sigmoid(x): 
    return 1/(1+numpy.exp(-x))

#This is the derivative of the sigmoid curve
def sigmoid_p(x):
    return sigmoid(x) * (1-sigmoid(x))

In [4]:
#NETWORK STRUCTURE
# The goal of this Nueral Network is to detect whether a 28x28 grayscale imageis that of a 1 or 0

#Due to there only being 2 answers, a compact Neural Network structure was picked
#Layer 1 consists of 784 nuerons, each corresponding to a pixel within the original 28x28 image
#Layer 2 consists of 5 nuerons
#Layer 3 consists of 1 nueron, which when it has an activation of 1, signifies the input was a 1, and when 0, means the input was 0


#ALL GLOBAL VARIABLES
pic_size = 28              #The size of the original grayscale picture                
lay1_size = pic_size**2    #Size of the first layer (28x28)
lay2_size = 5           #Size of the second layer (5)
lay3_size = 1              #Size of the third layer (1))

#HYPERPARAMTER 
learn_rate = .01   #The learning rate of the nueral network, a small value was chosen so that the change vector would not
                     #overshoot when adjusting the weights and biases for the cost function

In [5]:
#Will Pick out only 1's and 0's in MNIST TRAINING DATASET
num_shift = 0
for i in range(len(labels_train)):
    
    if not (labels_train[i-num_shift] == 0 or labels_train[i-num_shift] == 1) :
        images_train.pop(i-num_shift)
        labels_train.pop(i-num_shift)
        num_shift = num_shift +1
   
        

#Will Pick out only 1's and 0's in MNIST TESTING DATASET
num_shift = 0

for i in range(len(labels_test)):
    
    if not (labels_test[i-num_shift] == 0 or labels_test[i-num_shift] == 1) :
        labels_test.pop(i-num_shift)
        images_test.pop(i-num_shift)
        num_shift = num_shift +1


In [6]:
#A function which will take all grayscale pixel values in an original MNIST data piece and transfer into the activations
#of the first layer nuerons
#The value "scale", is there simply to convert the grayscale value (0-255), into a number which can be inputted into the nueron
#Nueron domain is 0-1

def img_to_activation(layer1A, image):
    scale = 1/255
    
    for i in range(lay1_size):
        layer1A[i] = image[i]*scale
    
    return layer1A

In [7]:
#A function which will take in the activations of 1st layer nuerons, and with the weights and biases of all layers, to 
#computer the appropriate activations of 2nd and 3rd layer nuerons 
def find_activation(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32): #need to use the biases too
    
    
    layer2A = numpy.matmul(weight21, layer1A)
    
    for i in range(lay2_size):
        layer2A[i] = layer2A[i] + bias21[i]
    
    for i in range(lay2_size):
        layer2A[i] = sigmoid(layer2A[i])
    
    layer3A = numpy.matmul(weight32, layer2A)
    layer3A = layer3A + bias32
    layer3A = sigmoid(layer3A)
   
    return layer2A, layer3A


In [8]:
#A function which will calculate the change vector which points in the direction of greatest ascent of the cost function
#When given the all activations of all nuerons on all layers, and all weights and biases 
def deriv_fill(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32, actual):
    
    #Change Matrices
    
    cweightL1 = numpy.zeros((lay2_size,lay1_size), dtype = float)
    cweightL2 = numpy.zeros((lay2_size),dtype = float )
    
    cbiasL1 = numpy.zeros((lay2_size),dtype = float )
    cbiasL2 = numpy.zeros((lay3_size),dtype = float )
   
    #Constants needed to compute derivatives 
    pred = layer3A
    z = 0
    for i in range(lay2_size):
        z = layer2A[i] * weight32[i]
    z = z + bias32
    
    num_w_2 = lay2_size 
    num_w_1 = lay1_size*lay2_size 
    #--------------------------------------- Derivatives
          
    dc_dsig = 2*(pred-actual)
    dsig_dz = sigmoid_p(z)
    
    #dz_d(any layer 2 weight) = activation, bias = 1
    
    for i in range(lay2_size):
        dz_dw0i = layer2A[i]
        cweightL2[i] = dc_dsig*dsig_dz*dz_dw0i
    cbiasL2[0] = dc_dsig*dsig_dz
    
    for i in range(lay2_size):
        cbiasL1[i] = 1
        dz_da2i = weight32[i]
        for y in range (lay1_size):
            da2i_dwiy = layer1A[y]
            cweightL1[i][y] =  dc_dsig*dsig_dz*dz_da2i*da2i_dwiy

    
  
    return cweightL1,cweightL2,cbiasL1,cbiasL2

In [9]:
#A function which will update all parameters (weights and biases), based on previously calculated direction vector
def update_val(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32, cweightL1,cweightL2,cbiasL1,cbiasL2 ):
     #updates weights connecting layers 1-2    
    for i in range (lay2_size):
        for y in range (lay1_size):
            weight21[i][y] = weight21[i][y] - (learn_rate*cweightL1[i][y])
    
        #updates weights connected layers 2-3
    for i in range (lay2_size):
        weight32[i] = weight32[i] - (learn_rate*cweightL2[i])
    
        #updates biases in layer 2 nuerons
    for i in range (lay2_size):
        bias21[i] = bias21[i] - (learn_rate*bias21[i])
    
        #updates biases in layer 3 nuerons
    bias32[0] = bias32[0] - (learn_rate*cbiasL2[0])
    
    return weight21, weight32, bias21, bias32

In [10]:
#A function which will input a TEST data piece and see the nueral network's output
def ask(image, weight21, weight32, bias21, bias32,layer1A,layer2A,layer3A):
    layer1A = img_to_activation(layer1A, image)
    layer2A, layer3A = find_activation(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32)
    
    return layer3A   

In [11]:
#A function which handles the output of the network after being asked to identify a data piece 
def test(index, weight21,weight32,bias21,bias32,layer1A,layer2A,layer3A):
    pred = ask(images_test[index], weight21, weight32, bias21, bias32,layer1A,layer2A,layer3A)
    actual = labels_test[index]
    num = 0
    succeed = 0.0
    adj = 0
    cost = (pred - actual)**2
    if (pred>.5):
        num = 1
        adj = 0
    else:
        num = 0
        adj = 1
    
    if (num == actual):
        succeed = 1.0
    else:
        succeed = 0.0
    
    print(f"Computer: {num}, with a {adj - pred}%")
    print(f"Actually: {actual}, with a cost of {cost}")
    return succeed

In [12]:
#The function which ties together all of the above training functions, and trains the nueral network
def train():
       
    #To avoid refering to global variables as much, all matrices and vectors were defined here, and transfered between functions as needed
    
    #Activations -- All Activations of nuerons are held here
    layer1A = numpy.zeros((lay1_size),dtype = float )
    layer2A = numpy.zeros((lay2_size),dtype = float )
    layer3A = numpy.zeros((lay3_size),dtype = float )

    #Weights -- All weights are stored here
    weight21 = numpy.zeros((lay2_size,lay1_size), dtype = float)
    weight32 = numpy.zeros((lay2_size),dtype = float )

    #Biases -- All biases are stored here
    bias21 = numpy.zeros((lay2_size),dtype = float )
    bias32 = numpy.zeros((lay3_size),dtype = float )
    
    #Weights and biases start of random, as such this will randomize all enteries in the weight and bias matrices 
    
    #Randomizes all biases and weights in layers 1-2
    for i in range(lay2_size):
        bias21[i] = numpy.random.randn()
        for y in range(lay1_size):
            weight21[i][y] = numpy.random.randn()

    #Randomizes all biases and weights in layers 2-3
    for i in range(5):
        weight32[i] = numpy.random.randn()

    bias32[0] = numpy.random.randn()

    #--------------- Training Process
           #1. Transfers the pixel grayscale values into Layer 1 nueron activations
           #2. Finds the activations of all the other layers
           #3. Finds the appropriate direction vector to find direction of greatest ascent in cost function
           #4. Updates all bias and weights based on the aforementioned direction vector
            
    for i in range (len(labels_train)):
        layer1A = img_to_activation(layer1A, images_train[i])
        layer2A, layer3A = find_activation(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32)
        cweightL1,cweightL2,cbiasL1,cbiasL2 = deriv_fill(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32, labels_train[i])
        weight21, weight32, bias21, bias32 = update_val(layer1A, layer2A, layer3A, weight21, weight32, bias21, bias32, cweightL1,cweightL2,cbiasL1,cbiasL2 )
        
        #A small piece of code to output every 100th cost, so that you can monitor the network's progress
        cost = (layer3A - labels_train[i])**2
        if i%100 == 0:
            print (cost)
    
    #Wipes all layer's nueron's activations
    layer1A = numpy.zeros((lay1_size),dtype = float )
    layer2A = numpy.zeros((lay2_size),dtype = float )
    layer3A = numpy.zeros((lay3_size),dtype = float )
    
    return weight21,weight32,bias21,bias32,layer1A,layer2A,layer3A
    


In [13]:
#Trains the network
weight21,weight32,bias21,bias32,layer1A,layer2A,layer3A = train()


[0.63159383]
[0.0012756]
[0.37278803]
[0.03365413]
[0.24271849]
[0.20591414]
[0.18382802]
[0.15089502]
[0.23597964]
[0.11905404]
[0.05621063]
[0.34692585]
[0.08340025]
[0.00316071]
[0.06748741]
[0.00111331]
[0.00175009]
[0.00037818]
[0.00137303]
[0.19187719]
[0.04207577]
[0.03597878]
[0.6593843]
[0.00034191]
[0.10838397]
[0.02683998]
[0.02498412]
[0.00072166]
[0.02435006]
[0.00025158]
[0.02230951]
[0.0002864]
[0.02154549]
[0.00056502]
[0.01966806]
[0.01857581]
[0.00129228]
[0.01700857]
[0.01073564]
[0.00109116]
[0.01462393]
[0.01408108]
[0.01298052]
[0.01263293]
[0.01187953]
[0.01155364]
[0.000164]
[0.00015769]
[0.00026483]
[0.0098742]
[0.4989028]
[0.00926379]
[0.00930733]
[0.00902361]
[0.00864848]
[0.00852434]
[0.00835899]
[0.00799468]
[0.00012544]
[0.00012432]
[0.00669511]
[0.00652749]
[0.00012325]
[0.00641157]
[0.00628175]
[7.75093918e-05]
[0.00608505]
[0.00010718]
[0.00453143]
[9.45332044e-05]
[0.000221]
[9.16298684e-05]
[0.00610294]
[0.00598788]
[8.70865464e-05]
[0.00563943]
[0.00

In [14]:
#Small piece of code to run through the MNIST TEST DATA through the trained nueral network

accuracy = 0
for i in range(len(labels_test)):
    counter = test(i, weight21,weight32,bias21,bias32,layer1A,layer2A,layer3A)
    accuracy = accuracy + counter

p_accuracy = accuracy/len(labels_test)
print(f"Computer finished testing with a final accuracy of: {p_accuracy}")

Computer: 1, with a [-0.993118]%
Actually: 1, with a cost of [4.73619028e-05]
Computer: 0, with a [0.95341337]%
Actually: 0, with a cost of [0.00217031]
Computer: 1, with a [-0.9931249]%
Actually: 1, with a cost of [4.72669475e-05]
Computer: 0, with a [0.95341337]%
Actually: 0, with a cost of [0.00217031]
Computer: 0, with a [0.95341337]%
Actually: 0, with a cost of [0.00217031]
Computer: 1, with a [-0.993118]%
Actually: 1, with a cost of [4.73619244e-05]
Computer: 0, with a [0.95341337]%
Actually: 0, with a cost of [0.00217031]
Computer: 0, with a [0.95341337]%
Actually: 0, with a cost of [0.00217031]
Computer: 1, with a [-0.99404002]%
Actually: 1, with a cost of [3.55214084e-05]
Computer: 1, with a [-0.99484197]%
Actually: 1, with a cost of [2.66052463e-05]
Computer: 1, with a [-0.99366456]%
Actually: 1, with a cost of [4.01377418e-05]
Computer: 1, with a [-0.99311766]%
Actually: 1, with a cost of [4.7366592e-05]
Computer: 1, with a [-0.99317618]%
Actually: 1, with a cost of [4.65645