In [1]:
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special

In [2]:
class NeuralNetwork:
    # initialize needed data
    def __init__(self,input_nodes,hidden_nodes,output_nodes,learning_rate):
        # set number of nodes in input, hidden and output layers
        self.inodes = input_nodes
        self.hnodes = hidden_nodes
        self.onodes = output_nodes
        
        # initialize weight matrices
        # input->hidden
        #self.wih = numpy.random.rand(self.hnodes,self.inodes)-0.5
        # center, stardard deviation 1/sqrt(number of links to each node of incoming layer)
        self.wih = numpy.random.normal(0.0,pow(self.inodes,-0.5),(self.hnodes,self.inodes)) 
        print(self.wih)
        # hidden->output
        # self.who = numpy.random.rand(self.onodes,self.hnodes)-0.5
        self.who = numpy.random.normal(0.0,pow(self.hnodes,-0.5),(self.onodes,self.hnodes))
        
        # activation function is the sigmoid function
        self.activation_function = lambda x: scipy.special.expit(x)
        
        # learning rate
        self.lr = learning_rate
        pass
    
    # refine the weights 
    def train(self,inputs_list,targets_list):
        # convert inputs_list to 2d array
        inputs = numpy.array(inputs_list,ndmin=2).T
        targets = numpy.array(targets_list,ndmin=2).T
        
        # calcualte signal into hidden layer
        hidden_inputs = numpy.dot(self.wih,inputs)
        # calculate outputs from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)
        
        final_inputs = numpy.dot(self.who,hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        
        # errors
        output_errors = targets - final_outputs
        
        # calculate errors for hidden nodes e_hidden = who.T * e_out 
        hidden_errors = numpy.dot(self.who.T, output_errors)
        
        #update weights between the hidden and output layers
        # new = old - lr * de/dwij
        self.who += self.lr * numpy.dot((output_errors * final_outputs * (1 - final_outputs)),numpy.transpose(hidden_outputs))
        #update weights between the input and hidden layers
        self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1 - hidden_outputs)),numpy.transpose(inputs))
        pass
    
    # get answer from output nodes
    def query(self,inputs_list):
        # convert inputs_list to 2d array
        inputs = numpy.array(inputs_list,ndmin=2).T
        
         # calculate signals into hidden layer -> X = WIH * I
        hidden_inputs = numpy.dot(self.wih,inputs)
         # calculate the signals emerging from hidden layer -> O = activation_function(hidden_inputs)
        hidden_outputs = self.activation_function(hidden_inputs)
        
        final_inputs = numpy.dot(self.who,hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        
        return final_outputs
        pass
    
    def inverse_query(self,label_list):
        label = numpy.array(label_list,ndmin=2).T
        pass

In [3]:
##### Also possible to variate activation functions and scaling
# values for input, hidden and output nodes
input_nodes = 784
hidden_nodes = 100  ##### Change
output_nodes = 10

# learning rate
learning_rate = 0.3 ##### Change

# instanse of neural network
n = NeuralNetwork(input_nodes,hidden_nodes,output_nodes,learning_rate)

# load the MINST training data file into a list
training_data_file = open("mnist dataset/mnist_train.csv",'r')
training_data_list = training_data_file.readlines()
training_data_file.close()

# train the neural network
for epochs in range(0,1):  ##### Change
    for record in training_data_list:
        all_values = record.split(',')
        inputs = (numpy.asfarray(all_values[1:])/255*0.99)+0.01
        targets = numpy.zeros(output_nodes)+0.01
        targets[int(all_values[0])]=0.99
        n.train(inputs,targets)
        pass
    pass

[[ 0.00961617  0.01511934 -0.01671644 ...,  0.00858902 -0.05164853
   0.01321544]
 [-0.02208403 -0.01844109  0.00787377 ..., -0.01825572 -0.04446242
   0.02601322]
 [ 0.06099663  0.00932933 -0.03559514 ..., -0.02583146  0.03771019
   0.02904623]
 ..., 
 [-0.05376414  0.06188282  0.02649261 ...,  0.00016835 -0.04267917
  -0.01228715]
 [-0.01229992  0.0019419   0.0838303  ..., -0.04863056  0.02997458
  -0.05980403]
 [-0.02889273 -0.01654082 -0.00064863 ...,  0.07354785  0.02875533
  -0.00422175]]


In [4]:
# test the neural network!
testing_data_file = open("mnist dataset/mnist_test.csv",'r')
testing_list = testing_data_file.readlines()
testing_data_file.close()

scorecard=[]
for testing_record in testing_list:
    testing_inputs = testing_record.split(',')

    testing_list = (numpy.asfarray(testing_inputs[1:])/255*0.99)+0.01
    result=n.query(testing_list)
    index=numpy.argmax(result)
    if (index==int(testing_inputs[0])):
        scorecard.append(1)
    else:
        scorecard.append(0)
        pass
    pass
scorecard_array=numpy.asarray(scorecard)
print("Performance: ",scorecard_array.sum()/scorecard_array.size)

Performance:  0.9462
