In [1]:
# python notebook for Make Your Own Neural Network
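# code for a 3-layer neural network (originally written for the MNIST dataset), applied here to the CIFAR-10 dataset converted to grayscale
# note: the version this was derived from created additional training examples by rotating each original by +/- 10 degrees; no rotation is performed in this notebook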
# (c) Tariq Rashid, 2016
# license is GPLv2

In [2]:
# numpy provides arrays and useful functions for working with them
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# scipy.ndimage for rotating image arrays
import scipy.ndimage

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) fully connected network with sigmoid activations.

    The weights live in two matrices:
      * ``wih`` - input -> hidden links, shape ``(hnodes, inodes)``
      * ``who`` - hidden -> output links, shape ``(onodes, hnodes)``
    Row j, column i of a matrix holds the weight of the link from node i of one
    layer to node j of the next layer.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Record the layer sizes and learning rate and draw random starting weights."""
        # number of nodes in the input, hidden and output layers
        self.inodes, self.hnodes, self.onodes = inputnodes, hiddennodes, outputnodes

        # starting weights are sampled from a normal distribution centred on zero
        # whose standard deviation is 1/sqrt(number of nodes feeding the layer)
        self.wih = numpy.random.normal(0.0, self.inodes ** -0.5, (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, self.hnodes ** -0.5, (self.onodes, self.hnodes))

        # step size applied to every weight update
        self.lr = learningrate

        # the activation function is the sigmoid, provided by scipy as expit()
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Push a column of inputs through both layers.

        Returns the pair ``(hidden_outputs, final_outputs)``.
        """
        # signals into the hidden layer, then out of it
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        # signals into the output layer, then out of it
        final_outputs = self.activation_function(numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Adjust both weight matrices in place using one input/target example."""
        # turn the flat sequences into 2d column vectors
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # error at the output layer is (target - actual)
        output_errors = targets - final_outputs
        # error at the hidden layer: the output errors split by the link weights and
        # recombined at the hidden nodes (computed before who is modified below)
        hidden_errors = numpy.dot(self.who.T, output_errors)

        # each layer's error multiplied by the slope of the sigmoid at that layer's output
        output_step = output_errors * final_outputs * (1.0 - final_outputs)
        hidden_step = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        # update the hidden -> output weights, then the input -> hidden weights
        self.who += self.lr * numpy.dot(output_step, hidden_outputs.T)
        self.wih += self.lr * numpy.dot(hidden_step, inputs.T)

    def query(self, inputs_list):
        """Return the output-layer signals (a column vector) for one input example."""
        # turn the flat sequence into a 2d column vector
        inputs = numpy.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

In [4]:
# number of input, hidden and output nodes
#input_nodes = 1024
#hidden_nodes = 200
#output_nodes = 10

# learning rate
#learning_rate = 0.1

# create instance of neural network
#n = neuralNetwork(input_nodes,hidden_nodes,output_nodes, learning_rate)

In [5]:
# load the mnist training data CSV file into a list
#training_data_file = open("mnist_dataset/mnist_train.csv", 'r')
#training_data_list = training_data_file.readlines()
#training_data_file.close()


def unpickle(file):
    """Load and return the single pickled object stored at the path ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``;
    the code that calls this indexes the returned dictionaries with bytes
    keys such as ``b'data'``.

    NOTE: pickle.load can execute arbitrary code while loading, so only use
    this on files from a trusted source.
    """
    import pickle

    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# training data: read from the five files CIFAR-10/data_batch_1 .. data_batch_5
training_batches = []
training_label_list = []
for batch_no in range(1, 6):
    # named `batch` rather than `dict` so the builtin dict() stays usable in later cells
    batch = unpickle("CIFAR-10/data_batch_{0}".format(batch_no))
    training_batches.append(batch[b'data'])
    # extend our own list so the label list inside the loaded batch is never modified
    training_label_list.extend(batch[b'labels'])
# join the batches once, row-wise, rather than re-copying a growing array per batch
training_data_list = numpy.concatenate(training_batches, axis=0)

# test data: a single file
test_batch = unpickle("CIFAR-10/test_batch")
test_data_list = test_batch[b'data']
test_label_list = test_batch[b'labels']

def rgb_rows_to_gray(rgb_rows, pixels_per_channel=1024):
    """Convert flattened colour images to flattened grayscale images.

    Each row of ``rgb_rows`` is read as three consecutive planes of
    ``pixels_per_channel`` values, taken as red, then green, then blue.
    Every output pixel is the weighted sum
    ``0.2989 * red + 0.5870 * green + 0.1140 * blue``.

    Parameters:
        rgb_rows: 2d array-like with ``3 * pixels_per_channel`` columns.
        pixels_per_channel: number of values in one colour plane (1024 by default).

    Returns:
        A new float array with one row per input row and
        ``pixels_per_channel`` columns. The input is not modified.

    Raises:
        ValueError: if ``rgb_rows`` is not 2d or has the wrong number of
            columns (for example because it has already been converted).
    """
    rgb_rows = numpy.asarray(rgb_rows)
    if rgb_rows.ndim != 2 or rgb_rows.shape[1] != 3 * pixels_per_channel:
        raise ValueError(
            "expected a 2d array with {0} columns, got shape {1}".format(
                3 * pixels_per_channel, rgb_rows.shape))

    red = rgb_rows[:, :pixels_per_channel]
    green = rgb_rows[:, pixels_per_channel:2 * pixels_per_channel]
    blue = rgb_rows[:, 2 * pixels_per_channel:]

    # accumulate in place into one float array so only a single extra
    # temporary of the output size exists at any time
    gray = red.astype(float)
    gray *= 0.2989
    gray += 0.5870 * green
    gray += 0.1140 * blue
    return gray


# the output size now follows the input, so nothing depends on the
# data sets having exactly 50000 and 10000 rows
training_data_list = rgb_rows_to_gray(training_data_list)
test_data_list = rgb_rows_to_gray(test_data_list)

    
#plt.imshow(training_data_list[1].reshape(32,32), cmap='gray')

#print(len(gray))
#print(gray)
#plt.imshow(gray.reshape(32,32), cmap='gray')
#plt.imshow(training_data_list[1].reshape(3,32,32).transpose(1,2,0))

#print(dict[b'batch_label'])
#print(dict[b'filenames'])


#plt.imshow(training_data_list[11].reshape(3,32,32).transpose(1,2,0))


#print(len(training_data_list))
#print(type(training_data_list))
#print(training_data_list)
# sanity check: show the first training row after the grayscale conversion earlier in this cell
print(training_data_list[0])
#print(type(training_data_list[0]))
#print(training_data_list[0][0], training_data_list[0][1023], training_data_list[0][3071], training_data_list[0][3071])
#print(len(training_label_list))
#print(type(training_label_list))
#print(training_label_list[0])
#print(training_label_list[0], training_label_list[1])

#print("test_data length", len(test_data_list))
#print(len(test_label_list))

#print("training_data_list.ndim = ", training_data_list.ndim)
#print("training_data_list.shape = ", training_data_list.shape)
#print("type(training_data_list[0][0]) = ", type(training_data_list[0][0]))
#label_array = numpy.array(training_label_list, dtype='uint8', ndmin=2).T
#print("label_array.ndim = ", label_array.ndim)
#print("label_array.shape = ", label_array.shape)
#print("type(label_array[0][0]) = ", type(label_array[0][0]))

#training_data_list=numpy.concatenate((label_array, training_data_list), axis=1)

#print("training_data_list.ndim = ", training_data_list.ndim)
#print("training_data_list.shape = ", training_data_list.shape)
#print("type(training_data_list[0][0]) = ", type(training_data_list[0][0]))


#dic_ex = {b"k1":'value1', "k2":[1, 3], "k3":'string'}
#print(dic_ex[b"k1"], dic_ex["k2"], dic_ex["k3"])


#print(len(training_data_list))
#print(type(training_data_list))
#print(training_data_list)
#print(training_data_list[0])

#print(training_data_list[0,0],training_data_list[0,1:])
#img = training_data_list[1]

#print("len(img)=", len(img), "len(img[1:]=", len(img[1:]) )
#print("img=",img)
#plt.imshow(img[1:].reshape(3,32,32).transpose(1,2,0))

#new_list = training_data_list[0,1:].reshape(3,32,32)
#print(new_list)
#new_list = new_list.transpose(1,2,0)
#plt.imshow(new_list)
#plt.imshow((training_data_list[1][1:]).reshape(3,32,32).transpose(1,2,0))
#print("training_data_list[1][1:]", training_data_list[1][1:])

[ 61.2111  44.9847  48.023  ... 188.5304 123.9759  98.9767]


In [6]:
def _scale_inputs(record):
    """Return ``record`` as a float array rescaled so 0 maps to 0.01 and 255 maps to 1.0."""
    # numpy.asfarray was removed in NumPy 2.0; asarray with an explicit float
    # dtype gives the same result on both old and new NumPy versions
    return (numpy.asarray(record, dtype=float) / 255.0 * 0.99) + 0.01


def do_experiment(in_no, hi_no, out_no, lr, epochs):
    """Train a fresh neuralNetwork and return its accuracy on the test set.

    The training and test data are read from the module-level names
    ``training_label_list`` / ``training_data_list`` and
    ``test_label_list`` / ``test_data_list``.

    Parameters:
        in_no: number of input nodes.
        hi_no: number of hidden nodes.
        out_no: number of output nodes (one per label value).
        lr: learning rate.
        epochs: number of passes over the whole training set.

    Returns:
        The fraction of test records whose highest-scoring output node
        equals the record's label.
    """
    # create instance of neural network
    n = neuralNetwork(in_no, hi_no, out_no, lr)

    # train the neural network: one pass over every training record per epoch
    for _ in range(epochs):
        for label, record in zip(training_label_list, training_data_list):
            # target output values are all 0.01, except the desired label which is 0.99
            targets = numpy.zeros(out_no) + 0.01
            targets[int(label)] = 0.99
            n.train(_scale_inputs(record), targets)

    # test the neural network
    # scorecard holds 1 for every correct answer and 0 for every wrong one
    scorecard = []
    for correct_label, record in zip(test_label_list, test_data_list):
        outputs = n.query(_scale_inputs(record))
        # the index of the highest value corresponds to the label
        label = numpy.argmax(outputs)
        scorecard.append(1 if label == correct_label else 0)

    scorecard_array = numpy.asarray(scorecard)
    performance = scorecard_array.sum() / scorecard_array.size

    return performance


In [None]:
# load the mnist test data CSV file into a list
#test_data_file = open("mnist_dataset/mnist_test.csv", 'r')
#test_data_list = test_data_file.readlines()
#test_data_file.close()

In [None]:
# epoch=1,
# [1024, 50, 10, 0.1, 1] result= 0.2516
# [1024, 200, 10, 0.1, 1] result= 0.2351
# [1024, 300, 10, 0.1, 1] result= 0.2426
# [1024, 400, 10, 0.1, 1] result= 0.2289
# [1024, 500, 10, 0.1, 1] result= 0.2692

# epoch=5
# [1024, 50, 10, 0.1, 5] result= 0.2311
# [1024, 100, 10, 0.1, 5] result= 0.265
# [1024, 200, 10, 0.1, 5] result= 0.2475
# [1024, 300, 10, 0.2, 5] result= 0.1846
# [1024, 400, 10, 0.1, 5] result= 0.2482
# [1024, 500, 10, 0.1, 5] result= 0.23


# hidden 50, 100, 200, learning_rate <= 0.1, epochs >=5

# hidden 100
# [1024, 100, 10, 0.01, 10] result= 0.3702
# [1024, 100, 10, 0.01, 20] result= 0.3871
# [1024, 100, 10, 0.01, 50] result= 0.3772
# [1024, 100, 10, 0.005, 20] result= 0.3783
# [1024, 100, 10, 0.005, 50] result= 0.3978




# experiment grid: every learning rate is tried at every epoch count, with the
# layer sizes fixed at 1024 input, 100 hidden and 10 output nodes
# each entry is [input nodes, hidden nodes, output nodes, learning rate, epochs]
in_parameter_list = [
    [1024, 100, 10, rate, n_epochs]
    for n_epochs in (5, 10, 20, 50)
    for rate in (0.001, 0.003, 0.005, 0.01, 0.02)
]

# run the configurations in order, printing each one with the score it achieved
for in_parameter in in_parameter_list:
    result = do_experiment(*in_parameter)
    print(in_parameter, "result=", result)

[1024, 100, 10, 0.001, 5] result= 0.2739
[1024, 100, 10, 0.003, 5] result= 0.3143
[1024, 100, 10, 0.005, 5] result= 0.3307
[1024, 100, 10, 0.01, 5] result= 0.3474
[1024, 100, 10, 0.02, 5] result= 0.3545
[1024, 100, 10, 0.001, 10] result= 0.3011
[1024, 100, 10, 0.003, 10] result= 0.3424
[1024, 100, 10, 0.005, 10] result= 0.3554
[1024, 100, 10, 0.01, 10] result= 0.3721
[1024, 100, 10, 0.02, 10] result= 0.3605
[1024, 100, 10, 0.001, 20] result= 0.3294
[1024, 100, 10, 0.003, 20] result= 0.3644
[1024, 100, 10, 0.005, 20] result= 0.3783
[1024, 100, 10, 0.01, 20] result= 0.3733
[1024, 100, 10, 0.02, 20] result= 0.3455
[1024, 100, 10, 0.001, 50] result= 0.3602
[1024, 100, 10, 0.003, 50] result= 0.3946
[1024, 100, 10, 0.005, 50] result= 0.3978
[1024, 100, 10, 0.01, 50] result= 0.362
