# Artificial Neural Network

## Implementation

### Imports

In [1]:
%matplotlib inline
import scipy.special
import matplotlib.pyplot as plt
import numpy
import pandas
import random
import os

### Global variables

In [2]:
# Network topology shared by every experiment below:
# (input layer size, hidden layer size, output layer size).
input_nodes, hidden_nodes, output_nodes = 784, 400, 10

# Default hyper-parameters; each experiment varies one of them
# and keeps the other two at these values.
learning_rate = 0.2
batch_size = 40
epochs = 100

### Neural network class

In [3]:
class neuralNetwork:
    """Artificial Neural Network classifier with one hidden layer of sigmoid units.

    Parameters
    ------------
    inputnodes : int
        Number of input nodes, i.e. the number of features in an instance.
    hiddennodes : int
        Number of hidden nodes in the net.
    outputnodes : int
        Number of output nodes in the net (one per class label).
    learningrate : float
        Learning rate (between 0.0 and 1.0).
    batch_size : int
        Number of training instances whose gradients are averaged before the
        weights are updated.
        batch_size <= 0 : standard (full-batch) gradient descent
        batch_size >  0 : mini-batch / stochastic gradient descent
    epochs : int
        Number of passes over the training data.

    Attributes
    -----------
    wih : 2d-array, shape = [hnodes, inodes]
        Input2Hidden node weights.
    who : 2d-array, shape = [onodes, hnodes]
        Hidden2Output node weights.
    lr, bs, ep : float, int, int
        Learning rate, batch size and number of epochs as passed in.
    E : list
        Sum-of-squares error value of each epoch (one 1-element array per epoch).
    results : 2d-array, shape = [n_instances, 2]
        [predicted label, target label] for each instance of the last call to test().
    activation_function : callable
        The sigmoid function, which squashes a node input into the range (0, 1).
    """

    def __init__(self, inputnodes = 784, hiddennodes = 200, outputnodes = 10, learningrate = 0.2, batch_size = 40, epochs = 100):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Link weight matrices: wih (input to hidden) and who (hidden to output).
        # Weights are drawn from a normal distribution centred on 0 whose standard
        # deviation is 1/sqrt(number of incoming links) of the receiving layer:
        # the hidden layer receives `inodes` links, the output layer `hnodes` links.
        self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # learning rate, batch size and number of epochs
        self.lr = learningrate
        self.bs = batch_size
        self.ep = epochs

        # sum-of-squares error of each training epoch
        self.E = []

        # [predicted, target] pairs produced by test()
        self.results = []

        # expit() is the logistic sigmoid 1 / (1 + exp(-x))
        self.activation_function = scipy.special.expit

    def batch_input(self, input_list):
        """Yield consecutive batches of `self.bs` instances from the input list.

        The last batch may be shorter. When `self.bs` is not positive the whole
        list is yielded as a single batch (standard gradient descent); an empty
        list yields nothing.
        """
        total = len(input_list)
        if total == 0:
            return
        step = self.bs if self.bs > 0 else total
        for start in range(0, total, step):
            yield input_list[start:start + step]

    @staticmethod
    def _parse_instance(instance):
        """Split one "label,v1,v2,..." line into (int label, 1d-array of scaled inputs)."""
        all_values = instance.split(',')
        label = int(all_values[0])
        # Raw values in 0-255 are scaled to 0.0-0.99 and shifted to 0.01-1.0 so
        # that no input is exactly zero (a zero input would never update its weights).
        inputs = (numpy.asarray(all_values[1:], dtype = float) / 255.0 * 0.99) + 0.01
        return label, inputs

    def _forward(self, inputs):
        """Forward pass for a column vector of inputs; returns (hidden_outputs, final_outputs)."""
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        final_outputs = self.activation_function(numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, train_inputs):
        """Train the neural net.

        For every epoch and every batch this runs the forward pass, computes the
        output error, back-propagates it to the hidden layer, accumulates the
        gradients over the batch and updates the weights with their average.

        Parameters
        ----------
        train_inputs : sequence of str, length = n_instances
            One comma-separated line per training instance. The first field is
            the integer class label, the remaining fields are the raw feature
            values (expected in the range 0-255).

        Returns
        -------
        self : object
        """
        for e in range(self.ep):
            print("Training epoch#: ", e)
            sum_error = 0.0
            for batch in self.batch_input(train_inputs):
                # gradients accumulated over the instances of this batch
                delta_who = 0
                delta_wih = 0

                for instance in batch:
                    target_label, inputs = self._parse_instance(instance)

                    # one output node per class: 0.99 for the target class, 0.01 elsewhere
                    targets = numpy.zeros((self.onodes, 1)) + 0.01
                    targets[target_label, 0] = 0.99

                    # column vector of inputs
                    inputs = numpy.array(inputs, ndmin = 2).T

                    hidden_outputs, final_outputs = self._forward(inputs)

                    # element-wise difference between target and actual output
                    output_errors = targets - final_outputs
                    # hidden layer error: output errors split by the weights and
                    # recombined at the hidden nodes
                    hidden_errors = numpy.dot(self.who.T, output_errors)

                    delta_who += numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)), numpy.transpose(hidden_outputs))
                    delta_wih += numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), numpy.transpose(inputs))

                    # squared error of this instance, summed over the whole epoch
                    sum_error += numpy.dot(output_errors.T, output_errors)

                # Average over the instances actually seen: the final batch of an
                # epoch can be shorter than self.bs, and self.bs may be 0.
                batch_len = len(batch)
                self.who += self.lr * (delta_who / batch_len)
                self.wih += self.lr * (delta_wih / batch_len)

            self.E.append(numpy.asarray(sum_error, dtype = float).flatten())
            print("errors (SSE): ", self.E[-1])
        return self

    def query(self, inputs_list):
        """Return the network outputs for one instance.

        Parameters
        ----------
        inputs_list : array-like, shape = [n_features]
            Already scaled feature values, e.g. [1.0, 0.5, -1.5].

        Returns
        -------
        final_outputs : 2d-array, shape = [onodes, 1]
            Sigmoid activation of every output node.
        """
        # [[1.0, 0.5, -1.5]] transposed into a column vector
        inputs = numpy.array(inputs_list, ndmin = 2).T
        return self._forward(inputs)[1]

    def test(self, test_inputs):
        """Classify every test instance and store the outcome in `self.results`.

        Parameters
        ----------
        test_inputs : sequence of str
            Lines in the same "label,v1,v2,..." format as accepted by train().
        """
        self.results = []

        for instance in test_inputs:
            target_label, inputs = self._parse_instance(instance)

            # the index of the strongest output node is the predicted class label
            outputs = self.query(inputs)
            predict_label = numpy.argmax(outputs)

            self.results.append([predict_label, target_label])

        # float array with one [predicted, target] row per test instance
        self.results = numpy.asarray(self.results, dtype = float)

## MNIST dataset

### Loading files

In [4]:
# Read both MNIST CSV files into lists of raw text lines.
# The context managers close each file even if reading raises.
with open("../datasets/mnist_train.csv", 'r') as mnist_train_file:
    mnist_train_list = mnist_train_file.readlines()
print("train set size: ", len(mnist_train_list))

with open("../datasets/mnist_test.csv", 'r') as mnist_test_file:
    mnist_test_list = mnist_test_file.readlines()
print("test set size: ", len(mnist_test_list))

# quick run: keep a random sample of at most 1000 lines (drawn without
# replacement) from each set; min() avoids a ValueError on smaller files
mnist_train_list = numpy.random.choice(mnist_train_list, min(1000, len(mnist_train_list)), replace = False)
mnist_test_list = numpy.random.choice(mnist_test_list, min(1000, len(mnist_test_list)), replace = False)

train set size:  60000
test set size:  10000


### Normalization

In [5]:
# Container for pre-normalised training rows; it stays empty because the loop
# that would fill it is disabled below.
normalized_data = []
# NOTE: the triple-quoted string below is disabled code, not documentation.
# Being the last expression of the cell, the notebook echoes it as the cell's
# output. The same scaling is applied per instance inside neuralNetwork.train
# and neuralNetwork.test.
"""
for entry in mnist_train_list:
	all_values = entry.split(',')
	normalized_data.append((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)
mnist_train_list = normalized_data
print(normalized_data[0])
"""

"\nfor entry in mnist_train_list:\n\tall_values = entry.split(',')\n\tnormalized_data.append((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)\nmnist_train_list = normalized_data\nprint(normalized_data[0])\n"

#### Epochs

##### Training

In [None]:
# Train one network per epoch count, keeping every other hyper-parameter at
# its global default, and collect the trained models in order.
epoch_numbers = [1, 10, 100, 200, 300]
ann_epoch_numbers_list = []

for epoch_number in epoch_numbers:
    print("Number of epochs : ", epoch_number)
    n = neuralNetwork(
        inputnodes = input_nodes,
        hiddennodes = hidden_nodes,
        outputnodes = output_nodes,
        learningrate = learning_rate,
        batch_size = batch_size,
        epochs = epoch_number,
    )
    n.train(mnist_train_list)
    ann_epoch_numbers_list.append(n)

Number of epochs :  1
Training epoch#:  0
errors (SSE):  [638.84431313]
Number of epochs :  10
Training epoch#:  0
errors (SSE):  [742.70999774]
Training epoch#:  1
errors (SSE):  [317.66976716]
Training epoch#:  2
errors (SSE):  [253.9697914]
Training epoch#:  3
errors (SSE):  [221.1016366]
Training epoch#:  4
errors (SSE):  [199.86066287]
Training epoch#:  5
errors (SSE):  [184.62796603]
Training epoch#:  6
errors (SSE):  [172.96904941]
Training epoch#:  7
errors (SSE):  [163.61723705]
Training epoch#:  8
errors (SSE):  [155.84426622]
Training epoch#:  9
errors (SSE):  [149.20234353]
Number of epochs :  100
Training epoch#:  0
errors (SSE):  [713.32933604]
Training epoch#:  1


##### Compute the accuracy of the neural networks

In [None]:
# Accuracy (in percent) on the test set for every model of the epochs experiment.
epoch_numbers_model_results = []
for model in ann_epoch_numbers_list:
    model.test(mnist_test_list)
    # every row of model.results is [predicted label, target label]
    correct = sum(1 for predicted, target in model.results if predicted == target)
    correct = 100 * (correct / len(model.results))
    epoch_numbers_model_results.append(correct)

##### Show the accuracy of the neural networks

In [None]:
# Bar chart: one bar per epoch count, bar height = test accuracy in percent.
objects = epoch_numbers
performance = epoch_numbers_model_results
y_pos = numpy.arange(len(objects))

# Grab the current figure and its axes up front and draw through the axes API;
# the figure handle is still needed after plt.show() to write the image file.
fig_epoch_numbers = plt.gcf()
axes = fig_epoch_numbers.gca()
axes.bar(y_pos, performance, align = 'center', alpha = 0.5)
axes.set_xticks(y_pos)
axes.set_xticklabels(objects)
axes.set_ylabel('Accuracy')
axes.set_title('epochs')

plt.show()
fig_epoch_numbers.savefig('../images/ann_epoch_numbers_mnist.png', dpi = 300)

#### Batch size

##### Training

In [None]:
# Train one network per batch size, keeping every other hyper-parameter at its
# global default. The last entry uses the whole training set as a single batch.
ann_batch_sizes_list = []
batch_sizes = [1, 10, 100, 200, len(mnist_train_list)]

# The loop variable is deliberately NOT named `batch_size`: reusing that name
# would overwrite the global default, and every cell run afterwards (e.g. the
# learning-rate experiment) would silently train with the last value tried here.
for candidate_batch_size in batch_sizes:
    print("Batch_sizes : ", candidate_batch_size)
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate, candidate_batch_size, epochs)
    n.train(mnist_train_list)
    ann_batch_sizes_list.append(n)

##### Compute the accuracy of the neural networks

In [None]:
# Accuracy (in percent) on the test set for every model of the batch-size experiment.
batch_sizes_model_results = []
for model in ann_batch_sizes_list:
    model.test(mnist_test_list)
    # every row of model.results is [predicted label, target label]
    correct = sum(1 for predicted, target in model.results if predicted == target)
    correct = 100 * (correct / len(model.results))
    batch_sizes_model_results.append(correct)

##### Show the accuracy of the neural networks

In [None]:
# Bar chart: one bar per batch size, bar height = test accuracy in percent.
objects = batch_sizes
performance = batch_sizes_model_results
y_pos = numpy.arange(len(objects))

# Grab the current figure and its axes up front and draw through the axes API;
# the figure handle is still needed after plt.show() to write the image file.
fig_batch_sizes = plt.gcf()
axes = fig_batch_sizes.gca()
axes.bar(y_pos, performance, align = 'center', alpha = 0.5)
axes.set_xticks(y_pos)
axes.set_xticklabels(objects)
axes.set_ylabel('Accuracy')
axes.set_title('batch_size')

plt.show()
fig_batch_sizes.savefig('../images/ann_batch_sizes_mnist.png', dpi = 300)

#### Learning rate

##### Training

In [None]:
# Train one network per learning rate, keeping every other hyper-parameter at
# its global default.
ann_learning_rates_list = []
learning_rates = [0.01, 0.1, 0.2, 0.4, 0.8]

# The loop variable is deliberately NOT named `learning_rate`: reusing that
# name would overwrite the global default and leave it at the last value tried
# here for every cell that is run (or re-run) afterwards.
for candidate_learning_rate in learning_rates:
    print("Learning_rates : ", candidate_learning_rate)
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, candidate_learning_rate, batch_size, epochs)
    n.train(mnist_train_list)
    ann_learning_rates_list.append(n)

##### Compute the accuracy of the neural networks

In [None]:
# Accuracy (in percent) on the test set for every model of the learning-rate experiment.
learning_rates_model_results = []
for model in ann_learning_rates_list:
    model.test(mnist_test_list)
    # every row of model.results is [predicted label, target label]
    correct = sum(1 for predicted, target in model.results if predicted == target)
    correct = 100 * (correct / len(model.results))
    learning_rates_model_results.append(correct)

##### Show the accuracy of the neural networks

In [None]:
# Bar chart: one bar per learning rate, bar height = test accuracy in percent.
objects = learning_rates
performance = learning_rates_model_results
y_pos = numpy.arange(len(objects))

# Grab the current figure and its axes up front and draw through the axes API;
# the figure handle is still needed after plt.show() to write the image file.
fig_learning_rates = plt.gcf()
axes = fig_learning_rates.gca()
axes.bar(y_pos, performance, align = 'center', alpha = 0.5)
axes.set_xticks(y_pos)
axes.set_xticklabels(objects)
axes.set_ylabel('Accuracy')
axes.set_title('learning_rate')

plt.show()
fig_learning_rates.savefig('../images/ann_learning_rates_mnist.png', dpi = 300)

## "Red and White Wine Quality EDA" dataset

### Loading file

In [9]:
# Read the white-wine quality CSV into a list of raw text lines.
# The context manager closes the file even if reading raises.
with open("../datasets/winequality-white.csv", 'r') as wine_quality_file:
    wine_quality_list = wine_quality_file.readlines()

print("set size: ", len(wine_quality_list))

set size:  4899


### Normalize data

In [10]:
# Show one raw row before and after the (not yet written) normalisation step.
# Index 1 is printed rather than index 0 - presumably line 0 of the file is a
# column-header row; TODO confirm.
# NOTE(review): the printed row is ';'-separated, whereas neuralNetwork.train
# and neuralNetwork.test split instances on ',' and read the label from the
# first field, so the rows need converting before they can be fed to the network.
print(wine_quality_list[1])
# The string below is an unfinished placeholder for that conversion; it is a
# bare string expression and has no effect when the cell runs.
"""
for entry in wine_quality_list:
    entry = ?
"""
print(wine_quality_list[1])

7;0.27;0.36;20.7;0.045;45;170;1.001;3;0.45;8.8;6

7;0.27;0.36;20.7;0.045;45;170;1.001;3;0.45;8.8;6



### Split data into *training set* and *testing set*

In [None]:
# Keep only data rows before shuffling. The file presumably starts with a
# column-header line (the cell above prints index 1 as the first data row);
# left in place it would be shuffled into the train or test set. Data rows
# are assumed to start with a digit - TODO confirm for other datasets.
wine_quality_list = [row for row in wine_quality_list if row[:1].isdigit()]
random.shuffle(wine_quality_list)

# 80/20 split: both slices share the same boundary index.
split_index = int(len(wine_quality_list) * .8)

wine_quality_train_list = wine_quality_list[:split_index]
print("train set size: ", len(wine_quality_train_list))

# list[split_index:] means "from the 80% mark to the end", i.e. the remaining
# 20% of the rows - the slice bound is a start position, not a fraction to keep.
wine_quality_test_list = wine_quality_list[split_index:]
print("test set size: ", len(wine_quality_test_list))

#### Epochs

##### Training

##### Compute the accuracy of the neural networks

##### Show the accuracy of the neural networks

#### Batch size

##### Training

##### Compute the accuracy of the neural networks

##### Show the accuracy of the neural networks

#### Learning rate

##### Training

##### Compute the accuracy of the neural networks

##### Show the accuracy of the neural networks