In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Location of the training CSV, relative to the notebook's working directory.
TRAIN_CSV_PATH = './data/mnist_mini_train.csv'

# Read the whole file into a DataFrame and preview its first rows.
train_df = pd.read_csv(TRAIN_CSV_PATH)
train_df.head()

Unnamed: 0,label,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p775,p776,p777,p778,p779,p780,p781,p782,p783,p784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Column 0 of train_df is split off from the remaining columns.
n_samples = len(train_df)
feature_columns = train_df.iloc[:, 1:]
label_column = train_df.iloc[:, 0]

# Transposed so that every column of train_x is one sample.
train_x = np.array(feature_columns).T
# Labels kept as a single row: shape = 1 * N.
train_y = np.array(label_column).reshape((1, n_samples))

In [26]:
# Sanity check: train_y was reshaped to (1, len(train_df)), i.e. one row with one entry per sample.
train_y.shape

(1, 15000)

In [3]:
def show_sample(index):
	"""
		Draw one row of the global `train_df` as a 28x28 grayscale image.
		index: positional row number inside `train_df`
		The first column of the row is skipped; the remaining values are
		reshaped to 28x28 and rendered on a fixed 0-255 gray scale.
	"""
	pixel_values = train_df.iloc[index].iloc[1:]
	image = np.array(pixel_values).reshape(28, 28)

	plt.imshow(image, cmap='gray', vmin=0, vmax=255)
	plt.axis('off')
	plt.show()

In [9]:
class NeuralNetwork:
	def __init__(self, *, parameters: np.ndarray) -> None:
		if parameters.shape != (13002, 1):
			raise ValueError('parameters shape should be (13002, 1)!')
		
		self.parameters = parameters

		count = 0
		
		# weights1 is of size (16)x(28*28)
		total_size = 16*28*28
		self.weights1 = parameters[count:count+total_size].reshape(16, 28*28)

		count += total_size 

		# biases1 is of size 16x1
		total_size = 16*1
		self.biases1 = parameters[count:count+total_size].reshape(16, 1)

		count += total_size 

		# weights2 is of size 16x16
		total_size = 16*16
		self.weights2 = parameters[count:count+total_size].reshape(16, 16)

		count += total_size 

		# biases2 is of size 16x1
		total_size = 16*1
		self.biases2 = parameters[count:count+total_size].reshape(16, 1)
		
		count += total_size 

		# weights3 is of size 10*16
		total_size = 10*16
		self.weights3 = parameters[count:count+total_size].reshape(10, 16)

		count += total_size 

		# biases3 is of size 10*1
		total_size = 10*1
		self.biases3 = parameters[count:count+total_size].reshape(10, 1)

		self.input_layer = np.zeros((784, 1))
		
		# inner hidden layers of the network
		self.z_hlayer1 = np.zeros((16, 1))
		self.hlayer1 = NeuralNetwork.sigmoid(self.z_hlayer1)

		self.z_hlayer2 = np.zeros((16, 1))
		self.hlayer2 = NeuralNetwork.sigmoid(self.z_hlayer2)

		self.z_output_layer = np.zeros((10, 1))
		self.output_layer = NeuralNetwork.sigmoid(self.z_output_layer)



		del total_size
		del count


	def load_input_layer(self, input_vector: np.ndarray) -> None:
		"""
			Laad the input handwritten digit
			input_vector: np.ndarray of shape (784, 1)
			these numbers are between 0-255
			the function will squish them between 0-1
		"""
		self.input_layer = (input_vector / 255)
		

	def cost_of_single_sample(self, sample: np.ndarray, true_label: int) -> float:
		self.feed_forward()
		
		# construct the output vector based on the label
		desired_output = np.zeros((10, 1))
		desired_output[true_label] = 1.0
		
		# compare the self.output_layer and the desired_output
		# using mean squared error
		MSE = np.sum((self.output_layer - desired_output)**2)
		return MSE


	def cost_of_test_sample(self, test_samples: np.ndarray, true_labels: np.ndarray[int]) -> float:
		"""
			samples: is a np array which each row represents one sample, and 
			each row has 784 numbers in them, the pixel values
		"""
		MSE: float = 0
		for sample, label in zip(test_samples, true_labels):
			MSE += self.cost_of_single_sample(sample, label)
		
		return MSE
	

	def accuracy_score(self, test_samples: np.ndarray, true_labels: np.ndarray[int]) -> float:
		total: int = len(test_samples)
		trues: int = 0

		for sample, label in zip(test_samples, label):
			result = np.argmax(self.predict(sample))
			if result == label:
				trues += 1
		
		return (trues / total)


	def predict(self, sample: np.ndarray) -> np.ndarray:
		self.load_input_layer(sample)
		self.feed_forward()
		return self.output_layer


	def backprop_one_sample(self, sample: np.ndarray, label: int):
		one_hot_label = np.zeros(self.output_layer.shape)
		one_hot_label[label] = 1.0
		
		d_cost_p_ol = 2 * (self.output_layer - one_hot_label)

		#d_cost_p_ol = np.zeros(self.output_layer.shape)
		#for i, a in enumerate(self.output_layer):
		#	d_cost_p_ol[i] = 2 * (a - one_hot_label[i])
		

		d_cost_p_hl2 = np.zeros(self.hlayer2.shape)
		for i, z in enumerate(self.z_output_layer):
			d_cost_p_hl2 += d_cost_p_ol[i] * NeuralNetwork.d_sigmoid(z) * self.weights3[i:i+1, :].transpose()
		
		d_cost_p_hl1 = np.zeros(self.hlayer1.shape)
		for i, z in enumerate(self.z_hlayer1):
			d_cost_p_hl1 += d_cost_p_hl2[i] * NeuralNetwork.d_sigmoid(z) * self.weights2[i:i+1, :].transpose()

		#* should be of shape self.weights3.shape = (10, 16)
		#* self.hlayer2.shape = (16, 1)
		#* d_cost_p_ol.shape = (10, 1)
		d_cost_p_w3 = np.zeros(self.weights3.shape)
		d_cost_p_w3 = (d_cost_p_ol @ self.hlayer2.T) * NeuralNetwork.d_sigmoid(self.z_output_layer) 


		d_cost_p_w2 = d_cost_p_hl2 * NeuralNetwork.d_sigmoid(z) * self.hlayer1

		
		d_cost_p_w1 = d_cost_p_hl1 * NeuralNetwork.d_sigmoid(z) * self.input_layer



		#* should be of shape self.biases3.shape = (10, 1)
		#* d_cost_p_ol.shape = (10, 1)
		#* hence the d_sigmoiod should be a number
		d_cost_p_b3 = d_cost_p_ol * NeuralNetwork.d_sigmoid(self.z_output_layer) * 1
		d_cost_p_b2 = d_cost_p_hl2 * NeuralNetwork.d_sigmoid(self.z_hlayer2) * 1
		d_cost_p_b1 = d_cost_p_hl1 * NeuralNetwork.d_sigmoid(self.z_hlayer1) * 1

		return (
			d_cost_p_w1,
			d_cost_p_b1,
			d_cost_p_w2,
			d_cost_p_b2,
			d_cost_p_w3,
			d_cost_p_b3
		)



	def train(self, training_data: np.ndarray | pd.DataFrame):
		"""
			Trains the model with the labeled training data
			The training process:
			1. initialize self.parameters randomly
			2. calculate the cost function
			3. calculate the gradient of the cost function
			4. adjust the parameters according to the gradient and the learning rate
			5. repeat until the cost is low enough
		"""
		pass
	

	@staticmethod
	def sigmoid(x: float | np.ndarray) -> float:
		return 1 / (1 + np.exp(-x))

	@staticmethod
	def d_sigmoid(x: float | np.ndarray) -> float:
		return np.exp(-x) / (np.pow((1 + np.exp(-x)), 2))

	@staticmethod
	def softmax(a: np.ndarray) -> np.ndarray:
		return np.exp(a) / np.sum(np.exp(a))


	def feed_forward(self) -> None:
		"""
			Will calculate all the values in all the layers 
			based on the weights and biases 
		"""
		# hidden layer 1 has 16 neurons -> shape: (16, 1)
		# weights1.shape = (16, 784)
		# 16*784	784*1 + 16,1
		self.z_hlayer1 = self.weights1 @ self.input_layer + self.biases1
		self.hlayer1 = NeuralNetwork.sigmoid(self.z_hlayer1)

		# hidden layer 2 has 16 neurons -> shape: (16, 1)
		self.z_hlayer2 = self.weights2 @ self.hlayer1 + self.biases2
		self.hlayer2 = NeuralNetwork.sigmoid(self.z_hlayer2)
		
		# output layer has 10 neurons -> shape(10, 1)
		# one neuron for each digit
		self.z_output_layer = self.weights3 @ self.hlayer2 + self.biases3
		self.output_layer = NeuralNetwork.sigmoid(self.z_output_layer)


		if self.output_layer.shape != (10, 1):
			raise ValueError(f'{self.output_layer.shape}not a correct shape!')


	def print_network(self, hidden_layers = False) -> None:
		if hidden_layers:
			print('hLayer 1:')
			print(self.hlayer1)
			
			print('hLayer 2:')
			print(self.hlayer2)

		print('Output Layer:')
		print(self.output_layer)



In [33]:
# The network expects all of its weights and biases as one (13002, 1) column.
N_PARAMETERS = 13002

# Uniform random starting values in [0, 1) for every parameter.
ps = np.random.random(size=(N_PARAMETERS, 1))
NN = NeuralNetwork(parameters=ps)


In [62]:
# First column of train_x (one sample per column) as a (784, 1) column vector.
sample = train_x[:, 0].reshape((784, 1))

NN.load_input_layer(input_vector=sample)
# A single forward pass is enough: feed_forward only reads the loaded input and
# the fixed weights/biases, so calling it again recomputes the same activations.
NN.feed_forward()

NN.print_network()

Output Layer:
[[0.99950768]
 [0.99970504]
 [0.99925648]
 [0.99996283]
 [0.99996871]
 [0.99995127]
 [0.99992155]
 [0.99971559]
 [0.99954786]
 [0.99994295]]


In [124]:
# weights3 is (10, 16); the 0:1 row slice keeps two dimensions, so the transpose is a (16, 1) column.
# NOTE(review): the saved output below shows (16,), which this expression should not produce —
# presumably it is stale from an earlier version of the cell; re-run to confirm.
NN.weights3[0:1, :].T.shape


(16,)

In [67]:
import numpy as np
# Toy data: `a` is a (3, 1) column and `b` a (3, 4) matrix; row i of `a`
# belongs to row i of `b`.
a = np.array(
	[
		[-1],
		[-1],
		[-1]
	]
)

b = np.array(
	[
		[1, 1, 1, 1],
		[2, 2, 2, 2],
		[6, 0, 1, 6]
	]
)

#* stack a.T on top of b.T: every COLUMN of c is one row of a followed by
#* the matching row of b, so c.shape = (5, 3)
c = np.vstack((a.T, b.T))
print(c)

#* np.random.shuffle only permutes along the first axis, so it is applied to
#* the transposed view: that reorders whole columns of c in place and keeps
#* each entry of a together with its row of b
np.random.shuffle(c.T)

c

[[-1 -1 -1]
 [ 1  2  6]
 [ 1  2  0]
 [ 1  2  1]
 [ 1  2  6]]


array([[-1, -1, -1],
       [ 6,  2,  1],
       [ 0,  2,  1],
       [ 1,  2,  1],
       [ 6,  2,  1]])