# **Neural Net from scratch** 

---

Date Started: 22 Jan 2023 \
Date Finished: 1 Feb 2023 (delayed mostly by procrastination)



### ***Imports***

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from keras.datasets import mnist
from keras.utils import np_utils

## **Code**

In [None]:
class Dense:
    """Fully connected layer computing ``input @ weights + bias``.

    Inputs are row vectors: an array of shape ``(batch, input_size)``
    produces an output of shape ``(batch, output_size)``. A single sample is
    simply a batch of one row.
    """

    def __init__(self, input_size, output_size):
        """Create the layer with randomly initialised weights and bias.

        Args:
            input_size: Number of input features (rows of the weight matrix).
            output_size: Number of output units (columns of the weight matrix).
        """
        self.i = input_size
        self.j = output_size
        # Xavier/Glorot-style scaling: standard-normal draws divided by
        # sqrt(fan_in + fan_out) so the initial values shrink as the layer grows.
        scale = np.sqrt(self.i + self.j)
        self.weights = np.random.randn(self.i, self.j) / scale
        self.bias = np.random.randn(1, self.j) / scale

    def forwardPropagation(self, input):
        """Return ``input @ weights + bias`` and cache ``input`` for backprop."""
        self.input = input
        return np.dot(self.input, self.weights) + self.bias

    def backwardPropagation(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: dE/dY, same shape as the forward output.
            learning_rate: Step size applied to the weight and bias updates.

        Returns:
            dE/dX, same shape as the cached forward input.
        """
        # dE/dX must use the weights from *before* the update below.
        input_gradient = np.dot(output_gradient, self.weights.T)
        # dE/dW; the matrix product already sums over the batch rows.
        weight_gradient = np.dot(self.input.T, output_gradient)
        # dE/dB; the bias is shared by every row, so sum over the batch axis.
        # For a single-row gradient this equals output_gradient itself.
        bias_gradient = np.sum(output_gradient, axis=0, keepdims=True)

        # Step against the gradient.
        self.weights -= learning_rate * weight_gradient
        self.bias -= learning_rate * bias_gradient

        return input_gradient

In [None]:
class Activation:
    """Element-wise activation layer built from a function and its derivative."""

    def __init__(self, activation, activation_derivative):
        """Store the activation ``f`` and its derivative ``f'``."""
        self.activation_derivative = activation_derivative
        self.activation = activation

    def forwardPropagation(self, input):
        """Cache ``input`` and return ``f(input)``."""
        self.input = input
        activated = self.activation(input)
        return activated

    def backwardPropagation(self, output_gradient, learning_rate):
        """Return dE/dX = dE/dY * f'(X) for the cached input X.

        ``learning_rate`` is accepted for interface parity with trainable
        layers; this layer has no parameters, so it is not used.
        """
        local_slope = self.activation_derivative(self.input)
        return output_gradient * local_slope
    

In [None]:
# NOTE: SoftmaxLayer and FlattenLayer were largely copied from a reference implementation. TODO: study them properly.

class SoftmaxLayer:
    """Softmax over the last axis, turning scores into probabilities.

    Each row of the input is normalised independently, so both a single
    ``(1, n)`` row and an ``(m, n)`` batch are supported.
    """

    def __init__(self, input_size):
        """Record the number of classes.

        ``input_size`` is stored for reference; the computations below infer
        the size from the arrays they receive.
        """
        self.input_size = input_size

    def forwardPropagation(self, input):
        """Return softmax(input) and cache it for the backward pass."""
        self.input = input
        # Subtracting the row maximum leaves the result mathematically
        # unchanged but keeps np.exp from overflowing to inf (which would
        # turn the output into NaN) for large scores.
        shifted = input - np.max(input, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        self.output = exps / np.sum(exps, axis=-1, keepdims=True)
        return self.output

    def backwardPropagation(self, output_error, learning_rate):
        """Return dE/dX given ``output_error`` = dE/dY.

        With s = softmax(x) the Jacobian is ``diag(s) - s^T s``, so
        ``dE/dx_j = s_j * (e_j - sum_i e_i * s_i)``. This is evaluated
        directly instead of materialising the n x n Jacobian.
        ``learning_rate`` is unused because the layer has no parameters.
        """
        weighted_error = np.sum(output_error * self.output, axis=-1, keepdims=True)
        return self.output * (output_error - weighted_error)

In [None]:
class FlattenLayer:
    """Reshape a sample into one row vector, and gradients back again."""

    def __init__(self, input_shape):
        """Remember the shape that gradients are restored to in backprop."""
        self.input_shape = input_shape

    def forwardPropagation(self, input):
        """Return ``input`` reshaped to ``(1, total_elements)``."""
        row_vector_shape = (1, -1)  # -1 lets numpy infer the element count
        return np.reshape(input, row_vector_shape)

    def backwardPropagation(self, output_error, learning_rate):
        """Return ``output_error`` reshaped to ``input_shape``.

        ``learning_rate`` is unused; the layer has nothing to train.
        """
        restored_shape = self.input_shape
        return np.reshape(output_error, restored_shape)

In [None]:
# Only ReLU is implemented for now. TODO: add tanh and sigmoid later.

def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``."""
    floor = 0
    return np.maximum(x, floor)

def relu_prime(x):
    """Derivative of ReLU as an integer array: 1 where ``x >= 0``, else 0.

    Note that the slope at exactly 0 is reported as 1.
    """
    non_negative = x >= 0
    return np.array(non_negative, dtype='int')



# TODO: implement binary cross-entropy and other losses later; MSE for now. Also add accuracy metrics.

def mse(y_true, y_pred):
    """Mean squared error: the mean of ``(y_true - y_pred) ** 2`` over all elements."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of ``mse`` with respect to ``y_pred``: ``2 * (y_pred - y_true) / N``.

    ``N`` is ``y_pred.size``, the number of elements in the prediction.
    """
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_pred.size

In [None]:
class Network:
    """Sequential stack of layers trained one sample at a time.

    Every layer must provide ``forwardPropagation(input)`` and
    ``backwardPropagation(output_gradient, learning_rate)``.
    """

    def __init__(self, loss, loss_prime):
        """Create an empty network.

        Args:
            loss: Callable ``loss(y_true, y_pred)`` returning the sample loss.
            loss_prime: Callable ``loss_prime(y_true, y_pred)`` returning
                dE/dY for the sample.
        """
        self.layers = []
        self.loss = loss
        self.loss_prime = loss_prime
        # Mean loss of each epoch from the most recent fit() call.
        self.loss_history = []

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def _forward(self, sample):
        """Run one sample through every layer; each output feeds the next layer."""
        output = sample
        for layer in self.layers:
            output = layer.forwardPropagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample in ``input_data``."""
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent and print the mean loss per epoch.

        The mean loss of every epoch is also stored in ``self.loss_history``.

        Args:
            x_train: Sized, indexable collection of input samples.
            y_train: Targets; ``y_train[j]`` belongs to ``x_train[j]``.
            epochs: Number of full passes over ``x_train``.
            learning_rate: Step size handed to every layer's backward pass.

        Raises:
            ZeroDivisionError: If ``x_train`` is empty and ``epochs`` > 0,
                because the mean loss cannot be computed.
        """
        samples = len(x_train)
        self.loss_history = []

        for epoch in range(epochs):
            total_loss = 0
            for j, sample in enumerate(x_train):
                target = y_train[j]
                output = self._forward(sample)

                # Only needed for reporting; training uses the gradient below.
                total_loss += self.loss(target, output)

                # Backward pass: start from dE/dY and walk the layers in
                # reverse, each one updating itself and returning dE/dX.
                error = self.loss_prime(target, output)
                for layer in reversed(self.layers):
                    error = layer.backwardPropagation(error, learning_rate)

            # Average loss over all samples of this epoch.
            mean_loss = total_loss / samples
            self.loss_history.append(mean_loss)
            print('epoch %d/%d   error=%f' % (epoch + 1, epochs, mean_loss))

## **Test #1**

### XOR

Exclusive-Or logic gate:

| A 	| B 	| Z 	|
|---	|---	|---	|
| 0 	| 0 	| 0 	|
| 0 	| 1 	| 1 	|
| 1 	| 0 	| 1 	|
| 1 	| 1 	| 0 	|

In [None]:
# Every (A, B) combination, each stored as a 1x2 row vector -> shape (4, 1, 2).
x_train = np.array([[[a, b]] for a in (0, 1) for b in (0, 1)])
# Matching XOR targets, one 1x1 array per sample -> shape (4, 1, 1).
y_train = np.array([[[a ^ b]] for a in (0, 1) for b in (0, 1)])
print(f' x_train shape: {x_train.shape} \n y_train shape: {y_train.shape}')

 x_train shape: (4, 1, 2) 
 y_train shape: (4, 1, 1)


In [None]:
# 2 inputs -> 3 hidden units -> 1 output, with a ReLU after each dense layer.
# NOTE(review): the recorded run of this notebook predicts 0 for every input;
# the ReLU after the single output unit may be the cause - confirm by trying
# a linear output.
architecture = [
    Dense(2, 3),
    Activation(relu, relu_prime),
    Dense(3, 1),
    Activation(relu, relu_prime),
]

xorNet = Network(mse, mse_prime)
for layer in architecture:
    xorNet.add(layer)

In [None]:
# Train on the four XOR rows; fit() prints the mean error after every epoch.
xorNet.fit(
    x_train,
    y_train,
    epochs=300,
    learning_rate=0.1,
)

In [None]:
test = xorNet.predict(x_train)

# x_train has shape (4, 1, 2): each sample wraps exactly one input row, so
# flattening one level yields one row per prediction and per target.
# Pairing them with zip replaces the hand-maintained counter.
input_rows = (row for sample in x_train for row in sample)
for row, output, target in zip(input_rows, test, y_train):
    print(f'Input: {row}')
    # Each network output is a 1x1 array; round it to the nearest class.
    print(f'Prediction: {round(output[0][0])}')
    print(f'True value: {target[0][0]} \n')

Input: [0 0]
Prediction: 0
True value: 0 

Input: [0 1]
Prediction: 0
True value: 1 

Input: [1 0]
Prediction: 0
True value: 1 

Input: [1 1]
Prediction: 0
True value: 0 



## **Test #2**
### MNIST

Handwritten digit images.

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert the images to float32 and divide by 255
# (assumes 8-bit pixel values, giving a 0..1 range - TODO confirm).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the labels: (60k,) -> (60k, 10), one column per digit.
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

# notes:
# training data shape: (60k, 28, 28), (60k, 10)
# testing data shape: (10k, 28, 28), (10k, 10)
# StandardScaler has odd effects here: it produces NaN values in the error. TODO: look into that. Not standardising Y should presumably fix it.
# Normalising with /255 doesn't really do much except make the net quicker;
# StandardScaler SHOULD make it more accurate too.

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Inspect the first one-hot encoded label; as the cell's last expression it is displayed by the notebook.
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [None]:
# Hyperparameters read by the training cell.
epochs = 50
alpha = 0.2

# 28x28 image -> flat 784 vector -> 64 -> 32 -> 10 class scores -> softmax.
architecture = [
    FlattenLayer(input_shape=(28, 28)),
    Dense(28 * 28, 64),
    Activation(relu, relu_prime),
    Dense(64, 32),
    Activation(relu, relu_prime),
    Dense(32, 10),
    SoftmaxLayer(10),
]

mnistNet = Network(mse, mse_prime)
for layer in architecture:
    mnistNet.add(layer)


In [None]:
# Train on the first 10,000 images only.
mnistNet.fit(
    x_train[:10000],
    y_train[:10000],
    epochs=epochs,
    learning_rate=alpha,
)

In [None]:
# predict = mnistNet.predict(x_test)

# index = 21

# for value in predict[index]:
#   print(f'prediction: {np.argmax(value)}')
#   print(f'true value: {np.argmax(y_test[index])}')

In [None]:
samples = 10
predictions = mnistNet.predict(x_test[:samples])

# Walk images, one-hot labels and network outputs in lockstep; zip replaces
# the hand-maintained index counter.
for test, true, prediction in zip(x_test[:samples], y_test[:samples], predictions):
    image = np.reshape(test, (28, 28))
    plt.imshow(image, cmap='binary')
    print('Input Image: ')
    plt.show()

    # argmax over the flattened output / one-hot label gives the digit class.
    pred_value = np.argmax(prediction)
    true_value = np.argmax(true)

    # prediction is indexed as [0][class], i.e. a single row of class probabilities.
    print(f' Predicted Value: {pred_value} \n True Value: {true_value} \n Confidence: {prediction[0][pred_value]}')