<a href="https://colab.research.google.com/github/Inurria979/Neural-Network-from-Scracth/blob/main/NN_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook is based on the following video tutorial:
https://www.youtube.com/watch?v=w8yWXqWQYmU&t=493s

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# The Colab sample file has no header row: every line is already a sample.
# header=None keeps the first line as data instead of consuming it as column names
# (with the default header handling one sample was silently dropped).
data = pd.read_csv('sample_data/mnist_train_small.csv', header=None)

In [None]:
# Convert the loaded table to a NumPy array with one sample per row.
data = np.array(data)
m, n = data.shape

# Shuffle the rows in place with a fixed-seed generator so the dev/train split is
# identical on every Restart & Run All (an unseeded shuffle changes the split each run).
SPLIT_SEED = 0
DEV_SIZE = 1000
np.random.default_rng(SPLIT_SEED).shuffle(data)

# The first DEV_SIZE shuffled rows form the development set. Transposing puts one
# sample per column, so row 0 holds the labels and rows 1..n-1 hold the features.
data_dev = data[0:DEV_SIZE].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]

# Every remaining row is used for training, in the same column-per-sample layout.
data_train = data[DEV_SIZE:m].T
Y_train = data_train[0]
X_train = data_train[1:n]

In [None]:
def init_params(n_input=784, n_hidden=10, n_output=10, scale=0.01):
  """Create the initial weights and biases of the two-layer network.

  Weights are drawn from a standard normal distribution and multiplied by
  `scale`; biases start at zero. Called without arguments it produces the
  original 784 -> 10 -> 10 layout with a 0.01 weight scale.

  Args:
    n_input: number of input features (columns of W1).
    n_hidden: number of hidden units (rows of W1, columns of W2).
    n_output: number of output units (rows of W2).
    scale: factor applied to the random weights.

  Returns:
    Tuple (W1, b1, W2, b2) with shapes (n_hidden, n_input), (n_hidden, 1),
    (n_output, n_hidden) and (n_output, 1).
  """
  # The global NumPy RNG is used on purpose so np.random.seed(...) still
  # controls the initialisation.
  W1 = np.random.randn(n_hidden, n_input) * scale
  b1 = np.zeros((n_hidden, 1))
  W2 = np.random.randn(n_output, n_hidden) * scale
  b2 = np.zeros((n_output, 1))
  return W1, b1, W2, b2

def ReLU(Z):
  """Rectified linear unit: element-wise maximum of 0 and Z.

  Negative entries become 0; every other entry is passed through unchanged.
  """
  floor = 0
  rectified = np.maximum(floor, Z)
  return rectified

def der_ReLU(Z):
  """Derivative of ReLU as a boolean mask: True where Z is strictly positive.

  Multiplying by the mask keeps a value where Z > 0 and zeroes it elsewhere
  (Z == 0 is treated as slope 0).
  """
  is_positive = Z > 0
  return is_positive

def softmax(Z):
  """Column-wise softmax: each column of the result is non-negative and sums to 1.

  The maximum of every column is subtracted before exponentiating, so large
  inputs do not overflow; this shift does not change the result.
  """
  # keepdims=True keeps a (1, columns) shape so the per-column values broadcast
  # against every row of Z.
  column_peak = np.max(Z, axis=0, keepdims=True)
  exps = np.exp(Z - column_peak)
  column_total = np.sum(exps, axis=0, keepdims=True)
  return exps / column_total


def forward_prop(W1, W2, b1, b2, X):
  """Run the forward pass of the two-layer network.

  Note the argument order: both weight matrices come first, then both biases
  (W1, W2, b1, b2), unlike the (W1, b1, W2, b2) order the other helpers use.

  Returns:
    Tuple (Z1, A1, Z2, A2): hidden pre-activation, hidden ReLU activation,
    output pre-activation, and output softmax activation.
  """
  # Hidden layer: affine map followed by ReLU.
  hidden_pre = np.dot(W1, X) + b1
  hidden_act = ReLU(hidden_pre)

  # Output layer: affine map followed by a column-wise softmax.
  output_pre = np.dot(W2, hidden_act) + b2
  output_act = softmax(output_pre)

  return hidden_pre, hidden_act, output_pre, output_act

def one_hot(Y, num_classes=None):
  """One-hot encode a 1-D vector of class labels, one column per sample.

  Args:
    Y: 1-D array of class labels; values are cast to int.
    num_classes: number of rows of the result. When None it is inferred as
      max(Y) + 1, which yields too few rows if the highest class does not
      occur in Y; pass it explicitly to get a fixed height.

  Returns:
    Float array of shape (num_classes, Y.size) with a single 1 per column,
    placed in the row given by that sample's label.
  """
  # Cast once instead of repeating the conversion for every use.
  labels = np.asarray(Y).astype(int)
  if num_classes is None:
    num_classes = labels.max() + 1

  # Fill the (classes, samples) layout directly; no transpose needed.
  encoded = np.zeros((num_classes, labels.size))
  encoded[labels, np.arange(labels.size)] = 1
  return encoded

def back_prop(Z1, A1, Z2, A2, W2, X, Y):
  """Compute the gradients of both layers, averaged over the Y.size samples.

  Z2 is accepted but not read. The labels are expanded with one_hot(Y), which
  sizes its result from the largest label in Y, so Y must contain the highest
  class index for the result to line up with A2.

  Returns:
    Tuple (dW1, db1, dW2, db2).
  """
  sample_count = Y.size
  inv_m = 1 / sample_count
  targets = one_hot(Y)

  # Output layer. (A2 - targets) has the form of a cross-entropy gradient with
  # respect to Z2 for a softmax output; the loss itself is never computed here.
  grad_Z2 = A2 - targets
  grad_W2 = inv_m * grad_Z2.dot(A1.T)
  grad_b2 = inv_m * np.sum(grad_Z2, axis=1, keepdims=True)

  # Hidden layer: push the error back through W2 and mask it with the ReLU slope.
  grad_Z1 = W2.T.dot(grad_Z2) * der_ReLU(Z1)
  grad_W1 = inv_m * grad_Z1.dot(X.T)
  grad_b1 = inv_m * np.sum(grad_Z1, axis=1, keepdims=True)

  return grad_W1, grad_b1, grad_W2, grad_b2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
  """Take one gradient-descent step: parameter - alpha * gradient.

  The inputs are not modified; new values are returned as (W1, b1, W2, b2).
  """
  parameters = (W1, b1, W2, b2)
  gradients = (dW1, db1, dW2, db2)
  return tuple(
    value - alpha * slope
    for value, slope in zip(parameters, gradients)
  )

def get_predictions(A2):
  """Return, for every column of A2, the row index holding its largest value."""
  best_rows = np.argmax(A2, axis=0)
  return best_rows

def get_accuracy(predictions, Y, verbose=False):
  """Return the fraction of entries where predictions equals Y.

  Args:
    predictions: array of predicted labels.
    Y: array of true labels; its size is the denominator.
    verbose: when True, print both arrays before computing the score. Off by
      default so that computing a metric does not flood the cell output.

  Returns:
    Number of matching entries divided by Y.size.
  """
  if verbose:
    print(predictions, Y)
  return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, alpha, iterations, report_every=50, decay=0.9):
  """Train the two-layer network with full-batch gradient descent.

  Args:
    X: input matrix passed to forward_prop and back_prop.
    Y: label vector passed to back_prop and get_accuracy.
    alpha: initial learning rate.
    iterations: number of parameter updates to perform.
    report_every: print the training accuracy and decay the learning rate
      every this many iterations. 0 or None disables both.
    decay: factor the learning rate is multiplied by at each report
      (0.9 lowers it by 10 percent).

  Returns:
    The trained parameters (W1, b1, W2, b2).
  """
  W1, b1, W2, b2 = init_params()
  # Count from 1 so that i equals the number of updates done so far and the
  # last iteration can be reported.
  for i in range(1, iterations + 1):
    Z1, A1, Z2, A2 = forward_prop(W1, W2, b1, b2, X)
    dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W2, X, Y)
    W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)

    if report_every and i % report_every == 0:
      print(f"\n--- Iteration {i} ---")
      # A2 comes from the forward pass made before this iteration's update, so
      # the reported accuracy is that of the parameters entering iteration i.
      predictions = get_predictions(A2)
      print(get_accuracy(predictions, Y))

      # Step decay of the learning rate, applied once per report.
      alpha *= decay
  return W1, b1, W2, b2

In [None]:
# Train on the training split: initial learning rate 0.001, 1000 iterations.
W1, b1, W2, b2 = gradient_descent(
  X_train,
  Y_train,
  alpha=0.001,
  iterations=1000,
)



--- Iteration 50 ---
[1 3 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.7580398968366756

--- Iteration 100 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8105163429654192

--- Iteration 150 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8613084899205221

--- Iteration 200 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.872677509342597

--- Iteration 250 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8799936838780988

--- Iteration 300 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8853097531449023

--- Iteration 350 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8885204484446549

--- Iteration 400 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8917311437444077

--- Iteration 450 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8944154955523975

--- Iteration 500 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8963103321227432

--- Iteration 550 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8980472656455603

--- Iteration 600 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.8990999526290857

--- Iteration 650 ---
[1 8 8 ... 6 5 6] [1 8 8 ... 6 5 6]
0.90041

In [None]:
# Evaluate the trained parameters on the development set.
# Forward pass on X_dev; only A2_dev is used below, the other returned values are not read in this cell.
Z1_dev, A1_dev, Z2_dev, A2_dev = forward_prop(W1, W2, b1, b2, X_dev)

# Predicted class per sample, taken from the output activations A2_dev.
predictions_dev = get_predictions(A2_dev)

# Fraction of development samples whose prediction equals the label in Y_dev.
accuracy_dev = get_accuracy(predictions_dev, Y_dev)

print(f"Accuracy on Development Set: {accuracy_dev:.4f}")
print("-------------------------------------------")
# Run notes: ReLU variant that "kills" (zeroes) the neuron.
# Run notes: 500 iterations.

[0 1 7 1 6 3 8 1 3 2 5 0 9 9 8 6 4 3 0 7 2 9 7 2 3 3 2 8 4 3 1 1 7 7 1 9 8
 0 5 7 5 0 6 8 1 2 5 9 8 9 8 2 3 9 0 3 3 9 4 2 3 3 1 4 3 6 8 5 3 7 9 0 8 2
 8 4 3 2 5 9 9 5 3 2 3 1 9 9 3 8 3 7 6 7 2 2 2 9 5 0 8 5 3 8 6 3 2 9 0 1 7
 3 3 3 1 4 7 3 6 1 4 4 5 4 1 3 9 1 5 0 2 3 5 1 4 8 4 6 3 9 8 9 0 0 2 7 6 2
 3 7 0 0 4 9 5 5 8 7 6 6 6 4 4 1 1 5 8 2 1 8 7 1 4 8 5 0 1 5 8 0 0 3 9 9 7
 7 3 2 0 3 3 7 0 8 0 3 5 3 3 5 2 3 3 0 6 1 7 1 7 7 3 9 1 7 1 6 5 1 3 0 1 0
 9 3 6 0 4 0 3 3 0 7 7 1 5 9 0 2 3 4 3 5 7 7 1 1 4 8 7 7 5 0 6 3 7 3 9 6 7
 7 8 3 8 6 3 3 1 5 4 7 6 1 7 9 6 0 6 4 6 0 9 3 9 4 3 1 9 4 7 7 3 4 7 9 7 7
 2 6 3 1 8 1 6 4 4 7 6 8 1 4 4 7 1 2 5 7 6 7 6 7 8 5 2 2 5 7 6 5 7 2 8 0 7
 6 2 3 4 6 8 1 5 5 9 9 7 1 9 3 4 2 9 9 1 9 3 5 0 3 8 4 9 4 7 6 8 8 8 2 0 2
 9 5 2 1 4 2 4 8 0 3 8 3 7 7 9 9 7 3 7 5 4 2 0 2 5 6 2 2 8 7 0 9 1 8 8 4 4
 8 1 6 7 8 3 6 4 4 4 7 5 3 9 3 1 0 7 3 1 9 7 9 6 5 5 0 2 8 3 6 5 0 2 7 1 1
 8 7 7 3 7 5 6 8 6 5 4 7 8 8 6 4 7 3 1 5 7 1 1 1 7 8 1 8 8 1 0 8 2 0 4 8 7
 5 6 3 7 6 3 2 8 1 0 7 2 

In [None]:
# Evaluate the trained parameters on the development set.
# Forward pass on X_dev; only A2_dev is used below, the other returned values are not read in this cell.
Z1_dev, A1_dev, Z2_dev, A2_dev = forward_prop(W1, W2, b1, b2, X_dev)

# Predicted class per sample, taken from the output activations A2_dev.
predictions_dev = get_predictions(A2_dev)

# Fraction of development samples whose prediction equals the label in Y_dev.
accuracy_dev = get_accuracy(predictions_dev, Y_dev)

print(f"Accuracy on Development Set: {accuracy_dev:.4f}")
print("-------------------------------------------")
# Run notes: ReLU variant that sets the neuron to 0.1 (exact definition not recorded here - TODO confirm).
# Run notes: alpha 0.01, decreased by 10 percent every 50 iterations.
# Run notes: 500 iterations.

[0 1 7 1 2 3 6 1 3 2 5 0 9 9 8 6 4 3 0 1 2 9 7 2 3 3 2 5 4 3 1 1 7 7 1 9 8
 0 5 7 5 0 2 5 1 2 3 9 8 9 3 2 3 9 0 3 3 9 4 2 3 3 1 4 3 6 3 5 3 7 9 0 8 2
 8 4 3 2 5 9 9 5 3 2 3 1 9 9 3 8 3 7 6 7 2 2 2 9 5 0 8 5 3 3 6 3 2 9 0 1 7
 3 3 3 1 4 7 3 6 1 4 4 5 4 1 3 9 1 5 0 2 3 5 1 4 8 4 6 3 5 5 9 0 0 2 7 4 2
 3 7 0 0 4 9 5 5 8 7 6 6 6 4 4 1 1 1 8 2 1 8 7 1 4 8 5 0 1 5 1 0 0 3 9 7 7
 7 3 2 0 3 3 7 0 8 0 3 5 3 3 5 2 3 3 0 6 1 7 1 7 7 3 9 1 7 1 6 3 1 5 0 1 0
 9 3 6 0 4 0 3 3 0 7 7 1 5 9 0 2 3 4 5 5 7 7 1 1 4 8 7 3 5 0 6 5 7 3 9 6 7
 7 8 3 8 6 3 3 1 5 4 7 6 1 7 9 6 0 6 4 6 0 9 8 9 4 3 1 9 4 7 7 3 4 7 9 7 7
 2 6 7 1 8 1 6 4 4 7 6 8 1 4 4 7 1 2 5 7 6 7 4 7 8 8 2 2 5 7 6 5 7 2 8 0 7
 6 2 3 4 6 8 1 5 5 9 9 7 1 9 3 4 2 9 9 1 9 3 5 0 3 8 6 7 4 7 6 8 8 8 6 0 2
 9 5 2 1 4 2 9 8 0 5 8 3 7 7 9 9 7 3 7 5 4 2 0 2 5 6 2 2 8 7 0 9 1 8 8 4 4
 4 1 6 7 8 3 6 4 4 8 9 5 3 9 3 1 0 7 3 1 9 7 8 6 0 8 0 2 8 3 6 5 0 2 7 1 1
 8 7 7 3 7 8 8 8 6 5 4 7 8 8 6 4 7 3 1 5 7 1 1 1 7 8 1 8 8 1 0 8 2 0 4 8 7
 5 6 1 7 6 3 2 8 1 0 7 2 

In [None]:
# Evaluate the trained parameters on the development set.
# Forward pass on X_dev; only A2_dev is used below, the other returned values are not read in this cell.
Z1_dev, A1_dev, Z2_dev, A2_dev = forward_prop(W1, W2, b1, b2, X_dev)

# Predicted class per sample, taken from the output activations A2_dev.
predictions_dev = get_predictions(A2_dev)

# Fraction of development samples whose prediction equals the label in Y_dev.
accuracy_dev = get_accuracy(predictions_dev, Y_dev)

print(f"Accuracy on Development Set: {accuracy_dev:.4f}")
print("-------------------------------------------")
# Run notes: ReLU variant that "kills" (zeroes) the neuron.
# Run notes: 1000 iterations.

[1 3 1 8 9 9 1 6 6 1 1 1 7 5 2 3 0 4 0 7 7 8 0 1 0 5 5 6 1 0 3 1 5 7 3 8 6
 7 6 4 6 5 6 0 3 9 9 2 7 6 7 1 2 0 2 8 7 9 8 5 6 1 0 0 2 7 6 7 4 9 1 9 1 4
 0 4 4 5 2 8 3 3 2 0 4 0 5 9 0 0 7 9 7 6 8 9 7 6 6 8 6 7 0 0 1 4 0 6 1 9 4
 7 9 7 8 0 9 0 9 4 6 8 1 9 0 6 4 1 3 8 5 4 4 7 9 3 0 4 9 6 6 9 9 0 9 5 9 3
 6 6 3 8 8 0 5 1 2 3 7 3 7 8 5 7 1 5 3 8 4 8 7 4 5 9 4 6 4 1 9 8 5 4 9 6 9
 1 2 5 9 8 2 8 5 0 2 9 1 5 6 0 2 7 1 6 1 2 9 7 6 3 4 8 4 1 8 5 3 6 2 8 1 0
 0 4 5 3 7 8 0 6 2 8 0 7 1 0 8 0 9 3 3 8 9 9 0 2 7 5 0 6 2 9 6 7 3 8 3 9 7
 7 3 6 3 9 8 4 2 1 6 5 1 8 0 7 1 1 0 6 0 6 8 3 1 6 7 9 3 0 4 5 0 8 0 5 4 0
 1 3 3 0 7 9 0 2 5 1 4 4 7 9 6 2 8 2 6 7 8 0 4 2 5 5 0 9 3 9 4 1 8 6 7 7 2
 0 6 7 5 3 1 7 2 2 7 9 6 5 2 9 0 1 7 9 5 5 1 4 3 3 5 1 6 7 7 4 1 6 6 8 4 0
 1 3 7 4 9 1 7 3 8 7 6 1 1 9 7 6 0 4 1 9 4 1 4 9 7 5 1 7 2 5 6 3 9 0 7 5 4
 1 7 1 8 7 8 0 5 9 0 0 2 3 7 5 0 3 5 8 7 3 7 5 5 7 4 6 3 7 7 3 1 5 3 8 3 1
 0 9 6 9 9 1 7 0 1 9 5 0 7 2 0 7 5 7 8 2 2 3 5 4 9 8 2 8 7 6 4 5 4 5 9 2 9
 3 5 9 4 3 3 5 7 8 7 7 6 