In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid with the same shape as ``x``. Floating-point
        input keeps its dtype (float32 in, float32 out).
    """
    x = np.asarray(x)
    # exp() only ever sees a non-positive argument here, so it cannot overflow
    # (the naive form overflows and warns for large negative x).
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same value.
    result = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Unwrap 0-d results so scalar input still yields a numpy scalar.
    return result[()]

def sigmoid_derivative(z):
    """Derivative of the logistic sigmoid with respect to its input ``z``.

    Equals ``s * (1 - s)`` with ``s = 1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise derivative with the same shape as ``z``.
    """
    # The derivative is an even function of z. With e = exp(-|z|),
    # s * (1 - s) == e / (1 + e)**2 for either sign of z, and exp() never
    # receives a positive argument, so it cannot overflow.
    e = np.exp(-np.abs(z))
    return e / (1 + e) ** 2

def cost(output, target):
    """Mean squared error between ``output`` and ``target``.

    The squared element-wise difference is averaged over every element.
    """
    residual = output - target
    return np.mean(residual ** 2)

In [5]:
# Layer Class
class Layer:
    """A network layer: its index, its width, and its current activations.

    Parameters
    ----------
    number : int
        Identifier of the layer; stored and exposed, not otherwise used here.
    size : int
        Number of units. Assigned data must have this many columns.
    """

    def __init__(self, number, size):
        self._number = number
        self._size = size
        self._data = None  # stays None until assigned through ``data``

    @property
    def number(self):
        """Identifier given at construction (read-only)."""
        return self._number

    @property
    def size(self):
        """Number of units in the layer (read-only)."""
        return self._size

    @property
    def data(self):
        """Current activations, or ``None`` if nothing has been assigned yet."""
        return self._data

    @data.setter
    def data(self, new_data):
        """Store ``new_data`` after checking that its second dimension is ``size``.

        Raises
        ------
        ValueError
            If ``new_data`` has no ``shape``, has fewer than two dimensions,
            or its second dimension differs from ``size``.
        """
        shape = getattr(new_data, "shape", None)
        # Reject non-arrays and 1-D input here: indexing shape[1] on them would
        # otherwise surface as an unrelated AttributeError / IndexError.
        if shape is None or len(shape) < 2 or shape[1] != self._size:
            raise ValueError("Shape mismatch while setting layer data.")
        self._data = new_data

# Weight Class
class Weight:
    """Weight matrix between two layers.

    The matrix has shape ``(from_layer.size, to_layer.size)`` and starts out
    as float32 samples from ``np.random.randn``.
    """

    def __init__(self, from_layer, to_layer):
        rows, cols = from_layer.size, to_layer.size
        initial = np.random.randn(rows, cols)
        self._value = initial.astype(np.float32)

    @property
    def value(self):
        """The current weight matrix."""
        return self._value

    @value.setter
    def value(self, new_value):
        """Replace the matrix; the shape must match the current one."""
        if new_value.shape != self._value.shape:
            raise ValueError("Shape mismatch while setting weight value.")
        self._value = new_value

# Bias Class
class Bias:
    """Bias row vector for one layer.

    The vector has shape ``(1, to_layer.size)`` and starts out as float32
    samples from ``np.random.randn``.
    """

    def __init__(self, to_layer):
        width = to_layer.size
        initial = np.random.randn(1, width)
        self._value = initial.astype(np.float32)

    @property
    def value(self):
        """The current bias vector."""
        return self._value

    @value.setter
    def value(self, new_value):
        """Replace the vector; the shape must match the current one."""
        if new_value.shape != self._value.shape:
            raise ValueError("Shape mismatch while setting bias value.")
        self._value = new_value

# Neural Network Class
class Neural:
    """Fully connected feed-forward network trained by batch gradient descent.

    Layers are appended with ``add_layer``; every layer after the first gets a
    ``Weight`` and a ``Bias`` linking it to the previous layer. One training
    step is ``forward()``, then ``net.delta = target`` (which runs
    backpropagation), then ``update()``.

    Parameters
    ----------
    activation : callable
        Applied to the pre-activation ``z`` of every layer after the first.
    learning_rate : float, optional
        Step size used by ``update``.
    activation_derivative : callable, optional
        Derivative of ``activation`` with respect to ``z``, used during
        backpropagation. When omitted, ``sigmoid_derivative`` is used, which
        is only correct for a sigmoid ``activation``.
    """

    def __init__(self, activation, learning_rate=0.01, activation_derivative=None):
        self._activation = activation
        # Resolved lazily in the delta setter so the sigmoid default is only
        # looked up when it is actually needed.
        self._activation_derivative = activation_derivative
        self._learning_rate = learning_rate
        self._layers = []
        self._weights = []  # _weights[i] links layer i to layer i + 1
        self._biases = []   # _biases[i] belongs to layer i + 1
        self._deltas = []
        self._zs = []
        self._djdw = []
        self._djdb = []

    def add_layer(self, layer):
        """Append ``layer``; create its incoming weights and bias if it is not the first."""
        self._layers.append(layer)
        if len(self._layers) > 1:
            self._weights.append(Weight(self._layers[-2], self._layers[-1]))
            self._biases.append(Bias(self._layers[-1]))

    def forward(self):
        """Propagate the first layer's data through every subsequent layer.

        Stores each layer's pre-activation in ``_zs`` (index 0 stays ``None``)
        and writes the activated result into that layer's ``data``.

        Raises
        ------
        ValueError
            If there is more than one layer and the first layer has no data.
        """
        if len(self._layers) > 1 and self._layers[0].data is None:
            raise ValueError("Input layer has no data. Assign it before calling forward().")

        self._zs = [None] * len(self._layers)
        for i in range(1, len(self._layers)):
            prev_data = self._layers[i - 1].data
            w = self._weights[i - 1].value
            b = self._biases[i - 1].value
            z = np.dot(prev_data, w) + b
            self._zs[i] = z
            self._layers[i].data = self._activation(z)

    @property
    def output(self):
        """Data currently held by the last layer."""
        return self._layers[-1].data

    @property
    def delta(self):
        """Per-layer error terms from the last backpropagation (index 0 is ``None``)."""
        return self._deltas

    @delta.setter
    def delta(self, target):
        """Run backpropagation against ``target``.

        Fills the per-layer deltas and the gradients exposed by ``djdw`` and
        ``djdb``. Gradients are summed over the batch rows, not averaged.

        Raises
        ------
        ValueError
            If ``forward()`` has not produced pre-activations for the current
            set of layers.
        """
        self._deltas = [None] * len(self._layers)
        self._djdw = [None] * len(self._weights)
        self._djdb = [None] * len(self._biases)

        # _zs is empty before the first forward(), stale if layers were added
        # afterwards, and ends in None for a single-layer network.
        if len(self._zs) != len(self._layers) or not self._zs or self._zs[-1] is None:
            raise ValueError("Last layer is None. Ensure forward() is called before setting delta.")

        derivative = self._activation_derivative
        if derivative is None:
            derivative = sigmoid_derivative

        # Output layer: error times the activation slope at z.
        output = self._layers[-1].data
        self._deltas[-1] = (output - target) * derivative(self._zs[-1])

        # Hidden layers, last to first: pull the next layer's delta back
        # through the weights that connect the two layers.
        for i in range(len(self._layers) - 2, 0, -1):
            next_w = self._weights[i].value
            next_delta = self._deltas[i + 1]
            self._deltas[i] = np.dot(next_delta, next_w.T) * derivative(self._zs[i])

        # Gradients for the weights/bias feeding layer i + 1.
        for i in range(len(self._weights)):
            a_prev = self._layers[i].data
            delta_curr = self._deltas[i + 1]
            self._djdw[i] = np.dot(a_prev.T, delta_curr)
            self._djdb[i] = np.sum(delta_curr, axis=0, keepdims=True)

    @property
    def djdw(self):
        """Weight gradients from the last backpropagation, one per weight matrix."""
        return self._djdw

    @property
    def djdb(self):
        """Bias gradients from the last backpropagation, one per bias vector."""
        return self._djdb

    def update(self):
        """Apply one gradient-descent step to every weight matrix and bias vector.

        Raises
        ------
        ValueError
            If gradients have not been computed for the current weights.
        """
        if len(self._djdw) != len(self._weights) or any(g is None for g in self._djdw):
            raise ValueError("Gradients are not available. Set delta after forward() before calling update().")

        for weight, bias, grad_w, grad_b in zip(self._weights, self._biases, self._djdw, self._djdb):
            weight.value -= self._learning_rate * grad_w
            bias.value -= self._learning_rate * grad_b
            

In [None]:
# Load Dataset
# The CSV has a 'label' column; every other column is treated as pixel data.
file_name = 'mnist_test.csv'
df = pd.read_csv(file_name)

# Split the labels from the pixel values (stored as 8-bit integers)
labels = df['label'].values
pixel_frame = df.drop(columns=['label'])
pixels = pixel_frame.values.astype(np.uint8)

# Batch Setup
# The first n rows of the dataset form the training batch.
n = int(input("Enter the batch size of images : "))
max_idx = len(labels) - 1  # highest valid row index
# A batch needs at least one image and may use every row (n == max_idx + 1);
# an empty batch would make the cost NaN.
if not (1 <= n <= max_idx + 1):
    raise ValueError(f"Batch size must be between 1 and {max_idx + 1}.")
idxs = list(range(n))

# Display All Images
# Each pixel row is reshaped to 28x28 and rendered as an 8-bit grayscale image.
for sample_idx in idxs:
    print(f"{sample_idx+1}. Label: {labels[sample_idx]}")
    digit_image = Image.fromarray(pixels[sample_idx].reshape(28, 28), mode='L')
    display(digit_image)

# Normalize and Prepare Input Batch
# Scale the selected pixel rows from 0..255 down to 0..1 as float32.
images = pixels[idxs].astype(np.float32)
images /= 255.0

# One-hot targets: row r gets a 1 in the column given by its label.
targets = np.zeros((n, 10), dtype=np.float32)
targets[np.arange(n), labels[idxs]] = 1

# Initialise Network
# Weights and biases are drawn from NumPy's global RNG as layers are added,
# so seed it first to make a Restart & Run All reproduce the same training run.
SEED = 42
np.random.seed(SEED)

net = Neural(sigmoid)

# Input layer: 784 units (one per pixel of a 28x28 image); it holds the batch.
input_layer = Layer(1, 784)
input_layer.data = images
net.add_layer(input_layer)

# Two hidden layers followed by a 10-unit output layer (one unit per digit).
for number, size in enumerate((107, 26, 10), start=2):
    net.add_layer(Layer(number, size))

# Training the Network
# Every round is one full-batch step: forward pass, backpropagation, update.
training_rounds = 10000
track_cost = []
for round_idx in range(training_rounds):
    net.forward()

    # Delta Calculation (assigning to net.delta runs backpropagation)
    net.delta = targets

    # Update Weights and Biases
    net.update()

    # Track Cost: half the mean squared error of this round's forward pass
    current_cost = 0.5 * cost(net.output, targets)
    track_cost.append(current_cost)

# For printing deltas, djdw, and djdb
# Set SHOW_GRADIENTS to True to dump the values left by the last training round.
SHOW_GRADIENTS = False
if SHOW_GRADIENTS:
    # Print Delta for each layer (the first layer has no delta, so it is skipped)
    print("Delta for each layer:")
    for i, d in enumerate(net.delta):
        if d is not None:
            print(f"Layer {i} Delta:\n{d}\n")

    # Print dJdW
    print("dJdW Calculation:")
    for i, grad in enumerate(net.djdw):
        print(f"dJ/dW for Layer {i} → {i+1}:\n{grad}\n")

    # Print dJdB
    print("dJdB Calculation:")
    for i, grad in enumerate(net.djdb):
        print(f"dJ/db for Layer {i} → {i+1}:\n{grad}\n")

# Plotting the Cost Function
# One point per training round, in the order the costs were recorded.
fig, ax = plt.subplots()
ax.plot(range(training_rounds), track_cost)
ax.set_xlabel('Training Rounds')
ax.set_ylabel('Cost')
ax.set_title('Cost vs Training Rounds')
ax.grid(True)
plt.show()

# Final Forward Pass
# Run the network once more so the output reflects the last weight update.
net.forward()

# Final Cost (the same half mean squared error tracked during training)
final_cost = cost(net.output, targets) * 0.5
print("Final Cost:", final_cost)

# Predicted Digits: index of the largest output unit in each row
predictions = np.argmax(net.output, axis=1)
print("Predicted Digits: \n", predictions)

# Actual Digits/Labels: recovered from the one-hot targets
actual_labels = np.argmax(targets, axis=1)
print("Actual Digits:\n", actual_labels)

# Fraction of the batch whose prediction matches its label
accuracy = np.mean(predictions == actual_labels)
print(f"Accuracy: {accuracy:.2%}")