# MNIST Neural Network


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist


# Load the MNIST splits; each split unpacks into an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()


# Flatten every image to a 784-element (28*28) row, cast to float32, and divide
# by 255 so values land in [0, 1] (assumes raw pixels are 0-255 intensities, as
# the Keras MNIST loader documents -- TODO confirm if the data source changes).
x_train = x_train.reshape(-1, 28*28).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 28*28).astype(np.float32) / 255.0

def one_hot_encode(y, num_classes=10):
    """Convert a vector of integer class labels into a one-hot matrix.

    Args:
        y: Array of integer labels; its first dimension is the sample count.
        num_classes: Width of the encoding (number of columns).

    Returns:
        A float array of shape (len(y), num_classes) holding a 1 in column
        ``y[i]`` of row ``i`` and 0 everywhere else.
    """
    sample_count = y.shape[0]
    one_hot = np.zeros((sample_count, num_classes))
    # Pair each row index with its label so one fancy-index assignment sets
    # exactly one entry per row.
    row_ids = np.arange(sample_count)
    one_hot[row_ids, y] = 1
    return one_hot

# One-hot targets for both splits (one_hot_encode defaults to 10 classes).
y_train_oh = one_hot_encode(y_train)
y_test_oh = one_hot_encode(y_test)

# Sanity check: report the shapes of the flattened feature matrices.
print(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")


In [None]:
def relu(x):
    """Apply the rectified linear unit element-wise: max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return the element-wise ReLU slope as float32.

    The result is 1.0 where ``x`` is strictly positive and 0.0 elsewhere
    (including at exactly zero).
    """
    positive_mask = np.greater(x, 0)
    return positive_mask.astype(np.float32)

def softmax(x):
    """Compute a row-wise softmax over a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; this shift does not change the result.

    Args:
        x: Array whose axis 1 holds the per-class scores.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exp_shifted = np.exp(x - row_max)
    row_total = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_total
def cross_entropy(pred, target):
    """Return the mean categorical cross-entropy over a batch.

    Args:
        pred: Predicted class probabilities, one row per sample.
        target: Target weights of the same shape (e.g. one-hot rows).

    Returns:
        Scalar: the average over rows of ``-sum(target * log(pred))``.
    """
    eps = 1e-9
    # Keep probabilities away from exactly 0 so log() stays finite.
    safe_pred = np.clip(pred, eps, 1 - eps)
    per_sample = (target * np.log(safe_pred)).sum(axis=1)
    return -per_sample.mean()

def cross_entropy_derivative(pred, target):
    """Return ``pred - target``.

    When ``pred`` is the softmax of some logits and ``target`` is one-hot,
    this difference is the gradient of the (unaveraged) cross-entropy loss
    with respect to those logits.
    """
    logit_grad = pred - target
    return logit_grad


In [None]:
class NeuralNetwork:
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    ``forward`` caches its intermediate activations on the instance and
    ``backward`` reads them, so ``backward`` must follow a ``forward`` call on
    the same batch. Parameters are updated in place by plain gradient descent.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Draw random weights scaled by sqrt(2 / fan_in) and zero the biases."""
        hidden_scale = np.sqrt(2. / input_size)
        self.w1 = np.random.randn(input_size, hidden_size) * hidden_scale
        self.b1 = np.zeros((1, hidden_size))

        output_scale = np.sqrt(2. / hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size) * output_scale
        self.b2 = np.zeros((1, output_size))

    def forward(self, x):
        """Run a batch through the network and return the softmax output.

        Stores the hidden pre-activation (``z1``), hidden activation (``a1``),
        output pre-activation (``z2``) and output (``a2``) for ``backward``.
        """
        self.z1 = pre_hidden = x @ self.w1 + self.b1
        self.a1 = hidden = relu(pre_hidden)
        self.z2 = logits = hidden @ self.w2 + self.b2
        self.a2 = probs = softmax(logits)
        return probs

    def backward(self, x, y, output, lr=0.01):
        """Backpropagate one batch and apply a gradient-descent step in place.

        Args:
            x: Input batch that produced ``output`` in the preceding ``forward``.
            y: Targets for the batch; its first dimension is the batch size.
            output: Network output for ``x``.
            lr: Step size applied to every gradient.
        """
        batch_size = y.shape[0]

        # Output layer: gradients are averaged over the batch.
        grad_logits = cross_entropy_derivative(output, y)
        grad_w2 = (self.a1.T @ grad_logits) / batch_size
        grad_b2 = grad_logits.sum(axis=0, keepdims=True) / batch_size

        # Hidden layer: push the error back through w2 (still un-updated here)
        # and gate it by the ReLU slope at the cached pre-activation.
        grad_hidden = (grad_logits @ self.w2.T) * relu_derivative(self.z1)
        grad_w1 = (x.T @ grad_hidden) / batch_size
        grad_b1 = grad_hidden.sum(axis=0, keepdims=True) / batch_size

        # All gradients are already computed, so the update order is irrelevant.
        self.w1 -= lr * grad_w1
        self.b1 -= lr * grad_b1
        self.w2 -= lr * grad_w2
        self.b2 -= lr * grad_b2
def accuracy(predictions, labels):
    """Return the fraction of rows whose arg-max column matches in both arrays.

    Args:
        predictions: Per-class scores, one row per sample.
        labels: Targets of the same layout (e.g. one-hot rows).
    """
    predicted_class = np.argmax(predictions, axis=1)
    true_class = np.argmax(labels, axis=1)
    return np.mean(predicted_class == true_class)

def train(model, x_train, y_train, x_test, y_test, epochs=10, batch_size=64, lr=0.01):
    """Train ``model`` with mini-batch gradient descent, printing metrics per epoch.

    Args:
        model: Object exposing ``forward(x)`` and ``backward(x, y, output, lr)``.
        x_train, y_train: Training inputs and targets, indexable by an index array.
        x_test, y_test: Held-out inputs and targets used only for reporting.
        epochs: Number of full passes over the training data.
        batch_size: Samples per update; the final batch of an epoch may be smaller.
        lr: Learning rate forwarded to ``model.backward``.
    """
    n_samples = len(x_train)

    for epoch in range(epochs):
        # Draw a fresh random sample order for every epoch.
        order = np.random.permutation(n_samples)
        x_shuffled, y_shuffled = x_train[order], y_train[order]

        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            xb, yb = x_shuffled[start:stop], y_shuffled[start:stop]
            model.backward(xb, yb, model.forward(xb), lr)

        # Epoch evaluation on the full (unshuffled) train and test sets.
        train_output = model.forward(x_train)
        test_output = model.forward(x_test)
        train_loss = cross_entropy(train_output, y_train)
        test_loss = cross_entropy(test_output, y_test)
        test_acc = accuracy(test_output, y_test)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f} | Test Acc: {test_acc*100:.2f}%")

# 784 inputs (one per pixel of a flattened 28*28 image), 128 hidden units,
# 10 output classes.
model = NeuralNetwork(input_size=784, hidden_size=128, output_size=10)

# Train for 10 epochs on the one-hot targets; lr=0.1 overrides train()'s
# default of 0.01.
train(model, x_train, y_train_oh, x_test, y_test_oh, epochs=10, batch_size=64, lr=0.1)


In [None]:
def visualize_weights(weights, num_rows=8, num_cols=8, img_shape=(28, 28)):
    """Show the leading columns of a weight matrix as a grid of images.

    Column ``i`` of ``weights`` is reshaped to ``img_shape`` and drawn in grid
    cell ``i`` (row-major). Columns beyond ``num_rows * num_cols`` are not
    drawn, and grid cells beyond the number of columns are left blank.

    Args:
        weights: 2-D array; each column must hold ``prod(img_shape)`` values.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        img_shape: (height, width) each column is reshaped to before plotting.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes that has no `.flat`.
    _, axes = plt.subplots(num_rows, num_cols, figsize=(12, 12), squeeze=False)
    n_units = weights.shape[1]
    for i, ax in enumerate(axes.flat):
        # Hide ticks and frames on every cell so unused cells stay blank
        # instead of showing empty axes.
        ax.axis('off')
        if i < n_units:
            img = weights[:, i].reshape(img_shape)
            ax.imshow(img, cmap='viridis')
    plt.suptitle("First Layer Weights (as Images)", fontsize=16)
    plt.tight_layout()
    plt.show()

# Draw first-layer weight columns as images. With the default 8x8 grid only the
# first 64 of the model's hidden units are shown.
visualize_weights(model.w1)


In [None]:
def train(model, x_train, y_train, x_test, y_test, epochs=100, batch_size=64, lr=0.01):
    """Train ``model`` with mini-batch gradient descent and record per-epoch metrics.

    Args:
        model: Object exposing ``forward(x)`` and ``backward(x, y, output, lr)``.
        x_train, y_train: Training inputs and targets, indexable by an index array.
        x_test, y_test: Held-out inputs and targets used only for evaluation.
        epochs: Number of full passes over the training data.
        batch_size: Samples per update; the final batch of an epoch may be smaller.
        lr: Learning rate forwarded to ``model.backward``.

    Returns:
        Dict with keys ``'train_loss'``, ``'test_loss'`` and ``'test_accuracy'``,
        each a list holding one value per epoch.
    """
    train_losses, test_losses, test_accuracies = [], [], []
    n_samples = len(x_train)

    for epoch in range(epochs):
        # Draw a fresh random sample order for every epoch.
        order = np.random.permutation(n_samples)
        x_shuffled, y_shuffled = x_train[order], y_train[order]

        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            xb, yb = x_shuffled[start:stop], y_shuffled[start:stop]
            model.backward(xb, yb, model.forward(xb), lr)

        # Evaluate on the full (unshuffled) train and test sets.
        train_output = model.forward(x_train)
        test_output = model.forward(x_test)

        train_loss = cross_entropy(train_output, y_train)
        test_loss = cross_entropy(test_output, y_test)
        test_acc = accuracy(test_output, y_test)

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        # Log the first epoch and then every tenth one.
        is_first_epoch = epoch == 0
        is_tenth_epoch = (epoch + 1) % 10 == 0
        if is_tenth_epoch or is_first_epoch:
            print(f"Epoch {epoch+1}/{epochs} | "
                  f"Train Loss: {train_loss:.4f} | "
                  f"Test Loss: {test_loss:.4f} | "
                  f"Test Acc: {test_acc * 100:.2f}%")

    return {
        'train_loss': train_losses,
        'test_loss': test_losses,
        'test_accuracy': test_accuracies,
    }

# Run the training
# NOTE: `model` is not re-created here, so when the file is run top to bottom
# these 100 epochs continue from the weights left by the earlier 10-epoch run.
# This call relies on the history-returning definition of train() above.
history = train(model, x_train, y_train_oh, x_test, y_test_oh, epochs=100, batch_size=64, lr=0.1)

# Plot Loss and Accuracy Over 100 Epochs
# One x value per recorded epoch, numbered from 1.
epochs_range = range(1, len(history['train_loss']) + 1)
plt.figure(figsize=(12, 5))

# Loss Plot (left panel): train and test loss on shared axes.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, history['train_loss'], label='Train Loss')
plt.plot(epochs_range, history['test_loss'], label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Over Epochs')
plt.legend()

# Accuracy Plot (right panel): stored accuracies are fractions, so scale them
# to percentages for display.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, [a * 100 for a in history['test_accuracy']], label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Test Accuracy Over Epochs')
plt.legend()

plt.tight_layout()
plt.show()
