In [None]:
import numpy as np
import requests
from io import StringIO


def load_data():
    """Download the optdigits training file and return shuffled features and labels.

    The file is fetched over HTTP, parsed as comma-separated numbers, and its
    rows are shuffled in place with NumPy's global random state. The last
    column is taken as the integer class label; all other columns are the
    features, divided by 16.0.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a 2-D float array of scaled features
        and ``y`` is a 1-D int array of labels, row-aligned with ``X``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra"
    # A timeout keeps a stalled connection from hanging the run forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error rather than parsing an error page as CSV.
    response.raise_for_status()
    data = np.genfromtxt(StringIO(response.text), delimiter=',')
    np.random.shuffle(data)
    # NOTE(review): dividing by 16 presumably maps pixel counts in 0..16 onto
    # [0, 1] -- confirm against the dataset description.
    X = data[:, :-1] / 16.0
    y = data[:, -1].astype(int)
    return X, y

class NeuralNetwork:
    """Fully connected classifier: two sigmoid hidden layers and a softmax output.

    Training is plain full-batch gradient descent on the cross-entropy loss.
    ``forward`` caches the intermediate activations on the instance and
    ``backward`` consumes them, so ``backward`` must be preceded by a
    ``forward`` call on the same batch.

    Attributes set by ``__init__``:
        W1, W2, W3: Weight matrices, drawn from a standard normal distribution.
        b1, b2, b3: Bias row vectors of shape ``(1, n_units)``, initialised to 0.

    Attributes set by ``forward``:
        z1, z2, z3: Pre-activation values of each layer.
        a1, a2: Sigmoid activations of the two hidden layers.
        probs: Softmax class probabilities, one row per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        """Store the layer sizes and randomly initialise the parameters.

        Args:
            input_size: Number of input features.
            hidden_size1: Number of units in the first hidden layer.
            hidden_size2: Number of units in the second hidden layer.
            output_size: Number of output classes.
        """
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size
        # Weights come from N(0, 1) via the global NumPy RNG; biases start at 0.
        self.W1 = np.random.randn(input_size, hidden_size1)
        self.b1 = np.zeros((1, hidden_size1))
        self.W2 = np.random.randn(hidden_size1, hidden_size2)
        self.b2 = np.zeros((1, hidden_size2))
        self.W3 = np.random.randn(hidden_size2, output_size)
        self.b3 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        The argument is the activation ``a = sigmoid(z)``, not the
        pre-activation ``z``; the derivative is then ``a * (1 - a)``.
        """
        return x * (1 - x)

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array of scores."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # the exponentials from overflowing.
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and cache every intermediate value.

        Args:
            X: 2-D array with one sample per row and ``input_size`` columns.

        Returns:
            The softmax probabilities, shape ``(n_samples, output_size)``.
            The same array is kept as ``self.probs``.
        """
        # Hidden Layer 1
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)

        # Hidden Layer 2
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)

        # Output Layer
        self.z3 = np.dot(self.a2, self.W3) + self.b3
        self.probs = self.softmax(self.z3)

        return self.probs

    def backward(self, X, y, learning_rate):
        """Backpropagate the cross-entropy loss and take one gradient step.

        Uses the activations cached by the most recent ``forward`` call, which
        must have been made on this same ``X``. Gradients are summed over the
        batch (not averaged) before being scaled by ``learning_rate``.

        Args:
            X: The batch passed to the preceding ``forward`` call.
            y: 1-D integer array of true class indices, one per row of ``X``.
            learning_rate: Step size applied to every parameter update.
        """
        m = X.shape[0]
        rows = np.arange(m)

        # Output Layer: gradient of softmax + cross-entropy is (probs - onehot).
        # Work on a copy so the cached probabilities are not overwritten.
        delta3 = self.probs.copy()
        delta3[rows, y] -= 1
        dW3 = np.dot(self.a2.T, delta3)
        db3 = np.sum(delta3, axis=0, keepdims=True)

        # Hidden Layer 2
        delta2 = np.dot(delta3, self.W3.T) * self.sigmoid_derivative(self.a2)
        dW2 = np.dot(self.a1.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)

        # Hidden Layer 1
        delta1 = np.dot(delta2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, delta1)
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Update weights and biases
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W3 -= learning_rate * dW3
        self.b3 -= learning_rate * db3

    def calculate_loss(self, X, y):
        """Return the mean cross-entropy loss of the model on ``(X, y)``.

        A forward pass is run on ``X`` so the loss always matches the data
        passed in; as with ``evaluate``, this refreshes the cached activations.

        Args:
            X: 2-D array with one sample per row.
            y: 1-D integer array of true class indices, one per row of ``X``.

        Returns:
            The average negative log-probability assigned to the true classes.
        """
        m = X.shape[0]
        probs = self.forward(X)
        correct_logprobs = -np.log(probs[np.arange(m), y])
        data_loss = np.sum(correct_logprobs)
        return 1. / m * data_loss

    def evaluate(self, X, y):
        """Return the classification accuracy on ``(X, y)`` as a percentage.

        Runs a forward pass on ``X`` (refreshing the cached activations) and
        compares the arg-max class of each row with ``y``.
        """
        predicted_class = np.argmax(self.forward(X), axis=1)
        accuracy = np.mean(predicted_class == y)
        return accuracy * 100


def train(X_train, y_train, X_test, y_test, num_epochs=400, learning_rate=0.0001,
          hidden_size1=100, hidden_size2=100, output_size=10):
    """Train a ``NeuralNetwork`` with full-batch gradient descent and report progress.

    Each epoch is one forward and one backward pass over the whole training
    set. Train and test accuracy are printed every 20 epochs, and the final
    accuracy and loss on both splits are printed after the last epoch.

    Args:
        X_train: 2-D training feature array, one sample per row.
        y_train: 1-D integer training labels.
        X_test: 2-D held-out feature array with the same number of columns.
        y_test: 1-D integer held-out labels.
        num_epochs: Number of full-batch gradient steps.
        learning_rate: Step size passed to ``NeuralNetwork.backward``.
        hidden_size1: Units in the first hidden layer.
        hidden_size2: Units in the second hidden layer.
        output_size: Number of classes the network predicts.

    Returns:
        The trained ``NeuralNetwork`` instance.
    """
    input_size = X_train.shape[1]
    model = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)

    for epoch in range(num_epochs):
        # Forward propagation caches the activations that backward consumes.
        model.forward(X_train)
        # Backpropagation
        model.backward(X_train, y_train, learning_rate)

        # Print train/test accuracy every 20 epochs
        if epoch % 20 == 0:
            train_acc = model.evaluate(X_train, y_train)
            test_acc = model.evaluate(X_test, y_test)
            print(f"Epoch: {epoch}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    # Final metrics. Each loss is computed right after evaluating the same
    # split, so it is reported for the predictions that were just scored.
    final_train_acc = model.evaluate(X_train, y_train)
    final_train_loss = model.calculate_loss(X_train, y_train)
    final_test_acc = model.evaluate(X_test, y_test)
    final_test_loss = model.calculate_loss(X_test, y_test)

    # Print final accuracy and loss
    print(f"Final Train Accuracy: {final_train_acc:.2f}%, Final Train Loss: {final_train_loss:.4f}")
    print(f"Final Test Accuracy: {final_test_acc:.2f}%, Final Test Loss: {final_test_loss:.4f}")

    return model

# Fetch the data and hold out the trailing 20% of the (already shuffled) rows.
X, y = load_data()
split_ratio = 0.8
split_index = int(split_ratio * len(X))
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]

model = train(X_train, y_train, X_test, y_test)

# Spot-check the trained model on a handful of distinct test rows.
num_examples = 5
random_indices = np.random.choice(X_test.shape[0], num_examples, replace=False)
X_examples = X_test[random_indices]
y_examples = y_test[random_indices]
predicted_labels = model.forward(X_examples).argmax(axis=1)

# Report each prediction next to its ground-truth label.
for position, (predicted, actual) in enumerate(zip(predicted_labels, y_examples), start=1):
    print(f"Example {position}: Predicted Label: {predicted}, Actual Label: {actual}")


Epoch: 0, Train Acc: 5.40%, Test Acc: 5.75%
Epoch: 20, Train Acc: 66.87%, Test Acc: 65.23%
Epoch: 40, Train Acc: 80.31%, Test Acc: 79.74%
Epoch: 60, Train Acc: 85.81%, Test Acc: 85.62%
Epoch: 80, Train Acc: 88.62%, Test Acc: 87.97%
Epoch: 100, Train Acc: 90.65%, Test Acc: 89.28%
Epoch: 120, Train Acc: 91.86%, Test Acc: 90.85%
Epoch: 140, Train Acc: 92.51%, Test Acc: 91.50%
Epoch: 160, Train Acc: 93.36%, Test Acc: 92.55%
Epoch: 180, Train Acc: 93.95%, Test Acc: 93.20%
Epoch: 200, Train Acc: 94.24%, Test Acc: 93.59%
Epoch: 220, Train Acc: 94.67%, Test Acc: 93.73%
Epoch: 240, Train Acc: 94.87%, Test Acc: 94.38%
Epoch: 260, Train Acc: 95.39%, Test Acc: 94.51%
Epoch: 280, Train Acc: 95.52%, Test Acc: 94.38%
Epoch: 300, Train Acc: 95.72%, Test Acc: 94.51%
Epoch: 320, Train Acc: 96.01%, Test Acc: 94.90%
Epoch: 340, Train Acc: 96.17%, Test Acc: 95.16%
Epoch: 360, Train Acc: 96.44%, Test Acc: 95.69%
Epoch: 380, Train Acc: 96.60%, Test Acc: 95.82%
Final Train Accuracy: 96.73%, Final Train Loss: 