In [14]:
import numpy as np

def read_idx1_ubyte(file_path):
    with open(file_path, 'rb') as file:
        magic_number = int.from_bytes(file.read(4), 'big')
        number_of_items = int.from_bytes(file.read(4), 'big')
        labels = list(file.read(number_of_items))
    return labels


def read_idx3_ubyte(file_path):
    with open(file_path, 'rb') as file:
        magic_number = int.from_bytes(file.read(4), 'big')
        number_of_images = int.from_bytes(file.read(4), 'big')
        rows = int.from_bytes(file.read(4), 'big')
        cols = int.from_bytes(file.read(4), 'big')
        images = np.frombuffer(file.read(), dtype=np.uint8)
        images = images.reshape(number_of_images, rows, cols)
    return images


In [15]:
# Helper Functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

def one_hot_encode(y, num_labels=10):
    encoded = np.zeros((num_labels,))
    encoded[y] = 1
    return encoded

def print_test_predictions(test_data, weights):
    for i, (x, y) in enumerate(test_data):
        predicted_label = predict(x, weights)
        print(f"Test Sample {i + 1}: Predicted Label = {predicted_label}, Actual Label = {y}")

import pandas as pd

def save_weights_to_excel(weight_sets, file_name='weights.xlsx'):
    with pd.ExcelWriter(file_name, engine='openpyxl') as writer:
        for layer_config, weights in weight_sets.items():
            for i, weight_matrix in enumerate(weights):
                df = pd.DataFrame(weight_matrix)
                sheet_name = f"{layer_config}_W{i + 1}"
                df.to_excel(writer, sheet_name=sheet_name, index=False)

In [16]:
# Core Functions
def initialize_weights(layers):
    weights = []
    for i in range(len(layers) - 1):
        weight = np.random.randn(layers[i] + 1, layers[i + 1]) * 0.01
        weights.append(weight)
    return weights

def feedforward(x, weights):
    activations = [np.insert(x, 0, 1)]  # Add bias term to input
    for i in range(len(weights)):
        z = np.dot(activations[-1], weights[i])
        a = sigmoid(z)
        if i < len(weights) - 1:  # Add bias term to all but output layer
            a = np.insert(a, 0, 1)
        activations.append(a)
    return activations

def backpropagation(y, activations, weights):
    y_vec = one_hot_encode(y)
    # Calculate the error for the output layer (Cross-Entropy Loss)
    delta_L = activations[-1] * (1 - activations[-1]) * (activations[-1] - y_vec)
    deltas = [delta_L]

    # Calculate the errors for the rest of the layers
    for i in range(len(activations) - 2, 0, -1):
        delta = np.dot(deltas[0], weights[i].T) * sigmoid_derivative(activations[i])
        delta = delta[1:]  # Remove bias term
        deltas.insert(0, delta)
    return deltas

def compute_gradients(activations, deltas):
    gradients = []
    for i in range(len(deltas)):
        grad = np.outer(activations[i], deltas[i])
        gradients.append(grad)
    return gradients

def gradient_descent(weights, gradients, learning_rate):
    for i in range(len(weights)):
        weights[i] -= learning_rate * gradients[i]
    return weights

def predict(x, weights):
    activations = feedforward(x, weights)
    return np.argmax(activations[-1])

def compute_accuracy(dataset, weights):
    correct_predictions = 0
    for x, y in dataset:
        predicted_label = predict(x, weights)
        if predicted_label == y:
            correct_predictions += 1
    return correct_predictions / len(dataset)


In [17]:
from tqdm import tqdm

def init_network(layer_sizes):
    return initialize_weights(layer_sizes)

def train_network(D1, learning_rate, layer_sizes, num_iterations=50):
    weights = init_network(layer_sizes)
    for epoch in tqdm(range(num_iterations), desc="Training Progress"):
        for x, y in D1:
            activations = feedforward(x, weights)
            deltas = backpropagation(y, activations, weights)
            gradients = compute_gradients(activations, deltas)
            weights = gradient_descent(weights, gradients, learning_rate)

    return weights

In [18]:
train_labels = read_idx1_ubyte("train-labels.idx1-ubyte")
train_images = read_idx3_ubyte("train-images.idx3-ubyte")
test_labels = read_idx1_ubyte("t10k-labels.idx1-ubyte")
test_images = read_idx3_ubyte("t10k-images.idx3-ubyte")

# Flatten the images and normalize pixel values
train_images = train_images.reshape(train_images.shape[0], -1) / 255.0
test_images = test_images.reshape(test_images.shape[0], -1) / 255.0


In [19]:
import random

# Select a smaller batch for training and testing
#batch_size = 1000  # Example batch size
#train_batch_indices = random.sample(range(len(train_images)), batch_size)
#test_batch_indices = random.sample(range(len(test_images)), batch_size)

# D1 = [(train_images[i], train_labels[i]) for i in train_batch_indices]
# D2 = [(test_images[i], test_labels[i]) for i in test_batch_indices]

# Form training and testing datasets (D1 and D2)
D1 = list(zip(train_images, train_labels))
D2 = list(zip(test_images, test_labels))

In [20]:
layer_configurations = [[784, 30, 10]]       # One hidden layer

In [21]:
weight_sets = {}
for layer_sizes in layer_configurations:
    print(f"Training with layer configuration: {layer_sizes}")
    layer_config_str = '-'.join(map(str, layer_sizes))  # Create a string like '784-30-10'
    trained_weights = train_network(D1, learning_rate=0.01, layer_sizes=layer_sizes, num_iterations=7)
    weight_sets[layer_config_str] = trained_weights
    accuracy = compute_accuracy(D2, trained_weights)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print_test_predictions(D2, trained_weights)
    
save_weights_to_excel(weight_sets, 'neural_network_weights.xlsx')

Training with layer configuration: [784, 30, 10]


Training Progress: 100%|██████████| 7/7 [00:55<00:00,  7.96s/it]


Accuracy: 88.93%
Test Sample 1: Predicted Label = 7, Actual Label = 7
Test Sample 2: Predicted Label = 2, Actual Label = 2
Test Sample 3: Predicted Label = 1, Actual Label = 1
Test Sample 4: Predicted Label = 0, Actual Label = 0
Test Sample 5: Predicted Label = 4, Actual Label = 4
Test Sample 6: Predicted Label = 1, Actual Label = 1
Test Sample 7: Predicted Label = 4, Actual Label = 4
Test Sample 8: Predicted Label = 9, Actual Label = 9
Test Sample 9: Predicted Label = 6, Actual Label = 5
Test Sample 10: Predicted Label = 9, Actual Label = 9
Test Sample 11: Predicted Label = 0, Actual Label = 0
Test Sample 12: Predicted Label = 8, Actual Label = 6
Test Sample 13: Predicted Label = 9, Actual Label = 9
Test Sample 14: Predicted Label = 0, Actual Label = 0
Test Sample 15: Predicted Label = 1, Actual Label = 1
Test Sample 16: Predicted Label = 5, Actual Label = 5
Test Sample 17: Predicted Label = 9, Actual Label = 9
Test Sample 18: Predicted Label = 7, Actual Label = 7
Test Sample 19: Pred