In [2]:
import sys
import os
sys.path.append('..')
from backPropMNIST import *

# 1 - Weights Initialization methods

In [3]:
# Locations of the MNIST CSV files, relative to this notebook.
data_train_path = "../datasets/mnist/mnist_train.csv"
data_test_path = "../datasets/mnist/mnist_test.csv"


def _split_labels(frame, label_column):
    """Return (labels, features) as NumPy arrays, taking labels from `label_column`."""
    labels = frame[label_column].to_numpy()
    features = frame.drop(columns=[label_column]).to_numpy()
    return labels, features


# NOTE(review): the label columns are addressed as '5' (train) and '7' (test).
# Presumably the CSVs have no header row and the first sample's label became
# the column name -- confirm against DataLoader.load_data.
data = DataLoader(data_train_path).load_data()
train_labels, train_data = _split_labels(data, '5')

test_labels, test_data = _split_labels(DataLoader(data_test_path).load_data(), '7')





In [None]:
# Network Name = Simple 2 Layer Network simTwoLayers
# Experiment: for every weight-initialisation method, train the same network
# `experiments_number` times, save each run's parameters and record its test accuracy.
input_size = train_data.shape[1]
output_size = 10
num_hidden_layers = 2
hidden_size = [100, 100]
activation_function = 'relu'
learning_rate = 0.001
epochs = 100
batch_size = 16

weights_methods = ['zero', 'random', 'he', 'xavier']
experiments_number = 5  # runs per setting
model_name = 'simTwoLayers'
# One accuracy list per method, derived from weights_methods so the two cannot drift apart.
accuracies = {method: [] for method in weights_methods}

for weights_method in weights_methods:
    # The output directory only depends on the method, so create it once, and
    # before training, so a filesystem problem shows up immediately instead of
    # after a complete training run.
    experiment_dir = f'models/{model_name}/experiments/{weights_method}'
    os.makedirs(experiment_dir, exist_ok=True)

    for i in range(experiments_number):
        print('-'*100)
        print(f'Experiment {i+1} of {weights_method}')
        network = NeuralNetwork(
            input_size=input_size,
            num_hidden_layers=num_hidden_layers,
            hidden_size=hidden_size,
            output_size=output_size,
            activation_function=activation_function,
            learning_rate=learning_rate,
            epochs=epochs,
            batch_size=batch_size,
            weights_method=weights_method
        )
        network.initialize_weights_biases()
        network.train(train_data, train_labels)
        # The run index is part of the file name, so every run keeps its own files.
        network.save_weights_biases(
            f'{experiment_dir}/weights_{weights_method}_{i}.pkl',
            f'{experiment_dir}/biases_{weights_method}_{i}.pkl'
        )

        predictions = network.test(test_data, test_labels)
        accuracy = network.accuracy(test_labels, predictions)
        print(f"Accuracy: {accuracy:.4f}")
        accuracies[weights_method].append(accuracy)



In [None]:
# Experiment: train the same network with several mini-batch sizes,
# `experiments_number` runs each, and record the test accuracy of every run.
# `experiments_number` is not defined in this cell; it comes from the
# weight-initialisation experiment cell, which must have been executed first.
input_size = train_data.shape[1]
output_size = 10
num_hidden_layers = 2
hidden_size = [100, 100]
activation_function = 'relu'
learning_rate = 0.001
epochs = 100
weights_method = 'xavier'
model_name = 'simTwoLayers_batchSize'

batch_size_list = [4, 8, 16, 32, 64, 128]
# One accuracy list per batch size; keys are the batch sizes as strings.
accuracies = {str(size): [] for size in batch_size_list}

# weights_method is fixed in this cell, so all runs share one output directory.
# Create it up front so a filesystem problem surfaces before any training.
experiment_dir = f'models/{model_name}/experiments/{weights_method}'
os.makedirs(experiment_dir, exist_ok=True)

for batch_size in batch_size_list:
    for i in range(experiments_number):
        network = NeuralNetwork(
            input_size=input_size,
            num_hidden_layers=num_hidden_layers,
            hidden_size=hidden_size,
            output_size=output_size,
            activation_function=activation_function,
            learning_rate=learning_rate,
            epochs=epochs,
            batch_size=batch_size,
            weights_method=weights_method
        )
        network.initialize_weights_biases()
        network.train(train_data, train_labels)
        # Include the run index in the file name; without it every run for a
        # given batch size would overwrite the files of the previous run.
        network.save_weights_biases(
            f'{experiment_dir}/weights_{batch_size}_{i}.pkl',
            f'{experiment_dir}/biases_{batch_size}_{i}.pkl'
        )

        predictions = network.test(test_data, test_labels)
        accuracy = network.accuracy(test_labels, predictions)
        print(f"Accuracy: {accuracy:.4f}")
        accuracies[str(batch_size)].append(accuracy)

accuracies

In [None]:
# Different architectures
# Experiment: train each hidden-layer layout `num_experiments` times, save
# every run's parameters and record its test accuracy.
input_size = train_data.shape[1]
batch_size = 64
output_size = 10
activation_function = 'relu'
learning_rate = 0.001
epochs = 200
weights_method = 'xavier'
num_experiments = 5

different_architectures = [
    {'num_hidden_layers': 3, 'hidden_size': [400, 100, 50], 'name': 'md_400_100_50'},
    {'num_hidden_layers': 3, 'hidden_size': [800, 200, 100], 'name': 'md_800_200_100'},
    {'num_hidden_layers': 3, 'hidden_size': [1600, 400, 200], 'name': 'md_1600_400_200'},
]


model_name = 'simTwoLayers_architecture'
# One accuracy list per architecture, keyed by the architecture's 'name'.
accuracies = {architecture['name']: [] for architecture in different_architectures}

# weights_method is fixed in this cell, so all runs share one output directory.
# Create it up front so a filesystem problem surfaces before any training.
experiment_dir = f'models/{model_name}/experiments/{weights_method}'
os.makedirs(experiment_dir, exist_ok=True)

for architecture in different_architectures:
    architecture_name = architecture['name']
    for i in range(num_experiments):
        network = NeuralNetwork(
            input_size=input_size,
            num_hidden_layers=architecture['num_hidden_layers'],
            hidden_size=architecture['hidden_size'],
            output_size=output_size,
            activation_function=activation_function,
            learning_rate=learning_rate,
            epochs=epochs,
            batch_size=batch_size,
            weights_method=weights_method
        )
        network.initialize_weights_biases()
        network.train(train_data, train_labels)
        # Include the run index in the file name; without it every run of an
        # architecture would overwrite the files of the previous run.
        network.save_weights_biases(
            f'{experiment_dir}/weights_{architecture_name}_{i}.pkl',
            f'{experiment_dir}/biases_{architecture_name}_{i}.pkl'
        )

        predictions = network.test(test_data, test_labels)
        accuracy = network.accuracy(test_labels, predictions)
        print(f"Accuracy: {accuracy:.4f}")
        accuracies[architecture_name].append(accuracy)



In [None]:
# Print "mean ± standard deviation" for every entry of `accuracies`, i.e. for
# whichever experiment cell populated that dict most recently.
for group_name, group_accuracies in accuracies.items():
    mean_accuracy = np.mean(group_accuracies)
    accuracy_std = np.std(group_accuracies)  # np.std called with its default arguments
    print(f"{group_name}: {mean_accuracy:.4f} ± {accuracy_std:.4f}")


In [None]:
# Rebuild a network with two hidden layers of 100 units and initialise it from
# the weight/bias files stored under model_path.
model_path = '../models/two_layer_100_100_lr_0.001/run_1/'

inference_config = dict(
    input_size=784,
    num_hidden_layers=2,
    hidden_size=[100, 100],
    output_size=10,
    activation_function='relu',
    learning_rate=0.001,
    epochs=1,      # Not used for inference
    batch_size=1,  # Not used for inference
    weights_method='xavier',
)
network = NeuralNetwork(**inference_config)

# model_path already ends with '/', so the paths below contain a double slash.
network.initialize_weights_biases(
    path_weights=f"{model_path}/weights.npz",
    path_biases=f"{model_path}/biases.npz"
)
    


In [None]:
# Run the loaded network on the first test sample: reshape it to a single-row
# batch, divide the values by 255, and pass None as the labels argument.
first_sample = test_data[0].reshape(1, -1) / 255
network.test(first_sample, None)

In [None]:
from PIL import Image, ImageDraw

# Show test sample 10 as an image: reshape the flat row to 28x28 and cast it
# to uint8 before handing it to PIL.
digit_pixels = test_data[10].reshape(28, 28)
Image.fromarray(digit_pixels.astype(np.uint8))

In [None]:
# Collect the shape of every array in network.weights and display the list.
# (The previous `weights_shape[]` was a syntax error: an empty subscript is not valid Python.)
weights_shape = [weight.shape for weight in network.weights]
weights_shape