# Test File

## 1. Import Necessary Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from ANN import *

## 2. Load the MNIST Dataset

In [2]:
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, parser='auto')

## 3. Preprocessing (Formatting the data)

In [None]:
X = X.to_numpy() if hasattr(X, 'to_numpy') else np.array(X)
y = y.to_numpy() if hasattr(y, 'to_numpy') else np.array(y)
y = y.astype(int) # numpy can't implicit cast, so explicit cast here

# shuffles the contents
idx = np.random.choice(len(X), 70000, replace=False)
X, y = X[idx], y[idx]
# normalizes the RGB scale for the black-white value
X = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 4. Model Preparation

### a. From scratch ANN model

In [8]:
# Initiate the layers
inp = Layer(linear, [he_init, {"seed": 42}], 784) 
hid_1 = Layer(h_tan, [he_init, {"seed": 47}], 64)
hid_2 = Layer(h_tan, [he_init, {"seed": 45}], 64)
hid_3 = Layer(h_tan, [he_init, {"seed": 43}], 64)
hid_4 = Layer(h_tan, [he_init, {"seed": 48}], 64)
out = Layer(softmax, [he_init, {"seed": 44}], 10)

# Inititate the model
scratch_model = ANN(None, [hid_1], input=inp, output=out, error=CCE) 
scratch_model.train(X_train, y_train, batch_size=32, l_rate=0.01, epoch=5, verb=0)

(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(7

{'train_loss': [0.6510729101438498,
  0.3677235073128312,
  0.31732346657558935,
  0.2887878938598975,
  0.26877665871642165],
 'val_loss': []}

### b. sklearn library ANN model

In [12]:
mlp = MLPClassifier(hidden_layer_sizes=(64,), activation='identity', solver='sgd', 
                      max_iter=5, random_state=42, learning_rate_init=0.01, batch_size=32) # initiate the model

## 5. Model Testing and Comparison

In [13]:
preds = scratch_model.predict(X_test)
test_acc = accuracy_score(y_test, np.argmax(preds, axis=1))
print(f"Test accuracy: {test_acc:.4f}")

mlp.fit(X_train, y_train)
mlp_acc = mlp.score(X_test, y_test)
print(f"sklearn accuracy: {mlp_acc:.4f}")

(14000, 785)
(785, 64)
(14000, 65)
(65, 10)
Test accuracy: 0.9242
sklearn accuracy: 0.9101




## 6. Other Feature Testing

In [14]:
def to_one_hot(y, num_classes=10):
    one_hot = np.zeros((len(y), num_classes))
    one_hot[np.arange(len(y)), y] = 1
    return one_hot

### a. Activation Function Test

In [19]:
def test_activation():
    y_train_hot = to_one_hot(y_train)
        
    acts = {"ReLU": relU, "Sigmoid": sigmoid, "Tanh": h_tan, "Linear": linear}
    results = {}

    for name, act in acts.items():
        inp = Layer(act, [xavier_init, {"seed": 42}], 784)
        hid = Layer(act, [xavier_init, {"seed": 43}], 64)
        out = Layer(softmax, [xavier_init, {"seed": 44}], 10)
        
        model = ANN(None, [hid], input=inp, output=out, error=CCE)
        
        start = time.time()
        model.train(X_train, y_train_hot, batch_size=32, l_rate=0.01, epoch=3, verb=0)
        train_time = time.time() - start
        
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, np.argmax(preds, axis=1))
        
        results[name] = {'accuracy': acc, 'time': train_time}
        print(f"{name}: Acc={acc:.4f}, Time={train_time:.2f}s")

In [None]:
test_activation()

### b. Weight Initialization Test

In [17]:
def test_init_methods():
    y_train_hot = to_one_hot(y_train)
    
    inits = {
        "Zero": zero_init,
        "Uniform": random_uniform,
        "Normal": random_normal,
        "Xavier": xavier_init,
        "He": he_init
    }
    
    for name, init in inits.items():
        inp = Layer(relU, [init, {"seed": 42}], 784)
        hid = Layer(relU, [init, {"seed": 43}], 64)
        out = Layer(softmax, [init, {"seed": 44}], 10)
        
        model = ANN(None, [hid], input=inp, output=out, error=CCE)
        model.train(X_train, y_train_hot, batch_size=32, l_rate=0.01, epoch=2, verb=0)
        
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, np.argmax(preds, axis=1))
        
        print(f"{name} init: {acc:.4f}")

In [18]:
test_init_methods()

(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(7

### c. Loss Function Test

In [20]:
def test_losses():
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y_bin = np.array([[0], [1], [1], [0]])
    
    for loss_name, loss_fn in [("MSE", MSE), ("BCE", BCE)]:
        inp = Layer(sigmoid, [xavier_init, {"seed": 42}], 2)
        hid = Layer(sigmoid, [xavier_init, {"seed": 43}], 4)
        out = Layer(sigmoid, [xavier_init, {"seed": 44}], 1)
        
        model = ANN(None, [hid], input=inp, output=out, error=loss_fn)
        hist = model.train(X, y_bin, batch_size=4, l_rate=0.5, epoch=100, verb=0)
        
        preds = model.predict(X)
        print(f"{loss_name}: loss={hist['train_loss'][-1]:.4f}, preds={preds.flatten().round(2)}")

In [21]:
test_losses()

(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)
(5, 1)
(4, 3)
(3, 4)
(4, 5)

### d. Save and Load Test

In [22]:
def test_save_load():
    y_train_hot = to_one_hot(y_train)
    
    inp = Layer(relU, [xavier_init, {"seed": 42}], 784)
    hid = Layer(relU, [xavier_init, {"seed": 43}], 64)
    out = Layer(softmax, [xavier_init, {"seed": 44}], 10)
    
    model = ANN(None, [hid], input=inp, output=out, error=CCE)
    model.train(X_train, y_train_hot, batch_size=32, l_rate=0.01, epoch=2, verb=0)
    
    preds_before = model.predict(X_test[:5])
    model.save("test_model.pkl")
    
    new_model = ANN(load_path="test_model.pkl")
    preds_after = new_model.predict(X_test[:5])
    
    match = np.allclose(preds_before, preds_after)
    print(f"Save/load test: {'Pass' if match else 'Fail'}")

In [23]:
test_save_load()

(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(785, 64)
(32, 65)
(65, 10)
(32, 785)
(7

### e. Visualization Test

In [24]:
def test_viz():
    inp = Layer(sigmoid, [xavier_init, {"seed": 42}], 2)
    hid = Layer(sigmoid, [xavier_init, {"seed": 43}], 3)
    out = Layer(sigmoid, [xavier_init, {"seed": 44}], 1)
    
    model = ANN(None, [hid], input=inp, output=out, error=MSE)
    
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])
    
    model.train(X, y, batch_size=4, l_rate=0.5, epoch=100, verb=0)
    
    model.visualize_network(filename="network_viz.png")
    model.w_dist_show(filename="weight_dist.png")
    model.wg_dist_show(filename="weight_grad_dist.png")
    model.plot_training_history(filename="training_hist.png")

In [25]:
test_viz()

(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)
(4, 1)
(4, 3)
(3, 3)
(4, 4)