In [1]:
from sklearn.datasets import fetch_openml, load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from copy import deepcopy
import seaborn as sns
import itertools
import copy
import autograd.numpy as np
import pandas as pd
from tqdm import tqdm

from activation_functions import *
from cost_functions import *
from FFNN import *

# Setting the random seed
np.random.seed(42)

In [2]:
# Fetch the MNIST dataset
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
# Extract data (features) and target (labels)
X = mnist.data
y = mnist.target

# Scaling the mnist pixel values from 0-255 to 0-1
X = X / 255.0

In [3]:
# Splitting the data into testing, training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels to integers
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# One-hot encoding
y_train = to_one_hot(y_train)
y_test = to_one_hot(y_test)


In [4]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Widths in inches from revtex4's layout
# Single column ~3.375in, double column ~7in
columnwidth = 3.375  # use 7.0 for two-column-wide figures

# Compute figure size (width, height)
fig_width = columnwidth
fig_height = columnwidth / 1.618  # golden ratio for aesthetics
fig_size = [fig_width, fig_height]

mpl.rcParams.update({
    # Use LaTeX for text rendering
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": [],  # empty means use LaTeX default (Computer Modern)
    "font.size": 10.0,  # matches REVTeX's \normalsize

    # Adjust tick and label sizes
    "axes.labelsize": 10.0,
    "legend.fontsize": 8.0,
    "xtick.labelsize": 8.0,
    "ytick.labelsize": 8.0,

    # Figure dimensions
    "figure.figsize": fig_size,

    # Save with good resolution
    "savefig.dpi": 300,
})

## Without reg

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Convert one-hot encoded labels back to integer labels for Keras
y_train_keras = np.argmax(y_train, axis=1)
y_test_keras = np.argmax(y_test, axis=1)

# Define parameters matching implementation
epochs = 100
eta = 0.01

# Store results
keras_all_runs_results = []

# Create model matching architecture: 784 -> 500 -> 10
# NO regularization
model = keras.Sequential([
    layers.Dense(500, activation='sigmoid', 
                 input_shape=(X_train.shape[1],)),
    layers.Dense(10, activation='softmax')
])

# Use RMSprop optimizer to match train_network_SRMSprop
optimizer = keras.optimizers.RMSprop(learning_rate=eta)

# Compile model with categorical crossentropy
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train for one epoch at a time to track progress like implementation
for epoch in range(1, epochs + 1):
    # Train for 1 epoch
    history = model.fit(
        X_train, y_train_keras,
        epochs=1,
        verbose=0,
        validation_data=(X_test, y_test_keras)
    )
    
    # Get accuracy
    train_acc = history.history['accuracy'][0]
    test_acc = history.history['val_accuracy'][0]
    
    # Calculate overfitting gap
    overfit_gap = train_acc - test_acc
    
    # Store results
    keras_all_runs_results.append({
        'epoch': epoch,
        'train_acc': train_acc,
        'test_acc': test_acc,
        'overfit_gap': overfit_gap
    })
    
    # Print progress every 25 epochs
    if epoch % 25 == 0 or epoch == 1:
        print(f"Epoch {epoch}: Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}")

# Convert to DataFrame
df_keras_all_runs = pd.DataFrame(keras_all_runs_results)

print("\nKeras Results (No Regularization):")
print(df_keras_all_runs.head(10))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1: Train Acc = 0.9361, Test Acc = 0.9607
Epoch 25: Train Acc = 1.0000, Test Acc = 0.9814
Epoch 50: Train Acc = 1.0000, Test Acc = 0.9815
Epoch 75: Train Acc = 1.0000, Test Acc = 0.9814
Epoch 100: Train Acc = 1.0000, Test Acc = 0.9814

Keras Results (No Regularization):
   epoch  train_acc  test_acc  overfit_gap
0      1   0.936125  0.960714    -0.024589
1      2   0.972018  0.969500     0.002518
2      3   0.980018  0.972643     0.007375
3      4   0.983786  0.971500     0.012286
4      5   0.986804  0.974071     0.012732
5      6   0.989321  0.974071     0.015250
6      7   0.991554  0.975071     0.016482
7      8   0.993714  0.976000     0.017714
8      9   0.995464  0.977571     0.017893
9     10   0.996321  0.978357     0.017964


In [7]:
df_keras_all_runs.to_csv("MNIST-full_Keras_results.csv", index=False)

## With reg

In [None]:
# Define parameters matching implementation
epochs = 100
eta = 0.01
l2_lmbda = 0.001

# Store results across all runs
keras_all_runs_results = []


# Create model matching architecture: 784 -> 500 -> 10
model = keras.Sequential([
    layers.Dense(500, activation='sigmoid', 
                    input_shape=(X_train.shape[1],),
                    kernel_regularizer=regularizers.l2(l2_lmbda)),
    layers.Dense(10, activation='softmax',
                    kernel_regularizer=regularizers.l2(l2_lmbda))
])

# Use RMSprop optimizer to match train_network_SRMSprop
optimizer = keras.optimizers.RMSprop(learning_rate=eta)

# Compile model with categorical crossentropy
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train for one epoch at a time to track progress like implementation
for epoch in range(1, epochs + 1):
    # Train for 1 epoch
    history = model.fit(
        X_train, y_train_keras,
        epochs=1,
        verbose=0,
        validation_data=(X_test, y_test_keras)
    )
    
    # Get accuracy
    train_acc = history.history['accuracy'][0]
    test_acc = history.history['val_accuracy'][0]
    
    # Calculate overfitting gap
    overfit_gap = train_acc - test_acc
    
    # Store results
    keras_all_runs_results.append({
        'epoch': epoch,
        'train_acc': train_acc,
        'test_acc': test_acc,
        'overfit_gap': overfit_gap
    })
    
    # Print progress every 25 epochs
    if epoch % 25 == 0 or epoch == 1:
        print(f"Epoch {epoch}: Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}")

# Convert to DataFrame
df_keras_all_runs_reg = pd.DataFrame(keras_all_runs_results)

# Save as CSV
df_keras_all_runs_reg.to_csv("MNIST-full_Keras_reg_results.csv", index=False)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1: Train Acc = 0.8427, Test Acc = 0.8631
Epoch 25: Train Acc = 0.8965, Test Acc = 0.8946
Epoch 50: Train Acc = 0.8960, Test Acc = 0.9004
Epoch 75: Train Acc = 0.8987, Test Acc = 0.9069
Epoch 100: Train Acc = 0.8962, Test Acc = 0.9136
