# Import Libraries
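Import the libraries this notebook uses directly: NumPy for numerical computation, Matplotlib for plotting, and scikit-learn for feature scaling and train/test splitting. pickle (model saving/loading), networkx (architecture visualization), and tqdm (progress tracking) are not imported in this cell; they are presumably used inside the project modules imported later.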

In [None]:
# Import necessary libraries
import numpy as np  # For numerical computations
import matplotlib.pyplot as plt  # For plotting
from sklearn.preprocessing import StandardScaler  # For feature scaling
from sklearn.model_selection import train_test_split  # For splitting data into train/test sets


# Activation Functions Implementation
Implement activation function classes including the base Activation class, Linear, ReLU, Sigmoid, Tanh, and Softmax, each with forward and derivative methods.

# Loss Functions Implementation
Implement loss function classes including the base Loss class, MSE (Mean Squared Error), BinaryCrossEntropy, and CategoricalCrossEntropy, each with forward and derivative methods.

# Weight Initializers Implementation
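Implement weight initializer classes: a base initializer class plus zero, uniform, and normal initializers. Note that the `initialization` module imported below exposes the latter two as `UniformInitialization` and `NormalInitialization`.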

# Layer Implementation
Implement the Layer class with forward and backward propagation methods, weight initialization, and gradient updates.

# FFNN Model Implementation
Implement the Feedforward Neural Network (FFNN) class with methods for creating the network, forward and backward propagation, weight updates, training, and prediction.

# Training and Evaluation on Dataset
Load the dataset, preprocess it for classification, and train the FFNN model with appropriate hyperparameters. Evaluate model performance using accuracy metrics.

In [None]:
# Import self-defined libraries
from activation import ReLU, Sigmoid
from initialization import NormalInitialization, UniformInitialization
from loss import BinaryCrossEntropy
from model import FFNN
from sklearn.datasets import fetch_openml

### Load Dataset & Data Preparation

In [None]:
# Fetch the MNIST dataset (784 pixel features per sample) from OpenML
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.astype(float)
y = mnist.target.astype(int)

# Convert to binary classification (0s vs 1s): keep only the rows labelled 0 or 1
mask = (y == 0) | (y == 1)
X = X[mask]
y = y[mask].to_numpy().reshape(-1, 1)  # Column vector: one label per row

# Split BEFORE scaling so that no statistics from the test set leak into the
# training features. test_size and random_state are unchanged, so the same rows
# end up in each split as before; only the scaling statistics differ.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit the scaler on the training split only, then apply
# the very same transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# ---------------------------------------------------------------------------
# Network configuration
# ---------------------------------------------------------------------------
# 784 inputs -> 32 hidden -> 16 hidden -> 1 output
layer_sizes = [784, 32, 16, 1]
# One activation per weight layer; the sigmoid output pairs with the binary loss
activations = [ReLU(), ReLU(), Sigmoid()]
loss_function = BinaryCrossEntropy()
# One initializer per weight layer, all seeded with 42
initializers = [
    NormalInitialization(mean=0, variance=0.1, seed=42),
    NormalInitialization(mean=0, variance=0.1, seed=42),
    UniformInitialization(seed=42),
]

model = FFNN(
    layer_sizes=layer_sizes,
    activations=activations,
    loss=loss_function,
    initializations=initializers,
)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
# NOTE(review): the held-out test split doubles as the validation set here.
history = model.train(
    x_train=X_train, y_train=y_train,
    batch_size=16, learning_rate=0.01, epochs=100,
    x_y_val=(X_test, y_test),
    verbose=1,
)

# ---------------------------------------------------------------------------
# Loss curves
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 6))
for key, label in (('train_loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history[key], label=label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# ---------------------------------------------------------------------------
# Evaluation on the held-out split
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)
# Threshold the model outputs at 0.5 to obtain hard 0/1 labels
y_pred_binary = (y_pred > 0.5).astype(int)
# Fraction of element-wise matches between predicted and true labels
accuracy = np.mean(y_pred_binary == y_test)
print(f"Test accuracy: {accuracy:.4f}")

# ---------------------------------------------------------------------------
# Persistence round trip
# ---------------------------------------------------------------------------
model.save("ffnn_model.pkl")

# Reload from disk and score the restored model the same way as above
loaded_model = FFNN.load("ffnn_model.pkl")
y_pred_loaded = loaded_model.predict(X_test)
y_pred_loaded_binary = (y_pred_loaded > 0.5).astype(int)
accuracy_loaded = np.mean(y_pred_loaded_binary == y_test)
print(f"Test accuracy (loaded model): {accuracy_loaded:.4f}")

# Model Visualization
Implement visualization of model architecture, training history, weight distributions, and gradient distributions using matplotlib and networkx.

In [None]:
# Draw the network architecture
model.plot_model()

# Training history: one curve per recorded loss series
plt.figure(figsize=(10, 6))
for key, label in (('train_loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history[key], label=label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Weight distributions, called with the model's default arguments
model.plot_weight_distribution()

# Gradient distributions, called with the model's default arguments
model.plot_gradient_distribution()

# Save and Load Model
Demonstrate saving the trained model to disk and loading it back, then verify the loaded model produces identical predictions.

In [None]:
# Write the trained model to disk, then read it straight back
# NOTE(review): the .pkl extension suggests pickle; only load files you trust.
model.save("ffnn_model.pkl")
loaded_model = FFNN.load("ffnn_model.pkl")

# Run both the in-memory and the restored model on the same held-out inputs
y_pred_original = model.predict(X_test)
y_pred_loaded = loaded_model.predict(X_test)

# The round trip is considered lossless if the outputs agree within
# np.allclose's default floating-point tolerance
identical_predictions = np.allclose(y_pred_original, y_pred_loaded)
print(f"Are predictions identical? {'Yes' if identical_predictions else 'No'}")

# Accuracy of the restored model, thresholding its outputs at 0.5
y_pred_loaded_binary = (y_pred_loaded > 0.5).astype(int)
accuracy_loaded = np.mean(y_pred_loaded_binary == y_test)
print(f"Test accuracy (loaded model): {accuracy_loaded:.4f}")