In [None]:
# Imports and Setup
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

from train_mlp_numpy import train
from mlp_numpy import MLP
from modules import *

In [None]:
# Data Generation and Preprocessing
# Sample the two-class "moons" toy dataset; the seed is fixed so every run
# sees the same points.
X, y_scalar = make_moons(
    n_samples=1000,
    noise=0.02,
    random_state=42,
)

# Scatter plot of the raw samples, coloured by their class label
plt.figure(figsize=(8, 6))
plt.scatter(x=X[:, 0], y=X[:, 1], s=40, c=y_scalar, cmap=plt.cm.RdYlBu)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Visualization of the Raw Moons Dataset')
plt.grid(True)
plt.show()


# One-hot encode the labels: row k of the 2x2 identity matrix is the
# encoding of class k, so indexing it with the label vector encodes them all.
y_one_hot = np.eye(2)[y_scalar]

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_one_hot,
    test_size=0.2,
    random_state=42,
)

# Pack the splits into the single tuple that is later passed to train(data=...)
data_tuple = (X_train, y_train, X_test, y_test)
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


def plot_single_experiment_results(steps, train_accs, test_accs, loss_log, batch_size_title):
    """
    Show the learning curves of one experiment as two stacked panels.

    Top panel:    train and test accuracy plotted against ``steps``.
    Bottom panel: loss plotted against ``steps``.
    Both panels share the x-axis, which is labelled 'Epoch'.

    ``batch_size_title`` only influences the panel titles: an ``int`` is
    printed as the batch size, any other value is reported as "Full(BGD)".
    """
    # Anything that is not an int is presented as full-batch gradient descent
    if isinstance(batch_size_title, int):
        title_prefix = f"Batch Size = {batch_size_title}"
    else:
        title_prefix = "Batch Size = Full(BGD)"

    # Line appearance of the three curves, kept together for easy comparison
    train_style = dict(label='Train Accuracy', marker='o', linestyle='-', markersize=4, markevery=1)
    test_style = dict(label='Test Accuracy', marker='x', linestyle='--', markersize=6, markevery=1)
    loss_style = dict(label='Loss', marker='s', linestyle='-.', color='green', markersize=4, markevery=1)

    # Two rows, one column; the Figure handle itself is not needed afterwards
    _, (acc_ax, loss_ax) = plt.subplots(nrows=2, ncols=1, figsize=(12, 10), sharex=True)

    # --- Top panel: accuracy ---
    acc_ax.plot(steps, train_accs, **train_style)
    acc_ax.plot(steps, test_accs, **test_style)
    acc_ax.set(
        ylabel='Accuracy',
        ylim=(0.47, 1.01),
        title=f'{title_prefix}: Accuracy with Epoch',
    )

    # --- Bottom panel: loss ---
    loss_ax.plot(steps, loss_log, **loss_style)
    loss_ax.set(
        ylabel='Loss',
        xlabel='Epoch',
        title=f'{title_prefix}: Loss with Epoch',
    )

    # Grid and legend are configured identically on both panels
    for panel in (acc_ax, loss_ax):
        panel.grid(True)
        panel.legend()

    # Extra padding keeps the panel titles from colliding
    plt.tight_layout(pad=3.0)
    plt.show()


def plot_decision_boundary(model, X, y_scalar, batch_size_title):
    """
    Plot the decision regions of a trained model with the data overlaid.

    A grid with 0.02 spacing is laid over the bounding box of ``X`` (padded
    by 0.5 on every side). Every grid point is classified by the model, and
    the predicted classes are drawn as filled contours underneath a scatter
    plot of ``X``.

    Args:
        model: Object exposing ``forward(inputs)``. It is called once with an
            (n_grid_points, 2) array of grid coordinates; its output is
            passed through ``SoftMax().forward`` and arg-maxed over axis 1.
        X: 2-D array of samples; column 0 is drawn on the x-axis and
            column 1 on the y-axis.
        y_scalar: Per-sample values used to colour the scatter points.
        batch_size_title: Value interpolated into the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))

    # Bounding box of the data, padded so the boundary is visible past the points
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    # Flatten the grid into one (x, y) row per grid point
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # Forward pass on the grid; the softmax layer is external to the MLP
    logits = model.forward(grid_points)
    softmax_layer = SoftMax()
    probs = softmax_layer.forward(logits)

    # Predicted class per grid point, reshaped back onto the 2-D grid
    Z = np.argmax(probs, axis=1)
    Z = Z.reshape(xx.shape)

    # Coloured decision regions
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)

    # Original data points on top of the regions
    plt.scatter(X[:, 0], X[:, 1], c=y_scalar, s=40, edgecolors='k', cmap=plt.cm.RdYlBu)

    title = f'Decision Boundary (Batch Size = {batch_size_title})'
    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.grid(True)
    plt.show()

In [None]:
# --- Hyperparameters shared by every experiment ---
DNN_HIDDEN_UNITS = '20,20'   # forwarded to train() as dnn_hidden_units
MAX_EPOCHS = 100_000         # forwarded to train() as max_steps
EVAL_FREQ = 1_000            # forwarded to train() as eval_freq

# --- Experiment configurations ---
# Maps each batch size to the learning rate paired with it.
# Only the full-batch (BGD) setting, keyed 'full', is configured at present.
EXPERIMENT_CONFIGS = {'full': 1}

In [None]:
# Block 6: The Experiment Loop

# Per-batch-size containers: the logged curves and the trained model
results_history, trained_models = {}, {}

print("Starting training experiments with custom learning rates...")

# Each EXPERIMENT_CONFIGS entry pairs a batch size (key) with its learning rate (value)
for bs, lr in EXPERIMENT_CONFIGS.items():
    print(f"\n{'='*20} TRAINING WITH BATCH SIZE: {bs}, LEARNING RATE: {lr} {'='*20}")

    # train() is expected to hand back five values, the last one being the
    # trained model object
    steps, train_accs, test_accs, loss_log, final_model = train(
        data=data_tuple,
        dnn_hidden_units=DNN_HIDDEN_UNITS,
        learning_rate=lr,
        max_steps=MAX_EPOCHS,
        eval_freq=EVAL_FREQ,
        batch_size=bs,
        momentum=0.6,
        leaky=True,
    )

    print(f"\n--- Results for Batch Size: {bs} ---")
    plot_single_experiment_results(steps, train_accs, test_accs, loss_log, bs)

    # Keep the curves of this run so they can be plotted or compared later
    results_history[bs] = dict(
        steps=steps,
        train_accs=train_accs,
        test_accs=test_accs,
        loss=loss_log,
    )

    # Keep the trained model; the decision-boundary cell reads it back
    trained_models[bs] = final_model

print("\nAll training experiments completed!")

In [None]:
# Block 7: Visualize All Decision Boundaries
# One figure per trained model, drawn over the full (unsplit) dataset.
print("\nVisualizing the decision boundary for each model...")
for bs in trained_models:
    model = trained_models[bs]
    plot_decision_boundary(model, X, y_scalar, batch_size_title=bs)