# %%
"""
# Comprehensive Neural Network Hyperparameter & Architecture Experiments

**Objectives:** Evaluate the impact of various modeling choices on three datasets:
- **Covertype** (classification)
- **Diabetes** (regression)
- **MNIST** (classification)

**Axes of Exploration:**
1. **Architecture**: Deep vs. Wide vs. Balanced
2. **Batch Size**: 32, 64, 128, 256
3. **Epochs**: 10, 20, 30
4. **Learning Rate**: 0.01, 0.001, 0.0001
5. **Dropout**: 0.0, 0.2, 0.5
6. **Optimizer**: SGD, RMSprop, Adam
7. **LR Schedule**: constant, exponential decay (0.9)
8. **Activation**: ReLU, LeakyReLU, ELU
9. **Normalization**: with/without BatchNorm
10. **Regularization**: L2 (0, 0.001)
11. **Data Augmentation**: MNIST shifts/rotations
12. **Depth vs. Width Ablation**: constant param count
13. **Training/Inference Time**

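Results captured: test metrics, runtime, and parameter counts.

**Note:** the grid-search loop below sweeps only a subset of the values listed
above (batch sizes 32/64/128; epochs 10/20; learning rates 0.01/0.001) and does
not cover LR schedules, data augmentation, or the depth-vs-width ablation.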
"""

# %%
import itertools
import time

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_covtype, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks, optimizers, regularizers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# %%
# Data loaders

def load_covertype(sample_frac=0.2, random_state=42, test_size=0.2):
    """Load a random subsample of Covertype as a standardized train/test split.

    Args:
        sample_frac: Fraction of the rows to keep (drawn without replacement).
        random_state: Seed used both for the subsample and for the split.
        test_size: Fraction of the subsample held out for testing.

    Returns:
        ``[X_train, X_test, y_train, y_test]`` where the features are
        standardized and the labels are one-hot encoded.
    """
    X, y = fetch_covtype(return_X_y=True)
    rng = np.random.RandomState(random_state)
    idx = rng.choice(len(y), int(len(y) * sample_frac), replace=False)
    # Subtract 1 from the labels (scikit-learn documents them as 1..7).
    X, y = X[idx], y[idx] - 1
    y_ohe = OneHotEncoder(sparse_output=False).fit_transform(y.reshape(-1, 1))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_ohe, test_size=test_size, random_state=random_state
    )
    # Fit the scaler on the training rows only: fitting on the full data set
    # would leak test-set statistics into the training features.
    scaler = StandardScaler().fit(X_train)
    return [scaler.transform(X_train), scaler.transform(X_test), y_train, y_test]


def load_diabetes_data(test_size=0.2, random_state=42):
    """Load the diabetes regression data as a standardized train/test split.

    Args:
        test_size: Fraction of the rows held out for testing.
        random_state: Seed for the train/test split.

    Returns:
        ``[X_train, X_test, y_train, y_test]`` with standardized features and
        the raw regression target.
    """
    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Fit the scaler on the training rows only so that no test-set statistics
    # leak into the features the model is trained on.
    scaler = StandardScaler().fit(X_train)
    return [scaler.transform(X_train), scaler.transform(X_test), y_train, y_test]


def load_mnist_data(test_size=0.2, random_state=42):
    """Load MNIST as flattened feature vectors with one-hot labels.

    The two partitions returned by ``mnist.load_data()`` are pooled, each image
    is reshaped to a 784-long row and divided by 255.0, and the pooled data is
    then re-split at random.

    Args:
        test_size: Fraction of the pooled rows held out for testing.
        random_state: Seed for the train/test split.

    Returns:
        The ``train_test_split`` result: X_train, X_test, y_train, y_test.
    """
    train_part, test_part = mnist.load_data()
    images = np.concatenate([train_part[0], test_part[0]])
    digits = np.concatenate([train_part[1], test_part[1]])

    flat_pixels = images.reshape(-1, 28 * 28) / 255.0
    encoder = OneHotEncoder(sparse_output=False)
    one_hot = encoder.fit_transform(digits.reshape(-1, 1))

    return train_test_split(
        flat_pixels, one_hot, test_size=test_size, random_state=random_state
    )

# %%
# Model builder

def build_model(input_dim, output_dim, architecture, lr, dropout, activation='relu', use_bn=True, l2_strength=0.0, optimizer_cls=None):
    """Build and compile a fully connected Keras network.

    Each hidden layer is Dense -> (BatchNormalization) -> activation ->
    (Dropout). When ``output_dim > 1`` the head is a softmax layer trained with
    categorical cross-entropy and an accuracy metric; otherwise it is a single
    linear unit trained with MSE and no extra metrics.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output units.
        architecture: ``'deep'`` (128-64-32) or ``'balanced'`` (128-64); any
            other value selects the single 256-unit layout.
        lr: Learning rate handed to the optimizer.
        dropout: Dropout rate applied after every hidden activation; no
            Dropout layer is added when it is not positive.
        activation: ``'leaky'`` or ``'elu'``; any other value selects ReLU.
        use_bn: Whether to insert BatchNormalization after each hidden Dense.
        l2_strength: L2 penalty on the hidden Dense kernels; 0 disables it.
        optimizer_cls: Optimizer class, called as
            ``optimizer_cls(learning_rate=lr)``. Defaults to Adam, which is
            what this function always used before the parameter existed.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    hidden_layouts = {'deep': [128, 64, 32], 'balanced': [128, 64]}
    units = hidden_layouts.get(architecture, [256])
    activation_layers = {'leaky': layers.LeakyReLU, 'elu': layers.ELU}
    make_activation = activation_layers.get(activation)
    # A zero-strength penalty only adds a no-op loss term, so skip it.
    reg = regularizers.l2(l2_strength) if l2_strength else None

    model = keras.Sequential()
    model.add(layers.Input(shape=(input_dim,)))
    for u in units:
        # The activation is added as its own layer so that BatchNormalization
        # can sit between the linear transform and the non-linearity.
        model.add(layers.Dense(u, activation=None, kernel_regularizer=reg))
        if use_bn:
            model.add(layers.BatchNormalization())
        if make_activation is not None:
            model.add(make_activation())
        else:
            model.add(layers.Activation('relu'))
        if dropout > 0:
            model.add(layers.Dropout(dropout))

    is_classifier = output_dim > 1
    if is_classifier:
        model.add(layers.Dense(output_dim, activation='softmax'))
        loss = 'categorical_crossentropy'
    else:
        model.add(layers.Dense(1, activation='linear'))
        loss = 'mse'

    if optimizer_cls is None:
        optimizer_cls = optimizers.Adam
    model.compile(
        optimizer=optimizer_cls(learning_rate=lr),
        loss=loss,
        metrics=['accuracy'] if is_classifier else [],
    )
    return model

# %%
# Experiment loop

# (display name, loader returning X_train/X_test/y_train/y_test, is_classification)
datasets = [
    ('Covertype', load_covertype, True),
    ('Diabetes', load_diabetes_data, False),
    ('MNIST', load_mnist_data, True),
]
architectures = ['deep', 'wide', 'balanced']
batch_sizes = [32, 64, 128]
epochs_list = [10, 20]
learning_rates = [0.01, 0.001]
dropout_rates = [0.0, 0.2, 0.5]
activations = ['relu', 'leaky', 'elu']
use_bn_opts = [True, False]
l2_strengths = [0.0, 0.001]
optimizers_map = {'Adam': optimizers.Adam, 'SGD': optimizers.SGD, 'RMSprop': optimizers.RMSprop}
results = []


def _score_predictions(y_test, preds, is_class):
    """Return the test-set metrics for one model's predictions as a dict.

    Classification: accuracy plus macro-averaged precision/recall/F1 computed
    on the argmax of the one-hot targets and of the predicted rows.
    Regression: MSE, MAE and RMSE on the flattened predictions.
    """
    if is_class:
        y_pred = preds.argmax(axis=1)
        y_true = y_test.argmax(axis=1)
        return {
            'Accuracy': accuracy_score(y_true, y_pred),
            'Precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
            'Recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
            'F1': f1_score(y_true, y_pred, average='macro', zero_division=0),
        }
    pred = preds.flatten()
    mse = mean_squared_error(y_test, pred)
    return {
        'MSE': mse,
        'MAE': mean_absolute_error(y_test, pred),
        'RMSE': np.sqrt(mse),
    }


for name, loader, is_class in datasets:
    X_train, X_test, y_train, y_test = loader()
    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1] if is_class else 1
    # product() varies the rightmost factor fastest, i.e. it visits the
    # configurations in the same order as the equivalent nested for-loops.
    grid = itertools.product(
        architectures, batch_sizes, epochs_list, learning_rates, dropout_rates,
        activations, use_bn_opts, l2_strengths, optimizers_map.items(),
    )
    for arch, bs, ep, lr, do, act, use_bn, l2, (opt_name, opt_cls) in grid:
        # Thousands of models are built in this loop; reset Keras' global
        # state so graph/layer bookkeeping does not pile up across runs.
        keras.backend.clear_session()
        model = build_model(input_dim, output_dim, arch, lr, do, activation=act, use_bn=use_bn, l2_strength=l2)
        # build_model compiles with Adam by default; recompile so the run
        # really trains with the optimizer recorded in the 'Opt' column.
        model.compile(
            optimizer=opt_cls(learning_rate=lr),
            loss='categorical_crossentropy' if output_dim > 1 else 'mse',
            metrics=['accuracy'] if output_dim > 1 else [],
        )
        start = time.perf_counter()
        model.fit(
            X_train, y_train,
            epochs=ep, batch_size=bs,
            validation_split=0.1,
            callbacks=[callbacks.EarlyStopping(patience=3, restore_best_weights=True)],
            verbose=0,
        )
        duration = time.perf_counter() - start
        # verbose=0 keeps predict() from printing a progress bar per run.
        preds = model.predict(X_test, verbose=0)
        results.append({
            'Dataset': name, 'Arch': arch, 'Batch': bs, 'Epochs': ep, 'LR': lr,
            'Dropout': do, 'Act': act, 'BatchNorm': use_bn, 'L2': l2, 'Opt': opt_name,
            **_score_predictions(y_test, preds, is_class),
            'Time_s': duration,
            'Params': model.count_params(),
        })

# %%
# Gather the per-run records into one table and preview its first rows.
df = pd.DataFrame(data=results)
print(df.head(n=5))

