
# Neural Network Training — Keras (TensorFlow) Demo

This notebook mirrors the NumPy/PyTorch demos using **Keras** for a compact, high-level API.

**Covers:**
- Synthetic 2D dataset with train/val split and standardization
- MLP (2 → H → 1) with a ReLU hidden layer, a sigmoid output, and `binary_crossentropy` loss
- Training with `model.fit`, validation metrics, and plots
- Experiments: learning rate, L2 regularization (`kernel_regularizer`), dropout
- Seed sensitivity (mean±std val accuracy)
- Label-noise robustness
- Gradient sanity check with `tf.GradientTape`


In [None]:

import os, random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# Report the runtime: TensorFlow version and whether any GPU is visible to it.
device = "GPU" if len(tf.config.list_physical_devices('GPU')) > 0 else "CPU"
print("TF version:", tf.__version__)
print("Device:", device)

def set_seed(seed=42):
    """Seed the Python, NumPy and TensorFlow global RNGs with the same value.

    Args:
        seed: Integer seed handed to each of the three seeding functions.
    """
    # Same order as always: Python's `random`, then NumPy, then TensorFlow.
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

# Seed once at import time so the cells below start from a known RNG state.
set_seed(42)


## 1) Data: 2D blobs + standardization

In [None]:

def make_blobs(n_per_class=400, spread=1.1, offset=2.0, seed=42):
    """Sample two shuffled 2-D Gaussian clouds for binary classification.

    Class 0 is centred at (-offset, 0) and class 1 at (+offset, 0). Both share
    the covariance ``spread * I``, so ``spread`` acts as the per-axis variance.

    Args:
        n_per_class: Number of points drawn for each class.
        spread: Scalar multiplying the 2x2 identity to form the covariance.
        offset: Distance of each class centre from the origin along x1.
        seed: Seed for the local NumPy generator (global RNG state is untouched).

    Returns:
        (X, y): float32 features of shape (2*n_per_class, 2) and float32 labels
        of shape (2*n_per_class, 1), permuted with the same random order.
    """
    rng = np.random.default_rng(seed)
    cov = spread * np.eye(2)

    # Class 0 is drawn before class 1; that order fixes the random stream.
    clouds = [
        rng.multivariate_normal(np.array([sign * offset, 0.0]), cov, size=n_per_class)
        for sign in (-1.0, 1.0)
    ]
    features = np.concatenate(clouds, axis=0).astype(np.float32)
    labels = np.repeat([0.0, 1.0], n_per_class).astype(np.float32).reshape(-1, 1)

    order = rng.permutation(len(features))
    return features[order], labels[order]

def standardize(X):
    """Shift each column of ``X`` to zero mean and scale it to unit std.

    Args:
        X: 2-D array; statistics are taken along axis 0 (per column).

    Returns:
        (Z, mean, std): the standardized array plus the row-vector statistics
        used to compute it. ``std`` already includes the 1e-8 stabilizer.
    """
    col_mean = np.mean(X, axis=0, keepdims=True)
    # The small constant keeps the division defined for constant columns.
    col_std = np.std(X, axis=0, keepdims=True) + 1e-8
    scaled = (X - col_mean) / col_std
    return scaled, col_mean, col_std

X_raw, y = make_blobs(n_per_class=400, spread=1.1, offset=2.0, seed=42)

# Split BEFORE standardizing: the mean/std must come from the training rows only,
# otherwise validation data leaks into the preprocessing and the validation
# metrics are (slightly) optimistic.
n = len(X_raw)
split = int(0.8*n)
X_train, X_mean, X_std = standardize(X_raw[:split])
X_val = (X_raw[split:] - X_mean) / X_std   # reuse the training statistics
y_train, y_val = y[:split], y[split:]

# Keep `X` available as the full feature matrix scaled with the training statistics.
X = (X_raw - X_mean) / X_std

print("Train:", X_train.shape, y_train.shape, "| Val:", X_val.shape, y_val.shape)

plt.figure(figsize=(5,5))
plt.scatter(X_train[:,0], X_train[:,1], c=y_train.flatten(), s=10, alpha=0.6)
plt.title("Training Data (standardized)")
plt.xlabel("x1"); plt.ylabel("x2")
plt.show()


## 2) Keras Model: 2 → H → 1

In [None]:

def build_model(hidden_dim=16, lr=0.05, l2=0.0, dropout_p=0.0, seed=42):
    """Build and compile a 2 -> hidden_dim -> 1 binary classifier.

    Args:
        hidden_dim: Number of units in the ReLU hidden layer.
        lr: Learning rate for the SGD optimizer.
        l2: L2 penalty on the hidden layer's kernel; no regularizer when <= 0.
        dropout_p: Dropout rate after the hidden layer; no Dropout layer when <= 0.
        seed: Passed to ``set_seed`` before any layer is created.

    Returns:
        A compiled ``keras.Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    set_seed(seed)
    stack = [
        layers.Input(shape=(2,)),
        layers.Dense(hidden_dim, activation="relu",
                     kernel_regularizer=regularizers.l2(l2) if l2 > 0 else None),
    ]
    # Only add Dropout when it is actually requested. An identity Lambda
    # placeholder would add a no-op layer and tie the saved model to a Python lambda.
    if dropout_p > 0:
        stack.append(layers.Dropout(dropout_p))
    stack.append(layers.Dense(1, activation="sigmoid"))

    model = keras.Sequential(stack)
    opt = keras.optimizers.SGD(learning_rate=lr)
    model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
    return model

def train_model(hidden_dim=16, lr=0.05, l2=0.0, dropout_p=0.0,
                epochs=200, batch_size=64, seed=42, verbose=0,
                train_data=None, val_data=None):
    """Build a fresh model with ``build_model`` and fit it.

    Args:
        hidden_dim, lr, l2, dropout_p, seed: Forwarded to ``build_model``.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size used by ``model.fit``.
        verbose: Verbosity flag forwarded to ``model.fit``.
        train_data: Optional ``(X, y)`` tuple to train on. Defaults to the
            notebook-level ``(X_train, y_train)``.
        val_data: Optional ``(X, y)`` tuple used as ``validation_data``.
            Defaults to the notebook-level ``(X_val, y_val)``.

    Returns:
        (model, hist): the fitted model and the object returned by ``model.fit``.
    """
    # Fall back to the notebook globals so existing calls behave exactly as before.
    if train_data is None:
        train_data = (X_train, y_train)
    if val_data is None:
        val_data = (X_val, y_val)
    x_fit, y_fit = train_data

    model = build_model(hidden_dim, lr, l2, dropout_p, seed)
    hist = model.fit(x_fit, y_fit, validation_data=val_data,
                     epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model, hist


## 3) Baseline Training

In [None]:

# Baseline run: default architecture, no regularization, no dropout.
model, hist = train_model(
    hidden_dim=16, lr=0.05, l2=0.0, dropout_p=0.0,
    epochs=200, batch_size=64, seed=42, verbose=0,
)
# evaluate() returns [loss, accuracy]; index 1 is the accuracy metric.
val_acc = model.evaluate(X_val, y_val, verbose=0)[1]
print("Validation accuracy (baseline):", round(val_acc, 4))

fig, ax = plt.subplots(figsize=(5, 4))
ax.plot(hist.history["loss"], label="train")
ax.plot(hist.history["val_loss"], label="val")
ax.set_title("Loss Curves (Baseline)")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
plt.show()


## 4) Experiments

### A) Learning Rate

In [None]:

# Both runs share seed=100, so the learning rate is the only setting that differs.
_, h1 = train_model(seed=100, lr=0.5, epochs=100, verbose=0)
_, h2 = train_model(seed=100, lr=0.05, epochs=100, verbose=0)

fig, ax = plt.subplots(figsize=(5, 4))
ax.plot(h1.history["val_loss"], label="val (lr=0.5)")
ax.plot(h2.history["val_loss"], label="val (lr=0.05)")
ax.set_title("Learning Rate Comparison")
ax.set_xlabel("Epoch")
ax.set_ylabel("Val Loss")
ax.legend()
plt.show()


### B) L2 Regularization

In [None]:

# Both runs share seed=123, so the L2 strength is the only setting that differs.
_, h3 = train_model(seed=123, lr=0.05, l2=1e-3, epochs=200, verbose=0)
_, h4 = train_model(seed=123, lr=0.05, l2=1e-2, epochs=200, verbose=0)

fig, ax = plt.subplots(figsize=(5, 4))
ax.plot(h3.history["val_loss"], label="val (l2=1e-3)")
ax.plot(h4.history["val_loss"], label="val (l2=1e-2)")
ax.set_title("L2 Regularization Comparison")
ax.set_xlabel("Epoch")
ax.set_ylabel("Val Loss")
ax.legend()
plt.show()


### C) Dropout

In [None]:

# Both runs share seed=222, so the dropout rate is the only setting that differs.
_, h5 = train_model(seed=222, lr=0.05, dropout_p=0.2, epochs=200, verbose=0)
_, h6 = train_model(seed=222, lr=0.05, dropout_p=0.5, epochs=200, verbose=0)

fig, ax = plt.subplots(figsize=(5, 4))
ax.plot(h5.history["val_loss"], label="val (dropout=0.2)")
ax.plot(h6.history["val_loss"], label="val (dropout=0.5)")
ax.set_title("Dropout Comparison")
ax.set_xlabel("Epoch")
ax.set_ylabel("Val Loss")
ax.legend()
plt.show()


### D) Seed Sensitivity

In [None]:

def seed_sweep(n_runs=10, base_seed=0, **kwargs):
    """Train the same configuration under several seeds and collect val accuracy.

    Run ``i`` uses seed ``base_seed + 17*i``.

    Args:
        n_runs: Number of independent training runs.
        base_seed: Seed of the first run.
        **kwargs: Extra keyword arguments forwarded to ``train_model``
            (``seed`` and ``verbose`` are set here and must not be passed).

    Returns:
        NumPy array of length ``n_runs`` holding each run's accuracy on the
        notebook-level ``(X_val, y_val)``.
    """
    def _val_accuracy(run_seed):
        trained, _history = train_model(seed=run_seed, verbose=0, **kwargs)
        # evaluate() returns [loss, accuracy]; keep the accuracy.
        return trained.evaluate(X_val, y_val, verbose=0)[1]

    return np.array([_val_accuracy(base_seed + 17 * run) for run in range(n_runs)])

# Ten runs of the baseline configuration, differing only in the seed.
accs = seed_sweep(
    n_runs=10, base_seed=0,
    hidden_dim=16, lr=0.05, l2=0.0, dropout_p=0.0,
    epochs=200, batch_size=64,
)
print("Seed sweep (n=10) — val acc mean±std:", round(accs.mean(),4), "±", round(accs.std(),4))

fig, ax = plt.subplots(figsize=(5, 3))
ax.plot(accs, marker='o')
ax.set_ylim(0, 1)  # full accuracy range, so the run-to-run spread is shown to scale
ax.set_title("Val Accuracy across Seeds")
ax.set_xlabel("Run")
ax.set_ylabel("Val Acc")
plt.show()


### E) Label-Noise Robustness

In [None]:

def flip_labels(y, noise_rate=0.1, seed=42):
    """Return a copy of binary labels with a fixed fraction flipped (0 <-> 1).

    Args:
        y: NumPy array of 0/1 labels; it is not modified.
        noise_rate: Fraction of entries to flip; ``int(noise_rate * len(y))``
            distinct positions are chosen.
        seed: Seed for the local NumPy generator that picks the positions.

    Returns:
        A new array shaped like ``y`` with the selected labels inverted.
    """
    n_labels = len(y)
    n_flips = int(noise_rate * n_labels)
    # Sampling without replacement guarantees exactly n_flips distinct positions.
    flip_at = np.random.default_rng(seed).choice(n_labels, size=n_flips, replace=False)

    noisy = y.copy()
    noisy[flip_at] = 1 - noisy[flip_at]
    return noisy

# Fit settings shared by every run in this experiment.
fit_kwargs = dict(validation_data=(X_val, y_val), epochs=200, batch_size=64, verbose=0)

for noise in [0.0, 0.1, 0.2]:
    # Only the training labels are corrupted; validation labels stay clean.
    y_tr_noisy = flip_labels(y_train, noise_rate=noise, seed=999)
    print(f"\nNoise rate = {noise:.1f}")

    # Unregularized model.
    m1 = build_model(lr=0.05, l2=0.0, seed=321)
    m1.fit(X_train, y_tr_noisy, **fit_kwargs)
    acc1 = m1.evaluate(X_val, y_val, verbose=0)[1]

    # Same seed and data, with an L2 penalty on the hidden layer.
    m2 = build_model(lr=0.05, l2=1e-3, seed=321)
    m2.fit(X_train, y_tr_noisy, **fit_kwargs)
    acc2 = m2.evaluate(X_val, y_val, verbose=0)[1]

    print(f"Val acc (no L2): {acc1:.3f} | Val acc (L2=1e-3): {acc2:.3f}")


## 5) Gradient Sanity Check with `tf.GradientTape`

In [None]:

# Small batch
Xb = X_train[:5]
yb = y_train[:5]

model_gc = build_model(hidden_dim=5, lr=0.05, l2=1e-3, dropout_p=0.0, seed=7)

with tf.GradientTape() as tape:
    preds = model_gc(Xb, training=True)
    loss = keras.losses.binary_crossentropy(yb, preds)
    loss = tf.reduce_mean(loss) + sum(model_gc.losses)  # include L2 if any

grads = tape.gradient(loss, model_gc.trainable_variables)
for var, g in zip(model_gc.trainable_variables, grads):
    print(var.name, "grad shape:", None if g is None else g.shape)
    # Actually check something: every trainable variable must receive a gradient
    # of its own shape, and that gradient must contain only finite values.
    assert g is not None, f"no gradient reached {var.name}"
    assert tuple(g.shape) == tuple(var.shape), f"gradient shape mismatch for {var.name}"
    assert bool(tf.reduce_all(tf.math.is_finite(g))), f"non-finite gradient for {var.name}"
print("Gradient sanity check complete.")
