# Solution: The Diverging Neural Network

This notebook provides the complete solution to the debug drill.

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import warnings
# Silence every warning category for the remainder of the session.
warnings.simplefilter("ignore")

# Seed NumPy's legacy global RNG so that any np.random draws are repeatable.
np.random.seed(42)

In [None]:
# Build a synthetic classification dataset: 1000 samples, 20 features,
# 10 of which are informative.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=42,
)

# Rescale the first three columns in place (x1000, x100, x0.01) so the
# features end up on vastly different scales.
X[:, :3] *= np.array([1000, 100, 0.01])

# Hold out 20% of the rows as the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the min/max of each rescaled column, computed over the full X.
print(
    "Feature scales:",
    f"  Feature 0: {X[:, 0].min():.0f} to {X[:, 0].max():.0f}",
    f"  Feature 1: {X[:, 1].min():.0f} to {X[:, 1].max():.0f}",
    f"  Feature 2: {X[:, 2].min():.4f} to {X[:, 2].max():.4f}",
    sep="\n",
)

In [None]:
# ===== BROKEN CODE =====
# Both bugs are kept on purpose so this cell can be compared against the
# fixed version that follows.
model_broken = MLPClassifier(
    random_state=42,
    max_iter=100,
    learning_rate_init=1.0,  # BUG: Too high!
    hidden_layer_sizes=(64, 32),
)

# BUG: Not scaled! The raw X_train is passed straight to fit().
# fit() is kept as its own statement so model_broken stays bound even if
# training raises.
model_broken.fit(X_train, y_train)

# Report the loss stored on the fitted model and its accuracy on the
# (equally unscaled) test split.
print(
    "=== Broken Model ===",
    f"Final loss: {model_broken.loss_:.4f}",
    f"Test accuracy: {model_broken.score(X_test, y_test):.1%}",
    sep="\n",
)

In [None]:
# ===== FIXED CODE =====

# Fix 1: Scale the features.
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fix 2: Use a reasonable learning rate.
# Relative to the broken model, this one also raises max_iter to 200 and
# turns on early stopping with a 10% validation fraction.
model_fixed = MLPClassifier(
    random_state=42,
    hidden_layer_sizes=(64, 32),
    learning_rate_init=0.001,  # Fixed: reasonable LR
    max_iter=200,
    validation_fraction=0.1,
    early_stopping=True,
)

model_fixed.fit(X_train_scaled, y_train)

# Report the loss stored on the fitted model plus accuracy on both splits.
print(
    "=== Fixed Model ===",
    f"Final loss: {model_fixed.loss_:.4f}",
    f"Train accuracy: {model_fixed.score(X_train_scaled, y_train):.1%}",
    f"Test accuracy: {model_fixed.score(X_test_scaled, y_test):.1%}",
    sep="\n",
)

In [None]:
# Compare loss curves: broken model on the left, fixed model on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Left panel: the curve is drawn only when the broken model actually recorded
# a non-empty loss_curve_; the axis labels and title are applied regardless.
if len(getattr(model_broken, 'loss_curve_', ())) > 0:
    axes[0].plot(model_broken.loss_curve_, 'r-', linewidth=2)
axes[0].set(xlabel='Iteration', ylabel='Loss', title='BROKEN: Unscaled + High LR')
axes[0].grid(True, alpha=0.3)

# Right panel: loss curve of the fixed model.
axes[1].plot(model_fixed.loss_curve_, 'g-', linewidth=2)
axes[1].set(xlabel='Iteration', ylabel='Loss', title='FIXED: Scaled + Reasonable LR')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Solution Summary

**Bug 1: No Feature Scaling**
- Problem: Features are on vastly different scales (the first three columns are multiplied by 1000, 100, and 0.01), which causes unstable gradients
- Fix: Apply `StandardScaler` before training

**Bug 2: Learning Rate Too High**
- Problem: LR=1.0 causes overshooting and divergence
- Fix: Use `learning_rate_init=0.001` (the scikit-learn default for `MLPClassifier`)

**Result:**
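- Loss decreases smoothly once both fixes are applied
- Test accuracy improves from ~50% to ~85% (approximate; the exact values are printed by the broken and fixed model cells above). Note that the fixed model also uses `max_iter=200` and early stopping, so the comparison is not limited to the two bug fixes alone.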