# Implicit Bias of Optimization in Deep Networks

Interactive analysis notebook for exploring the implicit bias of different optimizers (GD, SGD, Adam, Lion).

In [None]:
import sys
sys.path.insert(0, '..')

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.models import LinearModel, ShallowMLP, DeepMLP
from src.optimizers import get_optimizer, FullBatchGD, Lion
from src.metrics import (
    MetricsTracker, compute_weight_norms, compute_margin,
    compute_accuracy, compute_margin_ratios
)
from src.data import (
    generate_linearly_separable, generate_xor_data,
    generate_spiral_data, get_dataset
)
from src.visualization import (
    plot_training_curves, plot_decision_boundary,
    plot_multi_decision_boundaries, plot_margin_comparison
)

plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline

print('Setup complete!')

## 1. Quick Demo: Linear Model on Separable Data

In [None]:
# Draw a 2-D linearly separable sample (fixed seed for reproducibility).
X, y = generate_linearly_separable(n_samples=100, dim=2, margin=1.0, seed=42)

# Scatter the two classes; a point's class is the sign of its label.
plt.figure(figsize=(8, 6))
class_styles = (
    (y > 0, 'blue', '+1'),
    (y < 0, 'red', '-1'),
)
for class_mask, class_color, class_label in class_styles:
    plt.scatter(
        X[class_mask, 0],
        X[class_mask, 1],
        c=class_color,
        label=class_label,
        edgecolors='white',
    )
plt.xlabel('x₁')
plt.ylabel('x₂')
plt.title('Linearly Separable Data')
plt.legend()
plt.show()

In [None]:
def train_and_compare(X, y, epochs: int = 2000, lr: float = 0.01):
    """Train one linear classifier per optimizer on the same data and compare them.

    For each of ``gd``, ``sgd``, ``adam`` and ``lion`` a bias-free
    ``LinearModel`` is built, its weight is re-drawn from a zero-mean normal
    with std 0.01 under a fixed seed (so every optimizer starts from the same
    point), and it is trained on the full batch with ``BCEWithLogitsLoss``.
    After training, the margin ratios returned by ``compute_margin_ratios``
    are printed for that optimizer.

    Args:
        X: Feature matrix; ``X.shape[1]`` is used as the model input dimension.
            Presumably a float ``torch.Tensor`` of shape (n_samples, dim).
        y: Labels in {-1, +1}; they are mapped to {0, 1} for the BCE loss.
        epochs: Number of full-batch optimisation steps per optimizer.
        lr: Learning rate passed to ``get_optimizer``.

    Returns:
        ``(results, models)``. ``results[name]`` is a dict with the keys
        ``'loss'``, ``'margin'``, ``'norm_l2'`` and ``'norm_linf'``, each a
        list with one entry per 100 epochs (epochs 0, 100, 200, ...).
        ``models[name]`` is the trained model for that optimizer.
    """
    optimizers = ['gd', 'sgd', 'adam', 'lion']
    results = {}
    models = {}

    # Identical for every optimizer, so build them once. Neither consumes
    # random numbers, so hoisting does not alter the seeded initialisation.
    loss_fn = nn.BCEWithLogitsLoss()
    y_bce = (y + 1) / 2  # {-1, +1} -> {0, 1}

    for opt_name in optimizers:
        # Fresh model. The seed is set right before the explicit re-init, which
        # overwrites the constructor's own initialisation of the only
        # parameter (bias=False), making the start point deterministic.
        model = LinearModel(input_dim=X.shape[1], output_dim=1, bias=False)
        torch.manual_seed(42)
        nn.init.normal_(model.linear.weight, std=0.01)

        optimizer = get_optimizer(opt_name, model.parameters(), lr=lr)

        history = {'loss': [], 'margin': [], 'norm_l2': [], 'norm_linf': []}

        for epoch in range(epochs):
            optimizer.zero_grad()
            # Drop only the trailing output axis: a bare squeeze() would also
            # remove the batch axis when X holds a single sample and the loss
            # would then reject the shape mismatch with the targets.
            outputs = model(X).squeeze(-1)
            loss = loss_fn(outputs, y_bce)
            loss.backward()
            optimizer.step()

            if epoch % 100 == 0:
                # NOTE: the recorded loss was evaluated before this epoch's
                # step, while margin and norms are read from the updated model.
                l2, linf = compute_weight_norms(model)
                margin = compute_margin(model, X, y, normalize=True)
                history['loss'].append(loss.item())
                history['margin'].append(margin)
                history['norm_l2'].append(l2)
                history['norm_linf'].append(linf)

        results[opt_name] = history
        models[opt_name] = model

        l2_ratio, linf_ratio = compute_margin_ratios(model, X, y)
        print(f"{opt_name.upper():>5}: ℓ2-ratio={l2_ratio:.4f}, ℓ∞-ratio={linf_ratio:.4f}")

    return results, models

results, models = train_and_compare(X, y)

In [None]:
# Plot training dynamics: one panel per tracked metric, one curve per optimizer.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# (history key, panel title), listed in row-major panel order.
panel_specs = [
    ('loss', 'Training Loss'),
    ('margin', 'Normalized Margin'),
    ('norm_l2', 'Weight Norm (ℓ₂)'),
    ('norm_linf', 'Weight Norm (ℓ∞)'),
]

for ax, (metric_key, panel_title) in zip(axes.flat, panel_specs):
    for opt_name, history in results.items():
        # The x axis assumes one recorded value per 100 epochs, from epoch 0.
        epochs = range(0, len(history['loss']) * 100, 100)
        ax.plot(epochs, history[metric_key], label=opt_name.upper())
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Decision boundaries comparison: pass every trained linear model (the dict
# keyed by optimizer name returned by train_and_compare) together with the
# training data to the project's multi-boundary plotting helper.
plot_multi_decision_boundaries(models, X, y)

## 2. XOR Data (Requires Nonlinearity)

In [None]:
# Sample the XOR dataset (noise=0.15, fixed seed) used in this section.
X_xor, y_xor = generate_xor_data(n_samples=200, noise=0.15, seed=42)

# Scatter the two classes; a point's class is the sign of its label.
plt.figure(figsize=(8, 6))
xor_class_styles = (
    (y_xor > 0, 'blue', '+1'),
    (y_xor < 0, 'red', '-1'),
)
for class_mask, class_color, class_label in xor_class_styles:
    plt.scatter(
        X_xor[class_mask, 0],
        X_xor[class_mask, 1],
        c=class_color,
        label=class_label,
    )
plt.title('XOR Data')
plt.legend()
plt.show()

In [None]:
# Train shallow MLP on XOR
def train_mlp(X, y, hidden_dim: int = 32, epochs: int = 1000, lr: float = 0.01):
    """Train one ReLU ``ShallowMLP`` per optimizer on the same data.

    For each of ``gd``, ``adam`` and ``lion`` a fresh model is built from the
    same seed, trained on the full batch with ``BCEWithLogitsLoss``, and its
    training accuracy (from ``compute_accuracy``) is printed.

    Args:
        X: Feature matrix; ``X.shape[1]`` is used as the model input dimension.
            Presumably a float ``torch.Tensor`` of shape (n_samples, dim).
        y: Labels in {-1, +1}; they are mapped to {0, 1} for the BCE loss.
        hidden_dim: Width of the hidden layer.
        epochs: Number of full-batch optimisation steps per optimizer.
        lr: Learning rate passed to ``get_optimizer``.

    Returns:
        Dict mapping optimizer name to its trained model.
    """
    models = {}

    # Identical for every optimizer, so build them once.
    loss_fn = nn.BCEWithLogitsLoss()
    y_bce = (y + 1) / 2  # {-1, +1} -> {0, 1}

    for opt_name in ['gd', 'adam', 'lion']:
        # Seed BEFORE constructing the model: the parameters are drawn inside
        # the constructor, so seeding afterwards would leave each optimizer
        # with a different, irreproducible starting point and make the
        # comparison between optimizers unfair.
        torch.manual_seed(42)
        model = ShallowMLP(input_dim=X.shape[1], hidden_dim=hidden_dim, activation='relu')

        optimizer = get_optimizer(opt_name, model.parameters(), lr=lr)

        for epoch in range(epochs):
            optimizer.zero_grad()
            # Drop only the trailing output axis so a single-sample batch
            # keeps its batch dimension and still matches the targets.
            loss = loss_fn(model(X).squeeze(-1), y_bce)
            loss.backward()
            optimizer.step()

        acc = compute_accuracy(model, X, y)
        print(f"{opt_name.upper()}: Accuracy = {acc:.2%}")
        models[opt_name] = model

    return models

xor_models = train_mlp(X_xor, y_xor)

In [None]:
# Same comparison for the XOR task: the MLPs returned by train_mlp (keyed by
# optimizer name) are passed with the XOR data to the plotting helper.
plot_multi_decision_boundaries(xor_models, X_xor, y_xor)

## 3. Load and Analyze Experiment Results

After running the experiment scripts (e.g. `experiments/01_baseline_linear.py`), load and analyze their results here.

In [None]:
import json
from pathlib import Path

# Load baseline results if available.
baseline_path = Path('../results/baseline/all_metrics.json')
try:
    # JSON is UTF-8 by specification; pass the encoding explicitly so decoding
    # does not depend on the platform's locale default.
    with baseline_path.open(encoding='utf-8') as f:
        baseline_results = json.load(f)
except FileNotFoundError:
    # The metrics file is produced by the baseline experiment script.
    print('Run experiments/01_baseline_linear.py first!')
else:
    print('Loaded baseline results!')
    plot_training_curves(baseline_results)

## 4. Custom Experiments

Use this section to run custom experiments.

In [None]:
# Your custom experiments here.
# Once the setup cell at the top of the notebook has been run, the models,
# optimizers, metrics, data generators and plotting helpers it imports are
# available in this cell.
pass