In [None]:
import json
import pickle
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_circles, make_blobs
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Fix the global PyTorch and NumPy RNG seeds so repeated runs of the notebook
# draw the same random numbers.
torch.manual_seed(42)
np.random.seed(42)

In [None]:
print("=" * 60)
print("Part 1: Complete Binary Classification Workflow")
print("=" * 60)

# Step 1: generate a two-class "concentric circles" dataset
print("\nStep 1: Creating data...")
X, y = make_circles(n_samples=1000, noise=0.03, factor=0.5, random_state=42)

# Step 2: hold out 20% of the samples for testing
print("Step 2: Splitting data...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: float32 tensors; labels get a trailing dimension so they line up
# with the single-logit output of the model
print("Step 3: Converting to tensors...")
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train).unsqueeze(1)
y_test = torch.FloatTensor(y_test).unsqueeze(1)

# Step 4: use the GPU when one is available and move every tensor there
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Step 4: Using device: {device}")

X_train, X_test = X_train.to(device), X_test.to(device)
y_train, y_test = y_train.to(device), y_test.to(device)

# Step 5: the model class is defined right below
print("Step 5: Building model...")
class BinaryClassifier(nn.Module):
    """Fully connected network that emits one raw logit per sample.

    The network is ``Linear -> ReLU`` repeated once per entry of
    ``hidden_units``, followed by a final ``Linear`` layer with a single
    output. No sigmoid is applied, so the output is meant to be used with
    ``nn.BCEWithLogitsLoss`` or passed through ``torch.sigmoid``.

    Args:
        hidden_units: Sizes of the hidden layers, in order. ``None`` selects
            the default ``(16, 8)``. An empty sequence builds a purely linear
            model (a single ``Linear(input_size, 1)``).
        input_size: Number of input features per sample (default 2).
    """

    def __init__(self, hidden_units=None, input_size=2):
        super().__init__()
        # ``None`` sentinel instead of a list literal: a mutable default is
        # created once and shared by every call. An explicit ``[]`` must still
        # mean "no hidden layers", hence the identity check rather than ``or``.
        if hidden_units is None:
            hidden_units = (16, 8)

        layers = []
        prev_size = input_size
        for hidden_size in hidden_units:
            layers.extend([nn.Linear(prev_size, hidden_size), nn.ReLU()])
            prev_size = hidden_size
        layers.append(nn.Linear(prev_size, 1))
        # Attribute name is part of the state_dict keys ("network.<i>.weight").
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits for ``x``, one per row."""
        return self.network(x)

model = BinaryClassifier(hidden_units=[16, 8]).to(device)

# Step 6: loss works on raw logits; Adam updates every model parameter
print("Step 6: Setting up loss and optimizer...")
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 7: full-batch training, recording the train and test loss each epoch
print("Step 7: Training model...")
epochs = 100
train_losses, test_losses = [], []

for epoch in range(epochs):
    # One optimisation step on the whole training set.
    model.train()
    optimizer.zero_grad()
    y_logits = model(X_train)
    loss = criterion(y_logits, y_train)
    loss.backward()
    optimizer.step()

    # Loss on the held-out split, computed without autograd tracking.
    model.eval()
    with torch.inference_mode():
        test_logits = model(X_test)
        test_loss = criterion(test_logits, y_test)

    train_losses.append(loss.item())
    test_losses.append(test_loss.item())

    if (epoch + 1) % 20 == 0:
        print(f'  Epoch [{epoch+1}/{epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}')

# Step 8: threshold the sigmoid probabilities at 0.5 and score against the labels
print("Step 8: Evaluating model...")
model.eval()
with torch.inference_mode():
    test_probs = model(X_test).sigmoid()
    test_preds = test_probs.gt(0.5).long()

accuracy = test_preds.eq(y_test).float().mean()
print(f"Binary Classification Accuracy: {accuracy.item()*100:.2f}%")

In [None]:
print("\n" + "=" * 60)
print("Part 2: Complete Multi-Class Workflow")
print("=" * 60)

# Four Gaussian blobs in 2-D, one per class
print("\nCreating multi-class data...")
X_multi, y_multi = make_blobs(n_samples=1000, n_features=2, centers=4, random_state=42)

# Stratified 80/20 split keeps the class proportions equal in both parts
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(
    X_multi, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)

# Features as float tensors, class indices as long tensors, all on `device`
X_train_m = torch.FloatTensor(X_train_m).to(device)
X_test_m = torch.FloatTensor(X_test_m).to(device)
y_train_m = torch.LongTensor(y_train_m).to(device)
y_test_m = torch.LongTensor(y_test_m).to(device)

# Define multi-class model
class MultiClassClassifier(nn.Module):
    """Fully connected network that emits one raw logit per class.

    The network is ``Linear -> ReLU`` repeated once per entry of
    ``hidden_units``, followed by a final ``Linear`` layer with
    ``num_classes`` outputs. No softmax is applied, so the output is meant to
    be used with ``nn.CrossEntropyLoss`` or ``torch.argmax``.

    Args:
        num_classes: Number of output logits (default 4).
        hidden_units: Sizes of the hidden layers, in order. ``None`` selects
            the default ``(16, 8)``. An empty sequence builds a single
            ``Linear(input_size, num_classes)``.
        input_size: Number of input features per sample (default 2).
    """

    def __init__(self, num_classes=4, hidden_units=None, input_size=2):
        super().__init__()
        # ``None`` sentinel instead of a list literal: a mutable default is
        # created once and shared by every call. An explicit ``[]`` must still
        # mean "no hidden layers", hence the identity check rather than ``or``.
        if hidden_units is None:
            hidden_units = (16, 8)

        layers = []
        prev_size = input_size
        for hidden_size in hidden_units:
            layers.extend([nn.Linear(prev_size, hidden_size), nn.ReLU()])
            prev_size = hidden_size
        layers.append(nn.Linear(prev_size, num_classes))
        # Attribute name is part of the state_dict keys ("network.<i>.weight").
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for ``x``, one row per sample."""
        return self.network(x)

model_m = MultiClassClassifier(num_classes=4, hidden_units=[16, 8]).to(device)

# Train
print("Training multi-class model...")
criterion_m = nn.CrossEntropyLoss()
optimizer_m = optim.Adam(model_m.parameters(), lr=0.01)

# One epoch count drives both the loop and the progress message. Previously
# the loop ran a literal 100 while the message printed `epochs`, a variable
# left over from the Part 1 cell.
epochs_m = 100

for epoch in range(epochs_m):
    model_m.train()
    outputs = model_m(X_train_m)
    loss = criterion_m(outputs, y_train_m)
    optimizer_m.zero_grad()
    loss.backward()
    optimizer_m.step()

    if (epoch + 1) % 20 == 0:
        print(f'  Epoch [{epoch+1}/{epochs_m}], Loss: {loss.item():.4f}')

# Evaluate: the predicted class is the index of the largest logit
model_m.eval()
with torch.inference_mode():
    test_preds_m = torch.argmax(model_m(X_test_m), dim=1)

accuracy_m = (test_preds_m == y_test_m).float().mean()
print(f"Multi-class Accuracy: {accuracy_m.item()*100:.2f}%")

In [None]:
print("\n" + "=" * 60)
print("Part 3: Hyperparameter Experimentation")
print("=" * 60)

def train_and_evaluate(hidden_units, learning_rate, epochs=100):
    """Train a fresh BinaryClassifier and return its test accuracy.

    Uses the notebook-level ``X_train``/``y_train`` for full-batch training
    with Adam and ``X_test``/``y_test`` for scoring.

    Args:
        hidden_units: Hidden layer sizes passed to ``BinaryClassifier``.
        learning_rate: Learning rate for the Adam optimizer.
        epochs: Number of full-batch training steps.

    Returns:
        Fraction of test samples classified correctly, as a Python float.
    """
    net = BinaryClassifier(hidden_units=hidden_units).to(device)
    loss_fn = nn.BCEWithLogitsLoss()
    opt = optim.Adam(net.parameters(), lr=learning_rate)

    # Nothing switches the mode inside the loop, so setting it once suffices.
    net.train()
    for _ in range(epochs):
        opt.zero_grad()
        loss_fn(net(X_train), y_train).backward()
        opt.step()

    net.eval()
    with torch.inference_mode():
        predicted = (torch.sigmoid(net(X_test)) > 0.5).long()
        hit_rate = (predicted == y_test).float().mean()

    return hit_rate.item()

# Grid of (architecture, learning rate) pairs to try
configs = [
    {'hidden_units': units, 'lr': rate}
    for units, rate in (
        ([8], 0.01),
        ([16], 0.01),
        ([16, 8], 0.01),
        ([32, 16], 0.001),
        ([64, 32, 16], 0.001),
    )
]

print("\nTesting different configurations:")
results = []
for i, config in enumerate(configs):
    acc = train_and_evaluate(config['hidden_units'], config['lr'])
    # Keep the hyperparameters and the score together in one record.
    results.append(dict(config, accuracy=acc))
    print(f"  Config {i+1}: hidden={config['hidden_units']}, lr={config['lr']}, Accuracy: {acc*100:.2f}%")

# The winner is the record with the highest accuracy (first one on ties)
best_config = max(results, key=lambda record: record['accuracy'])
print(f"\nBest configuration: {best_config}")

In [None]:
print("\n" + "=" * 60)
print("Part 4: Model Comparison")
print("=" * 60)

# One untrained network per size label, created up front in this order
models = {
    label: BinaryClassifier(hidden_units=units)
    for label, units in (
        ('Linear', []),
        ('Small', [8]),
        ('Medium', [16, 8]),
        ('Large', [32, 16]),
    )
}

print("\nComparing different model sizes:")
comparison_results = {}

for name, model_comp in models.items():
    model_comp = model_comp.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model_comp.parameters(), lr=0.01)

    # 100 full-batch Adam steps; the mode does not change inside the loop
    model_comp.train()
    for epoch in range(100):
        optimizer.zero_grad()
        y_logits = model_comp(X_train)
        loss = criterion(y_logits, y_train)
        loss.backward()
        optimizer.step()

    # Test accuracy plus the total number of parameters
    model_comp.eval()
    with torch.inference_mode():
        test_preds = (torch.sigmoid(model_comp(X_test)) > 0.5).long()
        accuracy = (test_preds == y_test).float().mean()
    params = sum(p.numel() for p in model_comp.parameters())

    comparison_results[name] = dict(accuracy=accuracy.item(), parameters=params)
    print(f"  {name:8s}: Accuracy={accuracy.item()*100:5.2f}%, Parameters={params:,}")


In [None]:
print("\n" + "=" * 60)
print("Part 5: Saving and Loading Models")
print("=" * 60)

# Create directory for saving models
model_dir = Path('saved_models')
model_dir.mkdir(exist_ok=True)

# Save model state dict (weights only; the class is needed again to load)
model_path = model_dir / 'binary_classifier.pth'
torch.save(model.state_dict(), model_path)
print(f"Model saved to: {model_path}")

# Load model: rebuild the architecture, then restore the weights.
# map_location lets a checkpoint written on a GPU load on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers.
loaded_model = BinaryClassifier(hidden_units=[16, 8])
loaded_model.load_state_dict(
    torch.load(model_path, map_location=device, weights_only=True)
)
loaded_model = loaded_model.to(device)
loaded_model.eval()
print("Model loaded successfully!")

# Verify loaded model works
with torch.inference_mode():
    test_preds_loaded = (torch.sigmoid(loaded_model(X_test)) > 0.5).long()
    accuracy_loaded = (test_preds_loaded == y_test).float().mean()
print(f"Loaded model accuracy: {accuracy_loaded.item()*100:.2f}%")

# Save complete model (including architecture)
complete_model_path = model_dir / 'binary_classifier_complete.pth'
torch.save(model, complete_model_path)
print(f"Complete model saved to: {complete_model_path}")

# Load complete model.
# NOTE: weights_only=False unpickles arbitrary Python objects and can execute
# code while loading. Only use it on files this notebook wrote itself.
loaded_complete = torch.load(complete_model_path, map_location=device, weights_only=False)
loaded_complete.eval()
print("Complete model loaded successfully!")

In [None]:
print("\n" + "=" * 60)
print("Part 6: Model Inference Function")
print("=" * 60)

def predict(model, X_new, device='cpu'):
    """Run a binary classifier on new samples.

    The model is switched to eval mode and moved to ``device``. Inputs that
    are not already tensors are converted with ``torch.FloatTensor``.

    Args:
        model: Module returning one raw logit per sample.
        X_new: Samples to classify, as a tensor or anything
            ``torch.FloatTensor`` accepts.
        device: Device on which to run the forward pass.

    Returns:
        Dict with CPU tensors under ``'logits'``, ``'probabilities'``
        (sigmoid of the logits) and ``'predictions'`` (1 where the
        probability exceeds 0.5, else 0).
    """
    model.eval()
    model = model.to(device)

    inputs = X_new if isinstance(X_new, torch.Tensor) else torch.FloatTensor(X_new)
    inputs = inputs.to(device)

    with torch.inference_mode():
        raw_scores = model(inputs)
        positive_prob = torch.sigmoid(raw_scores)
        hard_labels = (positive_prob > 0.5).long()

    # Hand everything back on the CPU, whatever device was used above.
    outputs = (
        ('logits', raw_scores),
        ('probabilities', positive_prob),
        ('predictions', hard_labels),
    )
    return {key: tensor.cpu() for key, tensor in outputs}

# Test inference function on random points.
# The output gets its own name: reusing `results` would replace the list of
# experiment records built in Part 3 with an unrelated dict.
X_new = torch.randn(5, 2)
new_predictions = predict(model, X_new, device)

# Iterate over the batch itself so the report follows the sample count above.
print(f"\nPredictions for {len(X_new)} new samples:")
for i in range(len(X_new)):
    print(f"  Sample {i+1}:")
    print(f"    Input: {X_new[i].numpy()}")
    print(f"    Probability: {new_predictions['probabilities'][i].item():.4f}")
    print(f"    Predicted class: {new_predictions['predictions'][i].item()}")


In [None]:
print("\n" + "=" * 60)
print("Part 7: Complete Training Class")
print("=" * 60)

class ClassificationTrainer:
    """Training pipeline for a single-logit binary classifier.

    Attributes are set in ``__init__``:
        model: The wrapped module, moved to ``device``.
        device: Device used for the model and for all data passed in.
        history: Dict with per-epoch ``'train_loss'`` and ``'test_loss'``
            lists; ``train`` appends to it on every call.
    """

    def __init__(self, model, device='cpu'):
        self.model = model.to(device)
        self.device = device
        self.history = {'train_loss': [], 'test_loss': []}

    def train(self, X_train, y_train, X_test, y_test,
              epochs=100, lr=0.01):
        """Train with full-batch Adam and ``BCEWithLogitsLoss``.

        Args:
            X_train, y_train: Training features and float labels shaped like
                the model output.
            X_test, y_test: Held-out data used only to record the test loss.
            epochs: Number of full-batch optimisation steps.
            lr: Adam learning rate.

        A progress line is printed every 20 epochs.
        """
        # The trainer owns the device, so it also places the data there.
        # This is a no-op for tensors that already live on it.
        X_train, y_train = X_train.to(self.device), y_train.to(self.device)
        X_test, y_test = X_test.to(self.device), y_test.to(self.device)

        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=lr)

        for epoch in range(epochs):
            # Training
            self.model.train()
            y_logits = self.model(X_train)
            loss = criterion(y_logits, y_train)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Testing
            self.model.eval()
            with torch.inference_mode():
                test_logits = self.model(X_test)
                test_loss = criterion(test_logits, y_test)

            self.history['train_loss'].append(loss.item())
            self.history['test_loss'].append(test_loss.item())

            if (epoch + 1) % 20 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}')

    def evaluate(self, X_test, y_test):
        """Return the accuracy on ``(X_test, y_test)`` as a Python float.

        Predictions are 1 where the sigmoid probability exceeds 0.5.
        """
        X_test, y_test = X_test.to(self.device), y_test.to(self.device)

        self.model.eval()
        with torch.inference_mode():
            test_probs = torch.sigmoid(self.model(X_test))
            test_preds = (test_probs > 0.5).long()
            accuracy = (test_preds == y_test).float().mean()

        return accuracy.item()

    def save(self, path):
        """Write the model's state dict to ``path``."""
        torch.save(self.model.state_dict(), path)
        print(f"Model saved to: {path}")

    def load(self, path):
        """Load a state dict from ``path`` into the wrapped model.

        ``map_location`` remaps the stored tensors onto this trainer's device,
        so weights saved on a GPU also load on a CPU-only machine.
        ``weights_only=True`` limits unpickling to tensors and containers.
        """
        state_dict = torch.load(path, map_location=self.device, weights_only=True)
        self.model.load_state_dict(state_dict)
        print(f"Model loaded from: {path}")

# Train a fresh [16, 8] classifier through the trainer wrapper
print("\nUsing ClassificationTrainer:")
model_trainer = BinaryClassifier(hidden_units=[16, 8])
trainer = ClassificationTrainer(model_trainer, device=device)
trainer.train(
    X_train=X_train, y_train=y_train,
    X_test=X_test, y_test=y_test,
    epochs=100, lr=0.01,
)
accuracy_trainer = trainer.evaluate(X_test=X_test, y_test=y_test)
print(f"Final accuracy: {accuracy_trainer*100:.2f}%")

# Persist the trained weights next to the models saved in Part 5
trainer.save(path=model_dir / 'trained_classifier.pth')

In [None]:
print("\n" + "=" * 60)
print("Part 8: Training Curves Visualization")
print("=" * 60)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss curves recorded while training the Part 1 model
axes[0].plot(train_losses, label='Train Loss')
axes[0].plot(test_losses, label='Test Loss')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training Loss Curves')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Model comparison from Part 4, as accuracy percentages
names = list(comparison_results.keys())
accuracies = [comparison_results[name]['accuracy'] * 100 for name in names]
axes[1].bar(names, accuracies)
axes[1].set_ylabel('Accuracy (%)')
axes[1].set_title('Model Comparison')
# Value labels sit 2 points above each bar, so the axis needs headroom above
# 100; with a 0-100 range the labels of near-perfect models run into the title.
axes[1].set_ylim(0, 110)
for i, v in enumerate(accuracies):
    axes[1].text(i, v + 2, f'{v:.1f}%', ha='center')

plt.tight_layout()
plt.show()

In [None]:
print("\n" + "=" * 60)
print("Exercise 1: Design your own experiment")
print("=" * 60)

# Candidate setups: each pairs an architecture with a learning rate and an
# epoch budget
experiment_configs = [
    dict(name='Config A - Simple Architecture, High LR, Few Epochs',
         hidden_units=[8], learning_rate=0.05, epochs=50),
    dict(name='Config B - Medium Architecture, Medium LR, Medium Epochs',
         hidden_units=[16, 8], learning_rate=0.01, epochs=100),
    dict(name='Config C - Complex Architecture, Low LR, Many Epochs',
         hidden_units=[32, 16, 8], learning_rate=0.001, epochs=150),
    dict(name='Config D - Different Architecture, Medium LR, Medium Epochs',
         hidden_units=[64, 32], learning_rate=0.01, epochs=100),
]

# One result record per configuration, in the order they were run
experiment_results = []

for config in experiment_configs:
    print(f"\n{config['name']}")
    print(f"  Architecture: {config['hidden_units']}")
    print(f"  Learning Rate: {config['learning_rate']}")
    print(f"  Epochs: {config['epochs']}")

    # Fresh model and trainer so no state carries over between configs
    experiment_model = BinaryClassifier(hidden_units=config['hidden_units'])
    experiment_trainer = ClassificationTrainer(experiment_model, device=device)
    experiment_trainer.train(
        X_train, y_train, X_test, y_test,
        epochs=config['epochs'], lr=config['learning_rate'],
    )
    experiment_accuracy = experiment_trainer.evaluate(X_test, y_test)

    # The record is the config plus what was measured for it
    result = dict(
        config,
        accuracy=experiment_accuracy,
        parameters=sum(p.numel() for p in experiment_model.parameters()),
    )
    experiment_results.append(result)

    print(f"  Final Accuracy: {experiment_accuracy*100:.2f}%")
    print(f"  Total Parameters: {result['parameters']:,}")


In [None]:
print("\n" + "=" * 60)
print("Exercise 2: Compare on different datasets")
print("=" * 60)

# The same model setup is reused for every dataset in this exercise
test_model_config = dict(hidden_units=[16, 8], learning_rate=0.01, epochs=100)

exercise2_results = []

noise_levels = [0.01, 0.03, 0.05, 0.1]

for noise in noise_levels:
    print(f"\nNoise level: {noise}")

    # Same circles dataset as Part 1, but with this noise level
    X_circles, y_circles = make_circles(n_samples=1000, noise=noise, factor=0.5, random_state=42)
    X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
        X_circles, y_circles, test_size=0.2, random_state=42
    )

    # Float tensors on `device`; labels get a trailing dim to match the logits
    X_train_c, X_test_c = (
        torch.FloatTensor(part).to(device) for part in (X_train_c, X_test_c)
    )
    y_train_c, y_test_c = (
        torch.FloatTensor(part).unsqueeze(1).to(device) for part in (y_train_c, y_test_c)
    )

    # Fresh model per noise level, trained through the trainer wrapper
    model_c = BinaryClassifier(hidden_units=test_model_config['hidden_units'])
    trainer_c = ClassificationTrainer(model_c, device=device)
    trainer_c.train(
        X_train_c, y_train_c, X_test_c, y_test_c,
        epochs=test_model_config['epochs'],
        lr=test_model_config['learning_rate'],
    )
    accuracy_c = trainer_c.evaluate(X_test_c, y_test_c)

    result = dict(
        dataset=f'make_circles (noise={noise})',
        noise_level=noise,
        accuracy=accuracy_c,
        type='circles',
    )
    exercise2_results.append(result)

    print(f"  Accuracy: {accuracy_c*100:.2f}%")

num_classes_list = [2, 3, 4, 5]

for num_classes in num_classes_list:
    print(f"\nNumber of classes: {num_classes}")

    # One blob per class, split 80/20 with class proportions preserved
    X_blobs, y_blobs = make_blobs(
        n_samples=1000, n_features=2, centers=num_classes, random_state=42
    )
    X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
        X_blobs, y_blobs, test_size=0.2, random_state=42, stratify=y_blobs
    )

    # Float features and long class indices, all on `device`
    X_train_b, X_test_b = (
        torch.FloatTensor(part).to(device) for part in (X_train_b, X_test_b)
    )
    y_train_b, y_test_b = (
        torch.LongTensor(part).to(device) for part in (y_train_b, y_test_b)
    )

    # Classifier with one output logit per class
    model_b = MultiClassClassifier(
        num_classes=num_classes,
        hidden_units=test_model_config['hidden_units'],
    ).to(device)

    criterion_b = nn.CrossEntropyLoss()
    optimizer_b = optim.Adam(model_b.parameters(), lr=test_model_config['learning_rate'])

    # Full-batch training; the mode does not change inside the loop
    model_b.train()
    for epoch in range(test_model_config['epochs']):
        optimizer_b.zero_grad()
        outputs = model_b(X_train_b)
        loss = criterion_b(outputs, y_train_b)
        loss.backward()
        optimizer_b.step()

    # Accuracy of the arg-max prediction on the held-out split
    model_b.eval()
    with torch.inference_mode():
        test_preds_b = model_b(X_test_b).argmax(dim=1)
        accuracy_b = (test_preds_b == y_test_b).float().mean().item()

    result = dict(
        dataset=f'make_blobs (classes={num_classes})',
        num_classes=num_classes,
        accuracy=accuracy_b,
        type='blobs',
    )
    exercise2_results.append(result)

    print(f"  Accuracy: {accuracy_b*100:.2f}%")


In [None]:
print("\n" + "=" * 60)
print("Exercise 3: Implement early stopping")
print("=" * 60)

class ClassificationTrainerWithEarlyStopping(ClassificationTrainer):
    """ClassificationTrainer whose loop halts once the test loss stops improving.

    Only ``train`` is overridden. ``evaluate``, ``save`` and ``load`` are
    inherited; the previous ``evaluate`` override was a verbatim copy of the
    parent's.

    After ``train`` returns, ``stopped_epoch`` holds the 1-based epoch at
    which training was cut short, or ``None`` if every epoch ran.
    """

    def train(self, X_train, y_train, X_test, y_test,
              epochs=100, lr=0.01, patience=10, restore_best_weights=False):
        """Train with full-batch Adam, stopping early on a stalled test loss.

        Args:
            X_train, y_train: Training features and float labels.
            X_test, y_test: Held-out data; its loss drives the stopping rule.
            epochs: Maximum number of epochs.
            lr: Adam learning rate.
            patience: Number of consecutive epochs without a new best test
                loss after which training stops.
            restore_best_weights: If True, the model ends up with the weights
                from the epoch that had the lowest test loss instead of the
                weights from the last epoch run. Off by default, which keeps
                the original behaviour.
        """
        # The trainer owns the device, so it also places the data there.
        X_train, y_train = X_train.to(self.device), y_train.to(self.device)
        X_test, y_test = X_test.to(self.device), y_test.to(self.device)

        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=lr)

        best_test_loss = float('inf')
        best_state = None
        patience_counter = 0
        stopped_epoch = None

        for epoch in range(epochs):
            # Training
            self.model.train()
            y_logits = self.model(X_train)
            loss = criterion(y_logits, y_train)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Testing
            self.model.eval()
            with torch.inference_mode():
                test_logits = self.model(X_test)
                test_loss = criterion(test_logits, y_test)

            self.history['train_loss'].append(loss.item())
            self.history['test_loss'].append(test_loss.item())

            # Early stopping logic: any strict improvement resets the counter
            if test_loss.item() < best_test_loss:
                best_test_loss = test_loss.item()
                patience_counter = 0
                if restore_best_weights:
                    # Clone, because state_dict() returns references to the
                    # live tensors that the optimizer keeps updating.
                    best_state = {
                        key: value.detach().clone()
                        for key, value in self.model.state_dict().items()
                    }
            else:
                patience_counter += 1

            if (epoch + 1) % 20 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}, Patience: {patience_counter}/{patience}')

            # Stop if patience exceeded
            if patience_counter >= patience:
                stopped_epoch = epoch + 1
                print(f'Early stopping at epoch {stopped_epoch} (patience: {patience})')
                break

        if restore_best_weights and best_state is not None:
            self.model.load_state_dict(best_state)

        self.stopped_epoch = stopped_epoch


# Test early stopping with different patience values
patience_values = [5, 10, 15, 20]
exercise3_results = []

# Epoch budget for every run; also reported when early stopping never fires
max_epochs_es = 200

print("\nTesting early stopping with different patience values:")
print("Using model configuration: hidden=[16, 8], lr=0.01")

for patience in patience_values:
    print(f"\nPatience: {patience}")

    # Create a fresh model
    model_es = BinaryClassifier(hidden_units=[16, 8])

    # Create trainer with early stopping
    trainer_es = ClassificationTrainerWithEarlyStopping(model_es, device=device)

    # Train with early stopping
    trainer_es.train(
        X_train, y_train, X_test, y_test,
        epochs=max_epochs_es,
        lr=0.01,
        patience=patience
    )

    # Evaluate
    accuracy_es = trainer_es.evaluate(X_test, y_test)

    # train() always sets `stopped_epoch`, leaving it as None when the run
    # used its whole budget. A hasattr() check therefore never reached the
    # fallback and reported "None"; test the value instead.
    stopped_epoch_es = getattr(trainer_es, 'stopped_epoch', None)
    if stopped_epoch_es is None:
        stopped_epoch_es = max_epochs_es

    result = {
        'patience': patience,
        'stopped_epoch': stopped_epoch_es,
        'accuracy': accuracy_es,
        'num_epochs_trained': len(trainer_es.history['train_loss']),
        'history': trainer_es.history.copy()
    }
    exercise3_results.append(result)

    print(f"  Stopped at epoch: {result['stopped_epoch']}")
    print(f"  Final Accuracy: {accuracy_es*100:.2f}%")
    print(f"  Total epochs trained: {result['num_epochs_trained']}")



In [None]:

print("\n" + "=" * 60)
print("Exercise 4: Create model comparison report")
print("=" * 60)

# Define different model configurations to compare
comparison_configs = [
    {'name': 'Linear', 'hidden_units': []},
    {'name': 'Small', 'hidden_units': [8]},
    {'name': 'Medium', 'hidden_units': [16, 8]},
    {'name': 'Large', 'hidden_units': [32, 16, 8]},
    {'name': 'Very Large', 'hidden_units': [64, 32, 16]},
]

exercise4_results = []


def _wait_for_device():
    """Block until queued GPU work has finished; does nothing on CPU.

    CUDA kernels are launched asynchronously, so without this the clock can
    be read before the timed work has actually completed.
    """
    if torch.device(device).type == 'cuda':
        torch.cuda.synchronize()


print("\nTraining and comparing different models...")
print("Dataset: Binary Classification (circles)")
print("Epochs: 100, Learning Rate: 0.01\n")

for config in comparison_configs:
    print(f"Training {config['name']} model...")

    # Create model
    model_comp = BinaryClassifier(hidden_units=config['hidden_units'])
    model_comp = model_comp.to(device)

    # Count parameters
    num_params = sum(p.numel() for p in model_comp.parameters())

    # Start timing. perf_counter is a monotonic, high-resolution clock meant
    # for measuring intervals; time.time() can jump if the system clock changes.
    _wait_for_device()
    start_time = time.perf_counter()

    # Train model
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model_comp.parameters(), lr=0.01)

    for epoch in range(100):
        model_comp.train()
        y_logits = model_comp(X_train)
        loss = criterion(y_logits, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # End timing
    _wait_for_device()
    training_time = time.perf_counter() - start_time

    # Evaluate
    model_comp.eval()
    with torch.inference_mode():
        test_preds = (torch.sigmoid(model_comp(X_test)) > 0.5).long()
        accuracy = (test_preds == y_test).float().mean().item()

    result = {
        'name': config['name'],
        'hidden_units': config['hidden_units'],
        'num_parameters': num_params,
        'training_time': training_time,
        'accuracy': accuracy
    }
    exercise4_results.append(result)

    print(f" Accuracy: {accuracy*100:.2f}%, Parameters: {num_params:,}, Time: {training_time:.2f}s\n")


# Comparison table: one row per trained model
print(f"\n{'Model':<15} {'Architecture':<25} {'Parameters':<15} {'Accuracy':<12} {'Time (s)':<10}")
print("-" * 77)

for result in exercise4_results:
    # A model without hidden layers is shown as a direct input-to-output map
    arch_str = str(result['hidden_units'] or "[Direct Output]")
    print(f"{result['name']:<15} {arch_str:<25} {result['num_parameters']:<15,} {result['accuracy']*100:>10.2f}% {result['training_time']:>9.2f}s")

# Summary statistics over all models
print("\n" + "=" * 60)
print("SUMMARY STATISTICS")
print("=" * 60)

accuracies = [r['accuracy'] for r in exercise4_results]
params = [r['num_parameters'] for r in exercise4_results]
times = [r['training_time'] for r in exercise4_results]


def _model_with(values, pick):
    """Name of the first model whose entry in `values` equals pick(values)."""
    return exercise4_results[values.index(pick(values))]['name']


print(f"\nAccuracy:")
print(f"  Highest: {max(accuracies)*100:.2f}% ({_model_with(accuracies, max)})")
print(f"  Lowest:  {min(accuracies)*100:.2f}% ({_model_with(accuracies, min)})")
print(f"  Average: {sum(accuracies)/len(accuracies)*100:.2f}%")

print(f"\nParameters:")
print(f"  Highest: {max(params):,} ({_model_with(params, max)})")
print(f"  Lowest:  {min(params):,} ({_model_with(params, min)})")
print(f"  Average: {sum(params)//len(params):,}")

print(f"\nTraining Time:")
print(f"  Slowest: {max(times):.2f}s ({_model_with(times, max)})")
print(f"  Fastest: {min(times):.2f}s ({_model_with(times, min)})")
print(f"  Average: {sum(times)/len(times):.2f}s")

# Best model by accuracy (first one on ties)
best_accuracy_model = max(exercise4_results, key=lambda entry: entry['accuracy'])
print(f"\nBest Model by Accuracy: {best_accuracy_model['name']} ({best_accuracy_model['accuracy']*100:.2f}%)")

# Most efficient model: highest accuracy per parameter
efficiency = [(r['accuracy'] / r['num_parameters'], r) for r in exercise4_results]
_, most_efficient = max(efficiency, key=lambda pair: pair[0])
print(f"Most Efficient Model: {most_efficient['name']} (accuracy/parameter: {most_efficient['accuracy']/most_efficient['num_parameters']:.6f})")

# Fastest model: shortest training time
fastest_model = min(exercise4_results, key=lambda entry: entry['training_time'])
print(f"Fastest Model: {fastest_model['name']} ({fastest_model['training_time']:.2f}s)")


In [None]:
print("\n" + "=" * 60)
print("Exercise 5: Save/load with metadata")
print("=" * 60)

# Create directory
metadata_dir = Path('models_with_metadata')
metadata_dir.mkdir(exist_ok=True)

print("\nSaving models with metadata...\n")

models_to_save = [
    {
        'name': 'small_model',
        'config': {'hidden_units': [8]},
        'model': BinaryClassifier(hidden_units=[8])
    },
    {
        'name': 'medium_model',
        'config': {'hidden_units': [16, 8]},
        'model': BinaryClassifier(hidden_units=[16, 8])
    },
    {
        'name': 'large_model',
        'config': {'hidden_units': [32, 16, 8]},
        'model': BinaryClassifier(hidden_units=[32, 16, 8])
    }
]

saved_models_info = []

for model_info in models_to_save:
    model_name = model_info['name']
    config = model_info['config']
    # Dedicated names here: rebinding `model` or `model_path` would silently
    # replace the Part 1 classifier and checkpoint path that the Part 5 and
    # Part 6 cells refer to.
    model_to_save = model_info['model'].to(device)

    print(f"Processing {model_name}...")

    # Train the model quickly
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model_to_save.parameters(), lr=0.01)

    for epoch in range(100):
        model_to_save.train()
        y_logits = model_to_save(X_train)
        loss = criterion(y_logits, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate
    model_to_save.eval()
    with torch.inference_mode():
        test_preds = (torch.sigmoid(model_to_save(X_test)) > 0.5).long()
        accuracy = (test_preds == y_test).float().mean().item()

    # Create metadata dictionary (plain Python types only, so it is valid
    # for both pickle and JSON)
    metadata = {
        'model_name': model_name,
        'save_datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'hyperparameters': {
            'hidden_units': config['hidden_units'],
            'learning_rate': 0.01,
            'epochs': 100
        },
        'training_metrics': {
            'test_accuracy': accuracy,
            'final_train_loss': loss.item()
        },
        'model_info': {
            'num_parameters': sum(p.numel() for p in model_to_save.parameters()),
            'device': str(device)
        }
    }

    # Save model state dict
    weights_file = metadata_dir / f'{model_name}_weights.pth'
    torch.save(model_to_save.state_dict(), weights_file)

    # Save metadata using pickle (this is the copy the loading loop reads)
    metadata_path = metadata_dir / f'{model_name}_metadata.pkl'
    with open(metadata_path, 'wb') as f:
        pickle.dump(metadata, f)

    # Also save metadata as JSON for readability
    json_path = metadata_dir / f'{model_name}_metadata.json'
    with open(json_path, 'w') as f:
        json.dump(metadata, f, indent=4)

    saved_models_info.append({
        'model_name': model_name,
        'weights_path': str(weights_file),
        'metadata_path': str(metadata_path),
        'metadata': metadata
    })

    print(f"   Saved: {weights_file}")
    print(f"   Metadata: {metadata_path}")
    print(f"   JSON: {json_path}\n")


loaded_models_info = []

for saved_info in saved_models_info:
    model_name = saved_info['model_name']
    weights_path = saved_info['weights_path']
    metadata_path = saved_info['metadata_path']

    print(f"Loading {model_name}...")

    # Load metadata from pickle.
    # NOTE: pickle.load can execute arbitrary code. This is acceptable only
    # because the file was written by the saving loop of this notebook; do
    # not point it at files from an untrusted source.
    with open(metadata_path, 'rb') as f:
        loaded_metadata = pickle.load(f)

    # Rebuild the architecture from the stored hyperparameters, then restore
    # the weights. map_location makes GPU-saved weights load on a CPU-only
    # machine; weights_only=True limits unpickling to tensors and containers.
    # A dedicated name keeps Part 5's `loaded_model` intact.
    reloaded_model = BinaryClassifier(
        hidden_units=loaded_metadata['hyperparameters']['hidden_units']
    )
    reloaded_model.load_state_dict(
        torch.load(weights_path, map_location=device, weights_only=True)
    )
    reloaded_model = reloaded_model.to(device)
    reloaded_model.eval()

    # Verify model works
    with torch.inference_mode():
        test_preds = (torch.sigmoid(reloaded_model(X_test)) > 0.5).long()
        accuracy = (test_preds == y_test).float().mean().item()

    loaded_models_info.append({
        'model_name': model_name,
        'metadata': loaded_metadata,
        'loaded_accuracy': accuracy
    })

    print(f"   Model loaded successfully")
    print(f"   Verified accuracy: {accuracy*100:.2f}%\n")

# Print the metadata recorded for every saved model

for saved_info in saved_models_info:
    metadata = saved_info['metadata']
    hyper = metadata['hyperparameters']
    metrics = metadata['training_metrics']
    info = metadata['model_info']

    print(f"\nModel: {metadata['model_name']}")
    print(f"  Saved: {metadata['save_datetime']}")
    print(f"  Hyperparameters:")
    print(f"    - Hidden Units: {hyper['hidden_units']}")
    print(f"    - Learning Rate: {hyper['learning_rate']}")
    print(f"    - Epochs: {hyper['epochs']}")
    print(f"  Training Metrics:")
    print(f"    - Test Accuracy: {metrics['test_accuracy']*100:.2f}%")
    print(f"    - Final Train Loss: {metrics['final_train_loss']:.4f}")
    print(f"  Model Info:")
    print(f"    - Parameters: {info['num_parameters']:,}")
    print(f"    - Device: {info['device']}")

# Verify loaded models: the accuracy measured after reloading should match
# the accuracy recorded in the metadata when the model was saved

print(f"\n{'Model':<20} {'Original Accuracy':<20} {'Loaded Accuracy':<20} {'Match':<10}")
print("-" * 70)

# Both lists were built in the same order, so they can be walked in step.
for saved_info, loaded_info in zip(saved_models_info, loaded_models_info):
    original_acc = saved_info['metadata']['training_metrics']['test_accuracy']
    loaded_acc = loaded_info['loaded_accuracy']
    # The status marks were mis-encoded ("âœ—" is the UTF-8 bytes of "✗" read
    # as cp1252); restored here as a proper check mark and cross.
    match = "✓ Yes" if abs(original_acc - loaded_acc) < 0.001 else "✗ No"

    print(f"{saved_info['model_name']:<20} {original_acc*100:>18.2f}% {loaded_acc*100:>18.2f}% {match:<10}")