# Model Tuning & Hyperparameter Optimization

## Week 6: MLOps - Make Your Models Better

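This notebook demonstrates hyperparameter tuning with grid search, cross-validation, and lightweight experiment tracking (ranking the search results and comparing the tuned model against a baseline) on a small synthetic dataset.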

---

## Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

print("Libraries loaded!")

## Prepare Data

In [None]:
# Build a reproducible synthetic binary-classification problem.
np.random.seed(42)
n_samples = 200

# Five standard-normal features; the label depends only on the first two.
X = np.random.randn(n_samples, 5)
signal = X[:, 0] + X[:, 1]
y = (signal > 0).astype(int)

# Hold out 20% of the rows for testing while preserving the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize with statistics learned from the training rows only.
# NOTE(review): the scaler is fit once on the whole training split, so the
# cross-validation folds used later share these scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

for split_name, split_data in (("Training set", X_train_scaled), ("Test set", X_test_scaled)):
    print(f"{split_name}: {split_data.shape}")

## Part 1: Baseline Model

In [None]:
# Reference point: a random forest left at its default hyperparameters.
baseline_model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)

# Score the baseline on the held-out test rows.
y_pred_baseline = baseline_model.predict(X_test_scaled)
baseline_accuracy, baseline_f1 = (
    metric(y_test, y_pred_baseline) for metric in (accuracy_score, f1_score)
)

baseline_rule = "=" * 50
print(baseline_rule)
print("BASELINE MODEL (Default Hyperparameters)")
print(baseline_rule)
for metric_label, metric_value in (("Accuracy", baseline_accuracy), ("F1-Score", baseline_f1)):
    print(f"{metric_label}: {metric_value:.4f}")

## Part 2: Cross-Validation Analysis

In [None]:
# Shuffled, stratified 5-fold splitter with a fixed seed for repeatable folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# F1 score of the baseline configuration on each training fold.
cv_scores = cross_val_score(
    baseline_model,
    X_train_scaled,
    y_train,
    scoring='f1',
    cv=cv,
)

cv_rule = "=" * 50
print(cv_rule)
print("5-FOLD CROSS-VALIDATION")
print(cv_rule)
for fold_number, fold_score in enumerate(cv_scores, start=1):
    print(f"Fold {fold_number}: {fold_score:.4f}")

fold_mean = cv_scores.mean()
fold_std = cv_scores.std()

# A fold-to-fold standard deviation below 0.05 is reported as stable.
if fold_std < 0.05:
    stability = 'STABLE'
else:
    stability = 'VARIABLE'

print(f"\nMean: {fold_mean:.4f}")
print(f"Std Dev: {fold_std:.4f}")
print(f"\nInterpretation: Model performance is {stability} across folds")

## Part 3: Grid Search Hyperparameter Tuning

In [None]:
# Candidate values per hyperparameter; the grid search evaluates every combination.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 7, 10, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Report the real size of the search instead of a guessed duration: the
# number of model fits is (combinations x CV folds).
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))
n_folds = cv.get_n_splits()
print(f"Total hyperparameter combinations: {n_candidates}")
print(f"Total model fits ({n_folds}-fold CV): {n_candidates * n_folds}\n")

# Exhaustive search scored by F1. The `cv` splitter from Part 2 is reused so
# the tuned CV score is computed on the same shuffled, stratified folds as the
# baseline CV score and the two numbers are directly comparable.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=cv,
    scoring='f1',
    n_jobs=-1,  # Use all CPUs
    verbose=1
)

print("Starting Grid Search...")
grid_search.fit(X_train_scaled, y_train)
print("Grid Search Complete!")

### Best Parameters Found

In [None]:
# Winning configuration and its mean cross-validated F1 from the search.
best_params = grid_search.best_params_
best_cv_score = grid_search.best_score_

best_rule = "=" * 50
print(best_rule)
print("BEST HYPERPARAMETERS")
print(best_rule)
for setting_name, setting_value in best_params.items():
    print(f"{setting_name}: {setting_value}")
print(f"\nBest CV Score (F1): {best_cv_score:.4f}")

### Top 10 Parameter Combinations

In [None]:
# All search results, best-ranked candidates first.
results_df = pd.DataFrame(grid_search.cv_results_).sort_values('rank_test_score')

print("\nTop 10 Parameter Combinations:")
print("="*70)

# Map the verbose cv_results_ column names to short display labels.
column_labels = {
    'param_n_estimators': 'n_est',
    'param_max_depth': 'max_d',
    'param_min_samples_split': 'min_split',
    'param_min_samples_leaf': 'min_leaf',
    'mean_test_score': 'CV_F1',
    'std_test_score': 'Std',
}
top_10 = results_df.head(10)[list(column_labels)].rename(columns=column_labels)

# Number the rows by their position in the sorted table (1 = best). The
# DataFrame index is the candidate's position in the original grid, so it
# must not be used as the displayed rank.
for position, row in enumerate(top_10.itertuples(index=False), start=1):
    print(f"\n{position}: CV F1 = {row.CV_F1:.4f} ± {row.Std:.4f}")
    print(f"   n_est={row.n_est}, max_d={row.max_d}, min_split={row.min_split}, min_leaf={row.min_leaf}")

## Part 4: Evaluate Tuned Model

In [None]:
# Take the best estimator found by the grid search and predict on the test rows.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test_scaled)

# Held-out metrics for the tuned model.
tuned_accuracy = accuracy_score(y_test, y_pred_tuned)
tuned_precision = precision_score(y_test, y_pred_tuned)
tuned_recall = recall_score(y_test, y_pred_tuned)
tuned_f1 = f1_score(y_test, y_pred_tuned)

tuned_rule = "=" * 50
print(tuned_rule)
print("TUNED MODEL EVALUATION")
print(tuned_rule)
tuned_report = (
    ("Accuracy", tuned_accuracy),
    ("Precision", tuned_precision),
    ("Recall", tuned_recall),
    ("F1-Score", tuned_f1),
)
for tuned_label, tuned_value in tuned_report:
    print(f"{tuned_label}: {tuned_value:.4f}")

## Part 5: Comparison - Baseline vs Tuned

In [None]:
# Baseline metrics are derived from the predictions already computed in
# Part 1 (y_pred_baseline) rather than re-running predict() for every metric.
baseline_precision = precision_score(y_test, y_pred_baseline)
baseline_recall = recall_score(y_test, y_pred_baseline)

# Side-by-side table of held-out metrics for both models.
comparison = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-Score'],
    'Baseline': [baseline_accuracy, baseline_precision, baseline_recall, baseline_f1],
    'Tuned': [tuned_accuracy, tuned_precision, tuned_recall, tuned_f1]
})

comparison['Improvement'] = comparison['Tuned'] - comparison['Baseline']

# Relative change in percent. A baseline of exactly 0 would produce +/-inf,
# which would poison the average below, so such rows are reported as NaN
# (and skipped by mean()).
relative_change = comparison['Improvement'] / comparison['Baseline'] * 100
comparison['Improvement %'] = relative_change.replace([np.inf, -np.inf], np.nan).round(2)

print("\n" + "="*70)
print("BASELINE vs TUNED MODEL COMPARISON")
print("="*70)
print(comparison.to_string(index=False))

# NOTE: the test split holds only 40 rows (20% of 200), so small differences
# between the two models are within noise.
total_improvement = comparison['Improvement %'].mean()
print(f"\n✓ Average Improvement: {total_improvement:.2f}%")

### Visualization

In [None]:
# Grouped bar chart: one pair of bars (baseline, tuned) per metric.
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(comparison))
width = 0.35

# Each series is shifted half a bar-width left or right of the tick position.
bar_series = (
    (-width/2, 'Baseline', 'lightcoral'),
    (width/2, 'Tuned', 'lightgreen'),
)
for offset, column, bar_color in bar_series:
    ax.bar(x + offset, comparison[column], width, label=column, color=bar_color)

ax.set_title('Baseline vs Tuned Model Performance', fontsize=14, fontweight='bold')
ax.set_ylabel('Score')
ax.set_ylim([0, 1])
ax.set_xticks(x)
ax.set_xticklabels(comparison['Metric'])
ax.grid(axis='y', alpha=0.3)
ax.legend()

plt.tight_layout()
plt.show()

## Part 6: Feature Importance Analysis

In [None]:
# Feature importances from the tuned model, ordered from most to least important.
importances = best_model.feature_importances_
indices = np.argsort(importances)[::-1]

print("\n" + "="*50)
print("FEATURE IMPORTANCE (Tuned Model)")
print("="*50)
for feature_idx in indices:
    print(f"Feature {feature_idx}: {importances[feature_idx]:.4f}")

# Visualize. The bars are drawn in descending-importance order, so the ticks
# must be labelled with the original feature index of each bar; the default
# 0..n-1 tick labels would attribute importances to the wrong features.
bar_positions = range(len(importances))
plt.figure(figsize=(10, 5))
plt.bar(bar_positions, importances[indices], color='steelblue')
plt.xticks(bar_positions, indices)
plt.xlabel('Feature Index')
plt.ylabel('Importance')
plt.title('Feature Importance Distribution', fontsize=14)
plt.tight_layout()
plt.show()

## Summary & Key Learnings

In [None]:
# Final recap of the notebook's results.
print("\n" + "="*70)
print("WEEK 6 SUMMARY: MLOps & HYPERPARAMETER TUNING")
print("="*70)

# Describe the F1 change honestly: tuning can also leave the held-out score
# unchanged or lower, so the wording and the sign follow the actual delta
# instead of always claiming an improvement with a hard-coded "+".
f1_delta = tuned_f1 - baseline_f1
if f1_delta > 0:
    f1_outcome = "Improved F1-Score"
elif f1_delta < 0:
    f1_outcome = "F1-Score decreased"
else:
    f1_outcome = "F1-Score unchanged"

# A relative change is undefined when the baseline F1 is 0.
if baseline_f1:
    f1_change = f"{f1_delta / baseline_f1 * 100:+.1f}%"
else:
    f1_change = "n/a"

print("\n✓ WHAT WE ACCOMPLISHED:")
print("  1. Established baseline model performance")
print("  2. Performed 5-fold cross-validation for robust evaluation")
print("  3. Used GridSearchCV to find optimal hyperparameters")
print(f"  4. {f1_outcome} from {baseline_f1:.4f} to {tuned_f1:.4f} ({f1_change})")
print(f"  5. Explored {len(results_df)} hyperparameter combinations")

print("\n✓ KEY HYPERPARAMETERS:")
for param, value in best_params.items():
    print(f"  - {param}: {value}")

print("\n✓ BEST PRACTICES APPLIED:")
print("  - Train-test split for unbiased evaluation")
print("  - Cross-validation for stability assessment")
print("  - Systematic hyperparameter search")
print("  - Multiple evaluation metrics (not just accuracy)")
print("  - Feature importance analysis")

print("\n→ NEXT WEEK: Deployment & Capstone Project")
print("="*70)