# Phase 3.3: Hyperparameter Tuning

**Objective**: Find the best CatBoost hyperparameters with a grid search scored on the validation set.

**Parameters to tune:**
- `iterations`: [500, 1000, 1500]
- `learning_rate`: [0.01, 0.05, 0.1]
- `depth`: [4, 6, 8]

**Evaluation Metric**: F1-Score (primary metric for imbalanced data)

## 0. Install Required Libraries

**Run this cell only if libraries are not already installed.**

In [None]:
# Optional one-time setup: remove the leading "# " from the next line and run
# this cell when any of the listed packages is missing from the environment.
# !pip install catboost jupyter matplotlib seaborn tqdm scikit-learn pandas numpy

print('If libraries are already installed, you can skip this cell.')

## 1. Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import json
import warnings
from itertools import product
from tqdm import tqdm
warnings.filterwarnings('ignore')

# CatBoost
from catboost import CatBoostClassifier, Pool

# Scikit-learn metrics
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults for the figures below: seaborn's 'whitegrid' theme first,
# then a 12x6 default figure size on top of it.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Libraries imported successfully")

## 2. Load Data

In [None]:
# Input / output locations, all relative to the notebook's working directory
DATA_DIR = Path('../data/splits')
OUTPUT_DIR = Path('../outputs/results')
MODEL_DIR = Path('../models')

# Every '<name>_train.csv' in DATA_DIR identifies one lottery; the list is
# sorted by file path so the lottery order is stable between runs.
train_files = sorted(DATA_DIR.glob('*_train.csv'))
lottery_names = [csv_path.stem.replace('_train', '') for csv_path in train_files]

print(f"Found {len(lottery_names)} lotteries")

In [None]:
def _read_split_frames(split):
    """Read `<lottery>_<split>.csv` from DATA_DIR for every lottery, in `lottery_names` order."""
    return [pd.read_csv(DATA_DIR / f"{lottery}_{split}.csv") for lottery in lottery_names]


# Training split: one frame per lottery, stacked under a fresh 0..n-1 index
train_dfs = _read_split_frames('train')
train_data = pd.concat(train_dfs, ignore_index=True)

# Validation split, assembled the same way
val_dfs = _read_split_frames('val')
val_data = pd.concat(val_dfs, ignore_index=True)

print(f"Training data: {train_data.shape}")
print(f"Validation data: {val_data.shape}")

## 3. Prepare Features

In [None]:
# Columns kept out of the feature matrix: the target ('appeared', used as y
# below) and three further columns that are not used as model inputs.
exclude_cols = ['appeared', 'draw_date', 'lottery', 'number']
feature_cols = [name for name in train_data.columns if name not in exclude_cols]

# CatBoost is told which columns are categorical by their position in feature_cols
categorical_features = ['trend']
categorical_indices = list(map(feature_cols.index, categorical_features))

# Feature matrix and target for each split
X_train, y_train = train_data[feature_cols], train_data['appeared']
X_val, y_val = val_data[feature_cols], val_data['appeared']

# Wrap each split in a CatBoost Pool carrying the categorical column positions
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_indices)
val_pool = Pool(data=X_val, label=y_val, cat_features=categorical_indices)

print(f"Features: {len(feature_cols)}")
print(f"Categorical: {categorical_features}")

## 4. Define Hyperparameter Grid

In [None]:
# Search space: every value of every parameter is combined with every other
param_grid = dict(
    iterations=[500, 1000, 1500],
    learning_rate=[0.01, 0.05, 0.1],
    depth=[4, 6, 8],
)

# Size of the full Cartesian product = product of the per-parameter list lengths
total_combinations = np.prod(list(map(len, param_grid.values())))

print("Hyperparameter Grid:")
for name in param_grid:
    print(f"  {name}: {param_grid[name]}")
print(f"\nTotal combinations: {total_combinations}")

## 5. Grid Search

Train one model per hyperparameter combination and record its validation F1-Score, precision, recall and ROC-AUC.

In [None]:
# Exhaustive grid search: fit one CatBoost model per
# (iterations, learning_rate, depth) combination and score it on the
# validation set. One dict of hyperparameters + metrics is appended to
# `results` per combination.
#
# NOTE: val_pool is passed as eval_set (so it drives early stopping and
# best-iteration selection) and is also used to compute the metrics below.
# The recorded scores are therefore optimistic; the held-out test set is the
# unbiased check.
results = []

# Cartesian product of the grid values, in the key order of param_grid
param_combinations = list(product(*param_grid.values()))
n_combinations = len(param_combinations)

print(f"Starting grid search with {n_combinations} combinations...\n")

for i, (iterations, learning_rate, depth) in enumerate(tqdm(param_combinations), 1):
    # tqdm.write (instead of print) keeps log lines from breaking the progress bar
    tqdm.write(f"\n[{i}/{n_combinations}] Testing: iterations={iterations}, lr={learning_rate}, depth={depth}")

    # Same fixed settings for every candidate; only the three grid values vary
    model = CatBoostClassifier(
        iterations=iterations,
        learning_rate=learning_rate,
        depth=depth,
        loss_function='Logloss',
        eval_metric='F1',
        auto_class_weights='Balanced',
        cat_features=categorical_indices,
        random_seed=42,
        verbose=False,
        early_stopping_rounds=50,
        use_best_model=True
    )

    # Train, monitoring the eval metric on the validation pool
    model.fit(train_pool, eval_set=val_pool, plot=False)

    # Hard labels for F1 / precision / recall, positive-class probability for AUC
    y_pred = model.predict(val_pool)
    y_pred_proba = model.predict_proba(val_pool)[:, 1]

    # Validation metrics
    f1 = f1_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    roc_auc = roc_auc_score(y_val, y_pred_proba)

    # One row per combination; becomes the results DataFrame later
    results.append({
        'iterations': iterations,
        'learning_rate': learning_rate,
        'depth': depth,
        'f1_score': f1,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc,
        'best_iteration': model.get_best_iteration()
    })

    tqdm.write(f"  F1-Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, AUC: {roc_auc:.4f}")

print("\nGrid search complete!")

## 6. Analyze Results

In [None]:
# Collect the grid-search rows into a table ranked by validation F1-Score
# (best first); later cells read the winner from the first row.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('f1_score', ascending=False)

print("\nTop 10 Hyperparameter Combinations (by F1-Score):")
print(results_df.head(10).to_string(index=False))

# Persist every combination. Create the output directory first so that a
# missing folder cannot discard the results of the whole grid search.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
results_path = OUTPUT_DIR / 'hyperparameter_tuning_results.csv'
results_df.to_csv(results_path, index=False)
print(f"\nSaved full results to: {results_path}")

In [None]:
# results_df is ordered by F1-Score (descending), so row 0 is the best combination
best_params = results_df.iloc[0]

print("\nBest Hyperparameters:")
# iterations / depth are cast back to int for display; learning_rate is shown as stored
print(f"  iterations: {int(best_params['iterations'])}")
print(f"  learning_rate: {best_params['learning_rate']}")
print(f"  depth: {int(best_params['depth'])}")

print("\nBest Validation Metrics:")
for label, column in [
    ('F1-Score', 'f1_score'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
    ('ROC-AUC', 'roc_auc'),
]:
    print(f"  {label}: {best_params[column]:.4f}")

## 7. Visualize Grid Search Results

In [None]:
# Three 2-D slices of the 3-D grid, drawn side by side. Each slice pins one
# hyperparameter to a single value and shows F1-Score over the other two.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (pinned column, pinned value, heatmap rows, heatmap columns, title, x label, y label)
heatmap_specs = [
    ('depth', 6, 'learning_rate', 'iterations',
     'F1-Score: Iterations vs Learning Rate (depth=6)', 'Iterations', 'Learning Rate'),
    ('learning_rate', 0.05, 'depth', 'iterations',
     'F1-Score: Iterations vs Depth (lr=0.05)', 'Iterations', 'Depth'),
    ('iterations', 1000, 'depth', 'learning_rate',
     'F1-Score: Learning Rate vs Depth (iter=1000)', 'Learning Rate', 'Depth'),
]

for ax, (fixed_col, fixed_value, row_col, col_col, title, x_label, y_label) in zip(axes, heatmap_specs):
    # Keep only the grid rows at the pinned value, then reshape to a 2-D F1 table
    grid_slice = results_df[results_df[fixed_col] == fixed_value]
    f1_table = grid_slice.pivot(index=row_col, columns=col_col, values='f1_score')

    sns.heatmap(f1_table, annot=True, fmt='.4f', cmap='YlGnBu', ax=ax, cbar_kws={'label': 'F1-Score'})
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

plt.tight_layout()
heatmap_path = OUTPUT_DIR / 'hyperparameter_heatmaps.png'
plt.savefig(heatmap_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"Saved heatmaps to: {heatmap_path}")

In [None]:
def _describe_config(row):
    """Short axis label for one results row, e.g. 'iter=500, lr=0.05, d=6'."""
    return f"iter={int(row['iterations'])}, lr={row['learning_rate']}, d={int(row['depth'])}"


# Horizontal bar chart of the ten best rows of results_df (already ranked by F1-Score)
top_10 = results_df.head(10).copy()
top_10['config'] = top_10.apply(_describe_config, axis=1)

bar_positions = range(len(top_10))

fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(bar_positions, top_10['f1_score'], color='steelblue')
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_10['config'])
ax.set_xlabel('F1-Score', fontsize=12)
ax.set_ylabel('Configuration', fontsize=12)
ax.set_title('Top 10 Hyperparameter Configurations', fontsize=14, fontweight='bold')

# Reference line at the score of the top-ranked configuration
ax.axvline(top_10['f1_score'].iloc[0], color='red', linestyle='--', label='Best')
ax.legend()

# Flip the y axis so the first (best) row is drawn at the top
ax.invert_yaxis()
ax.grid(axis='x', alpha=0.3)
fig.tight_layout()

top_10_path = OUTPUT_DIR / 'top_10_configs.png'
fig.savefig(top_10_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"Saved top 10 configs plot to: {top_10_path}")

## 8. Train Final Model with Best Hyperparameters

In [None]:
print("Training final model with best hyperparameters...\n")

# Winning grid values plus the same fixed settings used during the search;
# the only difference is verbose=100 so training progress is logged.
final_model_params = {
    'iterations': int(best_params['iterations']),
    'learning_rate': best_params['learning_rate'],
    'depth': int(best_params['depth']),
    'loss_function': 'Logloss',
    'eval_metric': 'F1',
    'auto_class_weights': 'Balanced',
    'cat_features': categorical_indices,
    'random_seed': 42,
    'verbose': 100,
    'early_stopping_rounds': 50,
    'use_best_model': True,
}
best_model = CatBoostClassifier(**final_model_params)

# Fit on the training pool, monitoring the validation pool
best_model.fit(train_pool, eval_set=val_pool, plot=False)

print("\nTraining complete!")
print(f"Best iteration: {best_model.get_best_iteration()}")

In [None]:
# Score the final model on the validation pool: hard labels for
# F1 / precision / recall, positive-class probability for ROC-AUC.
y_pred_final = best_model.predict(val_pool)
y_pred_proba_final = best_model.predict_proba(val_pool)[:, 1]

# Everything is cast to built-in int / float so the dict can be written as JSON
final_metrics = dict(
    model='CatBoost (Best)',
    iterations=int(best_params['iterations']),
    learning_rate=float(best_params['learning_rate']),
    depth=int(best_params['depth']),
    best_iteration=int(best_model.get_best_iteration()),
    f1_score=float(f1_score(y_val, y_pred_final)),
    precision=float(precision_score(y_val, y_pred_final)),
    recall=float(recall_score(y_val, y_pred_final)),
    roc_auc=float(roc_auc_score(y_val, y_pred_proba_final)),
)

print("\nFinal Model Metrics:")
for key, value in final_metrics.items():
    # Floats are shown with four decimals, everything else as-is
    shown = f"{value:.4f}" if isinstance(value, float) else f"{value}"
    print(f"  {key}: {shown}")

## 9. Save Best Model and Results

In [None]:
# Persist the tuned model, its configuration/metrics, and a comparison against
# the original model's F1-Score read from 'catboost_results.json'.

# Make sure both target directories exist before anything is written
MODEL_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Save best model
model_path = MODEL_DIR / 'best_model.cbm'
best_model.save_model(str(model_path))
print(f"Saved best model to: {model_path}")

# Save best hyperparameters and metrics
config_path = OUTPUT_DIR / 'best_model_config.json'
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(final_metrics, f, indent=2)
print(f"Saved best model config to: {config_path}")

# Load the original model's metrics; this file is expected to already exist
# in OUTPUT_DIR (a missing file raises FileNotFoundError naming the path).
with open(OUTPUT_DIR / 'catboost_results.json', 'r', encoding='utf-8') as f:
    original_metrics = json.load(f)

original_f1 = original_metrics['f1_score']
tuned_f1 = final_metrics['f1_score']
improvement_absolute = tuned_f1 - original_f1

# A relative change is undefined for a baseline of 0; store None (JSON null)
# instead of raising ZeroDivisionError.
if original_f1:
    improvement_percent = (improvement_absolute / original_f1) * 100
else:
    improvement_percent = None

improvement = {
    'original_f1': original_f1,
    'tuned_f1': tuned_f1,
    'improvement_absolute': improvement_absolute,
    'improvement_percent': improvement_percent
}

print("\nImprovement After Tuning:")
print(f"  Original F1-Score: {improvement['original_f1']:.4f}")
print(f"  Tuned F1-Score: {improvement['tuned_f1']:.4f}")
print(f"  Absolute Improvement: {improvement['improvement_absolute']:+.4f}")
if improvement['improvement_percent'] is None:
    print("  Relative Improvement: n/a (original F1-Score is 0)")
else:
    print(f"  Relative Improvement: {improvement['improvement_percent']:+.2f}%")

improvement_path = OUTPUT_DIR / 'tuning_improvement.json'
with open(improvement_path, 'w', encoding='utf-8') as f:
    json.dump(improvement, f, indent=2)
print(f"\nSaved improvement metrics to: {improvement_path}")

## 10. Summary

**Key Findings:**
1. Systematic grid search explored 27 hyperparameter combinations
2. Optimal configuration identified based on F1-Score
3. Best model saved for final evaluation and deployment
4. Hyperparameter tuning is compared against the original CatBoost model; the measured change in F1-Score is saved to `tuning_improvement.json`
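5. Early stopping (50 rounds without improvement in validation F1-Score) limits overfitting; because the same validation set is also used to choose the hyperparameters, the validation scores are optimistic and must be confirmed on the test set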

**Next Steps:**
- Notebook 04: Comprehensive model evaluation on test set
- Generate confusion matrix, ROC curves, per-lottery performance analysis
- Compare all models (LR, RF, CatBoost default, CatBoost tuned)