[Reference](https://medium.com/@harishk3493/optuna-the-hyperparameter-optimization-framework-that-saved-my-machine-learning-sanity-315febd8f722)

In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [3]:
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_breast_cancer

# Load sample data; return_X_y=True makes the loader's result unpack directly into X and y
X, y = load_breast_cancer(return_X_y=True)

def objective(trial):
    """Score one Optuna trial.

    Draws five XGBoost hyperparameters from the trial, then returns the mean
    accuracy of a 5-fold cross-validation on the module-level X and y.
    """
    # Draw each hyperparameter from its search range (same order on every call)
    depth = trial.suggest_int('max_depth', 3, 10)
    eta = trial.suggest_float('learning_rate', 0.01, 0.3)
    n_trees = trial.suggest_int('n_estimators', 50, 300)
    row_fraction = trial.suggest_float('subsample', 0.6, 1.0)
    col_fraction = trial.suggest_float('colsample_bytree', 0.6, 1.0)

    # Build the classifier with the sampled settings and a fixed seed
    classifier = xgb.XGBClassifier(
        max_depth=depth,
        learning_rate=eta,
        n_estimators=n_trees,
        subsample=row_fraction,
        colsample_bytree=col_fraction,
        random_state=42,
    )

    # The trial's objective value is the average accuracy over the five folds
    fold_accuracies = cross_val_score(classifier, X, y, cv=5, scoring='accuracy')
    return fold_accuracies.mean()

# Create study and optimize.
# The sampler is seeded so that re-running the cell proposes the same sequence of
# hyperparameters (TPE is also what create_study uses when no sampler is given).
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Report the winning configuration and its cross-validated accuracy
print("Best parameters:", study.best_params)
print("Best accuracy:", study.best_value)

[I 2025-09-16 13:40:55,409] A new study created in memory with name: no-name-646082d2-7a32-404f-ad6f-edbdcc61fb7b
[I 2025-09-16 13:40:59,502] Trial 0 finished with value: 0.9630957925787922 and parameters: {'max_depth': 5, 'learning_rate': 0.03485215151170995, 'n_estimators': 155, 'subsample': 0.6244171001452504, 'colsample_bytree': 0.959407615349203}. Best is trial 0 with value: 0.9630957925787922.
[I 2025-09-16 13:41:04,389] Trial 1 finished with value: 0.9736221083682658 and parameters: {'max_depth': 8, 'learning_rate': 0.11346228710974687, 'n_estimators': 202, 'subsample': 0.8580416800987205, 'colsample_bytree': 0.6427383915112795}. Best is trial 1 with value: 0.9736221083682658.
[I 2025-09-16 13:41:08,011] Trial 2 finished with value: 0.9666200900481293 and parameters: {'max_depth': 8, 'learning_rate': 0.1158036361703085, 'n_estimators': 128, 'subsample': 0.6684521911982491, 'colsample_bytree': 0.8747135611590535}. Best is trial 1 with value: 0.9736221083682658.
[I 2025-09-16 13:4

Best parameters: {'max_depth': 9, 'learning_rate': 0.14122818833599388, 'n_estimators': 264, 'subsample': 0.8100562692908606, 'colsample_bytree': 0.6014627220349051}
Best accuracy: 0.9806551777674274


# Method 1: Manual Hyperparameters (Expert Intuition)

In [5]:
# Hand-picked XGBoost settings used as the "expert intuition" baseline
manual_params = dict(
    max_depth=8,
    learning_rate=0.1,
    n_estimators=300,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=5,
    gamma=1,
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1.5,  # Adjusted for class imbalance
)

# Method 2: RandomizedSearchCV (Random Sampling)
RandomizedSearchCV showed significant improvement over manual tuning, demonstrating the value of exploring multiple parameter combinations. However, this approach treats each trial independently without learning from previous attempts.

# Method 3: Optuna with Intelligent Pruning
```
def objective_with_pruning(trial):
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        # ... other parameters
    }
    
    model = xgb.XGBClassifier(**params)
    
    # Cross-validation with pruning callback
    scores = []
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train)):
        # Train and evaluate
        fold_score = model.score(X_fold_val, y_fold_val)
        scores.append(fold_score)
        
        # Report for pruning
        trial.report(fold_score, fold)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return np.mean(scores)
```

In [None]:
import optuna
import optuna.visualization as vis
import xgboost as xgb
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split, RandomizedSearchCV
from sklearn.datasets import fetch_covtype
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import randint, uniform
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Set plotting style: matplotlib style sheet first, then seaborn's default colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Load the Forest Covertype dataset and announce what is being loaded
print("Loading Forest Covertype Dataset...")
print("This is a REAL complex dataset from UCI repository")
print("Predicting forest cover type from cartographic variables")

# Fetch features and labels. X and y are rebound here, replacing the breast-cancer
# arrays that the earlier cell stored under the same names.
data = fetch_covtype()
X, y = data.data, data.target

# Show the original multi-class labels and the sample count per label
# (np.bincount starts counting at 0, so the first entry belongs to the unused label 0)
print(f"Original classes: {np.unique(y)} with distribution: {np.bincount(y)}")

# Collapse to a binary target: cover types 1 and 2 become label 0, every other
# cover type becomes label 1. This yields an imbalanced two-class problem.
# NOTE(review): types 1 and 2 are presumably Spruce/Fir and Lodgepole Pine per the
# UCI description - confirm before relying on the names.
y_binary = np.where((y == 1) | (y == 2), 0, 1)  # 0: cover types 1 & 2, 1: all other types
print(f"Binary classes distribution: {np.bincount(y_binary)}")

# 80/20 split, stratified on the binary label, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42, stratify=y_binary)

# Summary of sizes and class balance for the reader
print(f"\nDATASET COMPLEXITY:")
print(f"- Samples: {X.shape[0]:,}")
print(f"- Features: {X.shape[1]} (mix of continuous and categorical)")
print(f"- Train set: {X_train.shape[0]:,} samples")
print(f"- Test set: {X_test.shape[0]:,} samples")
print(f"- Class imbalance: {np.bincount(y_binary)[0]:,} vs {np.bincount(y_binary)[1]:,}")
print(f"- Real-world geographic/environmental data with complex interactions")
print("=" * 80)

# Shared (low, high) bounds for every tuned hyperparameter; the random search and the
# Optuna objective both read their limits from this table.
PARAM_RANGES = dict(
    max_depth=(3, 15),              # Tree complexity
    learning_rate=(0.001, 0.3),     # Learning speed
    n_estimators=(100, 1000),       # Number of trees
    subsample=(0.5, 1.0),           # Row sampling
    colsample_bytree=(0.3, 1.0),    # Feature sampling per tree
    colsample_bylevel=(0.3, 1.0),   # Feature sampling per level
    colsample_bynode=(0.3, 1.0),    # Feature sampling per node
    min_child_weight=(1, 20),       # Minimum child weight needed to keep a split
    gamma=(0, 10),                  # Minimum split loss
    reg_alpha=(0, 10),              # L1 regularization
    reg_lambda=(0, 10),             # L2 regularization
    scale_pos_weight=(0.1, 5.0),    # Handle class imbalance
)

N_TRIALS = 100  # Trial budget handed to each search strategy

# Describe the tuning task; the trailing empty string produces the blank line
print(
    "OPTIMIZATION CHALLENGE:",
    f"- {len(PARAM_RANGES)} hyperparameters to tune",
    "- Complex parameter interactions",
    "- Large dataset requiring longer training times",
    f"- {N_TRIALS} trials budget",
    "- Real-world data with noise and complexity",
    "",
    sep="\n",
)

# ========================
# 1. MANUAL HYPERPARAMETERS
# ========================
print("1. MANUAL HYPERPARAMETERS (Expert Intuition)")
print("-" * 50)

# Fixed, hand-chosen configuration that acts as the baseline for both searches
manual_params = dict(
    max_depth=8,
    learning_rate=0.1,
    n_estimators=300,
    subsample=0.8,
    colsample_bytree=0.8,
    colsample_bylevel=0.8,
    colsample_bynode=0.8,
    min_child_weight=5,
    gamma=1,
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1.5,  # Adjust for class imbalance
    random_state=42,
)

# The timer spans cross-validation plus the final fit and prediction below
start_time = time.time()
manual_model = xgb.XGBClassifier(**manual_params)

# 3-fold cross-validation on the training split (fewer folds because of dataset size)
manual_cv_scores = cross_val_score(manual_model, X_train, y_train, cv=3, scoring='accuracy')
manual_cv_mean, manual_cv_std = manual_cv_scores.mean(), manual_cv_scores.std()

# Refit on the complete training split, then score once on the held-out test split
manual_model.fit(X_train, y_train)
manual_predictions = manual_model.predict(X_test)
manual_test_accuracy = accuracy_score(y_test, manual_predictions)
manual_time = time.time() - start_time

# Baseline report
print(
    f"Manual Parameters: {len(manual_params)} parameters set",
    f"Cross-validation Accuracy: {manual_cv_mean:.4f} (+/- {manual_cv_std:.4f})",
    f"Test Accuracy: {manual_test_accuracy:.4f}",
    f"Training Time: {manual_time:.1f} seconds",
    "Number of combinations tried: 1",
    "=" * 80,
    sep="\n",
)

# ========================
# 2. RANDOMIZED SEARCH
# ========================
print("2. RANDOMIZED SEARCH (Random Sampling)")
print("-" * 50)

# Turn every (low, high) pair in PARAM_RANGES into a scipy distribution:
#   - integer-valued parameters -> randint(low, high + 1), so `high` itself can be drawn
#   - everything else           -> uniform(loc=low, scale=high - low)
# NOTE(review): learning_rate is drawn uniformly here, whereas the Optuna objective
# samples it with log=True, so the two searches do not share the same prior.
param_distributions = {
    name: (
        randint(low, high + 1)
        if name in ('max_depth', 'n_estimators', 'min_child_weight')
        else uniform(low, high - low)
    )
    for name, (low, high) in PARAM_RANGES.items()
}

start_time = time.time()

# Random search over N_TRIALS candidates, 3-fold CV each (fewer folds because of dataset size)
random_search = RandomizedSearchCV(
    estimator=xgb.XGBClassifier(random_state=42),
    param_distributions=param_distributions,
    n_iter=N_TRIALS,
    cv=3,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
    verbose=0,
)

print(f"Starting RandomizedSearchCV with {N_TRIALS} trials...")
random_search.fit(X_train, y_train)

# Best configuration, its CV score, and the test accuracy of the refitted best model
random_best_params = random_search.best_params_
random_cv_score = random_search.best_score_
random_predictions = random_search.predict(X_test)
random_test_accuracy = accuracy_score(y_test, random_predictions)
random_time = time.time() - start_time

# Search report
print(
    f"Best Parameters Found: {len(random_best_params)} parameters optimized",
    f"Best Cross-validation Accuracy: {random_cv_score:.4f}",
    f"Test Accuracy: {random_test_accuracy:.4f}",
    f"Total Search Time: {random_time:.1f} seconds ({random_time/60:.1f} minutes)",
    f"Number of combinations tried: {N_TRIALS}",
    f"Average time per trial: {random_time/N_TRIALS:.1f} seconds",
    "=" * 80,
    sep="\n",
)

# ========================
# 3. OPTUNA WITH PRUNING
# ========================
print("3. OPTUNA WITH PRUNING (Smart Search + Early Stopping)")
print("-" * 50)

def objective_with_pruning(trial):
    """Evaluate one Optuna trial with fold-by-fold cross-validation and pruning.

    Samples the twelve hyperparameters listed in PARAM_RANGES, trains an
    XGBClassifier on each of three stratified folds of the training split, and
    reports every fold's accuracy to the trial so the study's pruner can stop
    unpromising configurations early.

    Args:
        trial: the optuna trial used to sample parameters and report progress.

    Returns:
        Mean validation accuracy over the three folds.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner asks to stop before the
            last fold has been evaluated.
    """
    from sklearn.model_selection import StratifiedKFold

    # Define comprehensive hyperparameter search space
    params = {
        'max_depth': trial.suggest_int('max_depth', *PARAM_RANGES['max_depth']),
        'learning_rate': trial.suggest_float('learning_rate', *PARAM_RANGES['learning_rate'], log=True),
        'n_estimators': trial.suggest_int('n_estimators', *PARAM_RANGES['n_estimators']),
        'subsample': trial.suggest_float('subsample', *PARAM_RANGES['subsample']),
        'colsample_bytree': trial.suggest_float('colsample_bytree', *PARAM_RANGES['colsample_bytree']),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', *PARAM_RANGES['colsample_bylevel']),
        'colsample_bynode': trial.suggest_float('colsample_bynode', *PARAM_RANGES['colsample_bynode']),
        'min_child_weight': trial.suggest_int('min_child_weight', *PARAM_RANGES['min_child_weight']),
        'gamma': trial.suggest_float('gamma', *PARAM_RANGES['gamma']),
        'reg_alpha': trial.suggest_float('reg_alpha', *PARAM_RANGES['reg_alpha']),
        'reg_lambda': trial.suggest_float('reg_lambda', *PARAM_RANGES['reg_lambda']),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', *PARAM_RANGES['scale_pos_weight']),
        'random_state': 42
    }

    # Create model
    model = xgb.XGBClassifier(**params)

    # Stratified folds keep the class ratio of the imbalanced target in every fold and
    # match the stratified splitting that cross_val_score / RandomizedSearchCV apply to
    # classifiers when given cv=3, so the CV scores of all three methods are comparable.
    splitter = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    last_fold = splitter.get_n_splits() - 1
    scores = []

    for fold, (train_idx, val_idx) in enumerate(splitter.split(X_train, y_train)):
        X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
        y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

        # Train model
        model.fit(X_fold_train, y_fold_train)

        # Evaluate
        fold_score = model.score(X_fold_val, y_fold_val)
        scores.append(fold_score)

        # Report intermediate value for pruning
        trial.report(fold_score, fold)

        # Only consult the pruner while folds remain: pruning after the final fold
        # would save no time and would discard a fully computed CV mean.
        if fold < last_fold and trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return np.mean(scores)

# Suppress optuna logs for cleaner output
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Create study with pruning; the timer covers the search plus the final refit
start_time = time.time()
print(f"Starting Optuna optimization with PRUNING and {N_TRIALS} trials...")

# Use MedianPruner for early stopping of unpromising trials
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=10,  # Don't prune first 10 trials
        n_warmup_steps=1,     # Never prune at step 0; the first prunable check is step 1 (second fold)
        interval_steps=1      # Check for pruning at every reported step after warm-up
    )
)

study.optimize(objective_with_pruning, n_trials=N_TRIALS)

# Get best parameters and train final model
optuna_best_params = study.best_params.copy()
optuna_best_params['random_state'] = 42

optuna_model = xgb.XGBClassifier(**optuna_best_params)
optuna_model.fit(X_train, y_train)
optuna_predictions = optuna_model.predict(X_test)
optuna_test_accuracy = accuracy_score(y_test, optuna_predictions)
optuna_time = time.time() - start_time

# Count pruned trials
pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

# Use the per-trial durations Optuna records instead of dividing total wall time
# (which also contains pruned trials and the final refit) by the completed count.
completed_durations = [t.duration.total_seconds() for t in completed_trials if t.duration is not None]
pruned_durations = [t.duration.total_seconds() for t in pruned_trials if t.duration is not None]
avg_completed_duration = float(np.mean(completed_durations)) if completed_durations else 0.0

# Estimated saving: what the pruned trials would have cost at the average duration of
# a full trial, minus what they actually cost before being stopped.
pruning_time_saved = max(0.0, len(pruned_durations) * avg_completed_duration - sum(pruned_durations))

print(f"Optimization completed!")
print(f"✓ Total trials: {len(study.trials)}")
print(f"✓ Completed trials: {len(completed_trials)}")
print(f"✓ Pruned trials: {len(pruned_trials)} ({len(pruned_trials)/len(study.trials)*100:.1f}%)")
print(f"✓ Time saved by pruning: ~{pruning_time_saved:.1f}s")
print(f"Best Parameters Found: {len(study.best_params)} parameters optimized")
print(f"Best Cross-validation Accuracy: {study.best_value:.4f}")
print(f"Test Accuracy: {optuna_test_accuracy:.4f}")
print(f"Total Optimization Time: {optuna_time:.1f} seconds ({optuna_time/60:.1f} minutes)")
print(f"Average time per completed trial: {avg_completed_duration:.1f} seconds")
print("=" * 80)

# ========================
# 4. OPTUNA VISUALIZATIONS
# ========================
print("4. GENERATING OPTUNA VISUALIZATIONS")
print("-" * 50)

# Create the figure that holds the Optuna panels; each panel below selects its
# slot in a 2x3 grid with plt.subplot(2, 3, k)
fig = plt.figure(figsize=(20, 15))

# 1. Optimization History
plt.subplot(2, 3, 1)
try:
    # Plot COMPLETE trials only. Optuna also stores a value on pruned trials (their
    # last reported fold score), so a `value is not None` filter would mix single-fold
    # scores with full cross-validated means and could distort the best-score line.
    finished_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
    trial_numbers = [t.number for t in finished_trials]
    trial_values = [t.value for t in finished_trials]

    plt.plot(trial_numbers, trial_values, alpha=0.6, linewidth=1, label='Trial scores')

    # Running maximum = best score seen so far at each plotted trial
    best_scores = []
    current_best = float('-inf')
    for val in trial_values:
        current_best = max(current_best, val)
        best_scores.append(current_best)

    plt.plot(trial_numbers, best_scores, 'r-', linewidth=2, label='Best score')
    plt.axhline(y=manual_cv_mean, color='orange', linestyle='--', alpha=0.7, label=f'Manual baseline ({manual_cv_mean:.4f})')
    plt.xlabel('Trial Number')
    plt.ylabel('Accuracy')
    plt.title('Optimization History')
    plt.legend()
    plt.grid(True, alpha=0.3)
except Exception as e:
    # Keep the rest of the figure usable: show the error text inside this panel
    plt.text(0.5, 0.5, f'Optimization History\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# 2. Parameter Importances
plt.subplot(2, 3, 2)
try:
    # Importance per hyperparameter as computed by Optuna; keep the first eight entries
    param_importance = optuna.importance.get_param_importances(study)
    params = list(param_importance)[:8]
    importances = [param_importance[name] for name in params]

    # One horizontal bar per parameter, coloured along the viridis map
    y_pos = np.arange(len(params))
    bar_colors = plt.cm.viridis(np.linspace(0, 1, len(params)))
    bars = plt.barh(y_pos, importances, color=bar_colors)
    plt.yticks(y_pos, [name.replace('_', '\n') for name in params])
    plt.xlabel('Importance')
    plt.title('Parameter Importance')
    plt.grid(True, alpha=0.3)

    # Write each importance value just to the right of its bar
    for bar, imp in zip(bars, importances):
        label_x = bar.get_width() + 0.001
        label_y = bar.get_y() + bar.get_height()/2
        plt.text(label_x, label_y, f'{imp:.3f}', ha='left', va='center', fontsize=9)
except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    plt.text(0.5, 0.5, f'Parameter Importance\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# 3. Parallel Coordinate Plot (for top parameters)
plt.subplot(2, 3, 3)
try:
    # Get top 4 most important parameters for cleaner visualization
    top_params = list(optuna.importance.get_param_importances(study).keys())[:4]

    # One row per completed trial: the selected parameter values followed by the score
    completed_trials_data = []
    for trial in study.trials:
        if trial.state == optuna.trial.TrialState.COMPLETE:
            trial_data = [trial.params[p] for p in top_params]
            trial_data.append(trial.value)
            completed_trials_data.append(trial_data)

    if completed_trials_data:
        data_array = np.array(completed_trials_data, dtype=float)

        # Min-max scale every column, including the trailing score column. Without
        # scaling the score, all accuracies sit in a narrow band and every line would
        # get practically the same colour. Constant columns are centred at 0.5.
        for i in range(data_array.shape[1]):
            col = data_array[:, i]
            col_min, col_max = col.min(), col.max()
            if col_max > col_min:
                data_array[:, i] = (col - col_min) / (col_max - col_min)
            else:
                data_array[:, i] = 0.5

        # Plot lines, coloured by the trial's relative score (0 = worst, 1 = best)
        for row in data_array:
            plt.plot(range(len(top_params)), row[:-1], alpha=0.6,
                    color=plt.cm.viridis(row[-1]), linewidth=1)

        plt.xticks(range(len(top_params)), [p.replace('_', '\n') for p in top_params])
        plt.ylabel('Normalized Value')
        plt.title(f'Parallel Coordinates\n(Top {len(top_params)} Parameters)')
        plt.grid(True, alpha=0.3)
    else:
        plt.text(0.5, 0.5, 'No completed trials', ha='center', va='center', transform=plt.gca().transAxes)
except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    plt.text(0.5, 0.5, f'Parallel Coordinates\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# 4. Pruning Analysis
plt.subplot(2, 3, 4)
try:
    trial_states = [t.state.name for t in study.trials]
    state_counts = pd.Series(trial_states).value_counts()

    # Colour by state name. value_counts() orders by frequency, so assigning colours
    # by position would paint whichever state is most common green.
    state_colors = {'COMPLETE': '#2ecc71', 'PRUNED': '#e74c3c', 'FAIL': '#f39c12'}  # Green, Red, Orange
    colors = [state_colors.get(name, '#95a5a6') for name in state_counts.index]  # grey for any other state
    wedges, texts, autotexts = plt.pie(state_counts.values, labels=state_counts.index,
                                      autopct='%1.1f%%', colors=colors)
    plt.title(f'Trial States\n({len(study.trials)} total trials)')

    # Make percentage text more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_weight('bold')
except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    plt.text(0.5, 0.5, f'Trial States\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# 5. Performance Distribution
plt.subplot(2, 3, 5)
try:
    # Restrict to COMPLETE trials: pruned trials also carry a value (their last
    # reported fold score), which is not a cross-validated mean.
    completed_values = [t.value for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    plt.hist(completed_values, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
    # Reference lines: best trial, manual baseline, and the mean of the plotted scores
    plt.axvline(study.best_value, color='red', linestyle='--', linewidth=2, label=f'Best: {study.best_value:.4f}')
    plt.axvline(manual_cv_mean, color='orange', linestyle='--', linewidth=2, label=f'Manual: {manual_cv_mean:.4f}')
    plt.axvline(np.mean(completed_values), color='green', linestyle='-', linewidth=2, label=f'Mean: {np.mean(completed_values):.4f}')
    plt.xlabel('Accuracy')
    plt.ylabel('Frequency')
    plt.title('Score Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)
except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    plt.text(0.5, 0.5, f'Score Distribution\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# 6. Time vs Performance
plt.subplot(2, 3, 6)
try:
    trial_durations = []
    trial_values_for_time = []

    # Only COMPLETE trials are plotted: a pruned trial's stored value is its last fold
    # score and its duration covers only part of the cross-validation, so neither
    # coordinate would be comparable with a full trial.
    for trial in study.trials:
        if trial.state == optuna.trial.TrialState.COMPLETE and trial.duration is not None:
            trial_durations.append(trial.duration.total_seconds())
            trial_values_for_time.append(trial.value)

    if trial_durations:
        scatter = plt.scatter(trial_durations, trial_values_for_time,
                            c=trial_values_for_time, cmap='viridis', alpha=0.6, s=30)
        plt.colorbar(scatter, label='Accuracy')
        plt.xlabel('Trial Duration (seconds)')
        plt.ylabel('Accuracy')
        plt.title('Time vs Performance')
        plt.grid(True, alpha=0.3)

        # Highlight best trial
        best_trial = study.best_trial
        if best_trial.duration:
            plt.scatter([best_trial.duration.total_seconds()], [best_trial.value],
                       color='red', s=100, marker='*', label='Best Trial')
            plt.legend()
    else:
        plt.text(0.5, 0.5, 'No timing data available', ha='center', va='center', transform=plt.gca().transAxes)
except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    plt.text(0.5, 0.5, f'Time vs Performance\nError: {str(e)}', ha='center', va='center', transform=plt.gca().transAxes)

# Add the figure title first, then lay out the panels inside the lower 96% of the
# figure so the title does not sit on top of the first row of panel titles.
plt.suptitle('Optuna Optimization Analysis', fontsize=16, y=0.98)
plt.tight_layout(rect=(0, 0, 1, 0.96))
plt.show()

# ========================
# 5. MANUAL vs OPTUNA COMPARISON
# ========================
print("\n5. MANUAL vs OPTUNA PARAMETER COMPARISON")
print("-" * 50)

# Create comparison visualization (2x2 grid). The radar chart needs polar axes, so
# the panels are added one at a time; plt.subplots(2, 2) would make all four
# Cartesian and the "radar" would render as an ordinary line plot over angle.
fig = plt.figure(figsize=(16, 12))
ax1 = fig.add_subplot(2, 2, 1, projection='polar')
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)

# Parameter comparison radar chart
try:
    # Select key parameters that exist in both
    common_params = []
    manual_values = []
    optuna_values = []

    for param in ['max_depth', 'learning_rate', 'n_estimators', 'subsample',
                  'reg_alpha', 'reg_lambda', 'gamma', 'scale_pos_weight']:
        if param in manual_params and param in optuna_best_params:
            common_params.append(param)
            manual_val = manual_params[param]
            optuna_val = optuna_best_params[param]

            # Scale both values to [0, 1] using the parameter's search range so that
            # parameters with very different magnitudes share one radial axis
            if param in PARAM_RANGES:
                param_min, param_max = PARAM_RANGES[param]
                manual_norm = (manual_val - param_min) / (param_max - param_min)
                optuna_norm = (optuna_val - param_min) / (param_max - param_min)
            else:
                manual_norm = manual_val
                optuna_norm = optuna_val

            manual_values.append(manual_norm)
            optuna_values.append(optuna_norm)

    # One spoke per parameter; repeat the first point to close each polygon
    angles = np.linspace(0, 2 * np.pi, len(common_params), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))  # Complete the circle

    manual_values_plot = manual_values + [manual_values[0]]
    optuna_values_plot = optuna_values + [optuna_values[0]]

    ax1.plot(angles, manual_values_plot, 'o-', linewidth=2, label='Manual', color='orange')
    ax1.fill(angles, manual_values_plot, alpha=0.25, color='orange')
    ax1.plot(angles, optuna_values_plot, 'o-', linewidth=2, label='Optuna', color='blue')
    ax1.fill(angles, optuna_values_plot, alpha=0.25, color='blue')

    ax1.set_xticks(angles[:-1])
    ax1.set_xticklabels([p.replace('_', '\n') for p in common_params])
    ax1.set_ylim(0, 1)
    ax1.set_title('Parameter Comparison\n(Normalized Values)', fontsize=12, pad=20)
    ax1.legend(loc='upper right', bbox_to_anchor=(1.2, 1.0))
    ax1.grid(True)

except Exception as e:
    # Show the failure inside the panel instead of aborting the whole figure
    ax1.text(0.5, 0.5, f'Parameter Radar Chart\nError: {str(e)}', ha='center', va='center', transform=ax1.transAxes)

# Test accuracy of the three approaches, side by side
ax2.bar(
    ['Manual\nExpert', 'Random\nSearch', 'Optuna\nPruning'],
    [manual_test_accuracy, random_test_accuracy, optuna_test_accuracy],
    color=['orange', 'gray', 'blue'],
    alpha=0.7,
)
ax2.set(ylabel='Test Accuracy', title='Final Performance Comparison')
ax2.grid(True, alpha=0.3)

# Print the accuracy slightly above each bar
for i, acc in enumerate((manual_test_accuracy, random_test_accuracy, optuna_test_accuracy)):
    ax2.text(i, acc + 0.0005, f'{acc:.4f}', ha='center', va='bottom', fontweight='bold')

# Wall-clock time per approach, converted from seconds to minutes
times = [seconds / 60 for seconds in (manual_time, random_time, optuna_time)]
ax3.bar(
    ['Manual\n(1 trial)', f'Random\n({N_TRIALS} trials)', f'Optuna\n({N_TRIALS} trials)'],
    times,
    color=['orange', 'gray', 'blue'],
    alpha=0.7,
)
ax3.set(ylabel='Time (minutes)', title='Optimization Time Comparison')
ax3.grid(True, alpha=0.3)

# Label each bar with its duration, offset by 1% of the tallest bar
label_offset = max(times) * 0.01
for i, t in enumerate(times):
    ax3.text(i, t + label_offset, f'{t:.1f}m', ha='center', va='bottom', fontweight='bold')

# Efficiency plot (accuracy improvement vs time).
# 'Improvement' is the test-accuracy gain over the manual baseline, multiplied by 100.
efficiency_data = {
    'Method': ['Manual', 'Random Search', 'Optuna'],
    'Accuracy': [manual_test_accuracy, random_test_accuracy, optuna_test_accuracy],
    'Time (min)': [manual_time/60, random_time/60, optuna_time/60],
    'Improvement': [0,
                   (random_test_accuracy - manual_test_accuracy) * 100,
                   (optuna_test_accuracy - manual_test_accuracy) * 100]
}

colors = ['orange', 'gray', 'blue']
sizes = [50, 100, 100]  # Manual gets smaller dot since it's just 1 trial

scatter = ax4.scatter(efficiency_data['Time (min)'], efficiency_data['Improvement'],
                     c=colors, s=sizes, alpha=0.7)
ax4.set_xlabel('Time (minutes)')
ax4.set_ylabel('Accuracy Improvement (%)')
ax4.set_title('Efficiency: Improvement vs Time')
ax4.grid(True, alpha=0.3)

# Add method labels
for i, method in enumerate(efficiency_data['Method']):
    ax4.annotate(method,
                (efficiency_data['Time (min)'][i], efficiency_data['Improvement'][i]),
                xytext=(5, 5), textcoords='offset points', fontweight='bold')

# Add the figure title first, then lay out the panels inside the lower 96% of the
# figure so the title does not sit on top of the first row of panel titles.
plt.suptitle('Manual vs Optuna Comprehensive Comparison', fontsize=16, y=0.98)
plt.tight_layout(rect=(0, 0, 1, 0.96))
plt.show()

# ========================
# 6. COMPREHENSIVE COMPARISON
# ========================
print("\n6. COMPREHENSIVE COMPARISON WITH PRUNING")
print("-" * 50)

# One row per method. 'Efficiency' is test accuracy divided by the minutes spent.
comparison_data = {
    'Method': ['Manual (Expert)', 'RandomSearch', 'Optuna + Pruning'],
    'CV Accuracy': [manual_cv_mean, random_cv_score, study.best_value],
    'Test Accuracy': [manual_test_accuracy, random_test_accuracy, optuna_test_accuracy],
    'Time (min)': [manual_time/60, random_time/60, optuna_time/60],
    'Trials': [1, N_TRIALS, len(completed_trials)],
    'Pruned': [0, 0, len(pruned_trials)],
    'Efficiency': [
        accuracy/(seconds/60)
        for accuracy, seconds in (
            (manual_test_accuracy, manual_time),
            (random_test_accuracy, random_time),
            (optuna_test_accuracy, optuna_time),
        )
    ],
}

comparison_df = pd.DataFrame(comparison_data)
print(comparison_df.to_string(index=False))

# Share of trials that ran to completion versus those stopped early
print("\nPRUNING EFFECTIVENESS:")
print("-" * 30)
print(f"✓ Total trials attempted: {len(study.trials)}")
print(f"✓ Trials completed: {len(completed_trials)} ({len(completed_trials)/len(study.trials)*100:.1f}%)")
print(f"✓ Trials pruned: {len(pruned_trials)} ({len(pruned_trials)/len(study.trials)*100:.1f}%)")
if completed_trials:
    # Scale the measured time by total/completed trials to project a run without pruning
    estimated_time_without_pruning = optuna_time * len(study.trials) / len(completed_trials)
    time_saved = estimated_time_without_pruning - optuna_time
    print(f"✓ Estimated time saved: {time_saved:.1f} seconds ({time_saved/60:.1f} minutes)")
    print(f"✓ Speed improvement: {estimated_time_without_pruning/optuna_time:.1f}x faster")

print(f"\nKEY INSIGHTS:")
print("-" * 30)

# Relative test-accuracy change of each search against the manual baseline
random_improvement = ((random_test_accuracy - manual_test_accuracy) / manual_test_accuracy) * 100
optuna_improvement = ((optuna_test_accuracy - manual_test_accuracy) / manual_test_accuracy) * 100

print(f"RandomSearch vs Manual: {random_improvement:+.2f}% accuracy improvement")
print(f"Optuna vs Manual: {optuna_improvement:+.2f}% accuracy improvement")

# Compare RandomSearch vs Optuna (THE KEY COMPARISON)
optuna_vs_random = ((optuna_test_accuracy - random_test_accuracy) / random_test_accuracy) * 100
print(f"Optuna vs RandomSearch: {optuna_vs_random:+.2f}% accuracy improvement")

# Translate the accuracy gap into a number of test predictions.
# The product is a whole number up to floating-point error, so it is rounded;
# int() truncates and could report one prediction too few.
accuracy_diff = abs(optuna_test_accuracy - random_test_accuracy)
print(f"Absolute accuracy difference: {accuracy_diff:.4f}")
print(f"On {len(y_test):,} test samples, this is {round(accuracy_diff * len(y_test))} fewer/more correct predictions")

# ========================
# 7. PARAMETER INSIGHTS WITH PRUNING
# ========================
print("\n7. ENHANCED PARAMETER INSIGHTS")
print("-" * 50)

# Rank hyperparameters by Optuna's importance score and list the top five
print("TOP 5 MOST IMPORTANT PARAMETERS:")
param_importance = optuna.importance.get_param_importances(study)
ranked_importance = sorted(param_importance.items(), key=lambda item: item[1], reverse=True)
for rank, (param, importance) in enumerate(ranked_importance[:5], start=1):
    print(f"{rank}. {param:20}: {importance:.3f} importance")

# Side-by-side table of the manual value and the value Optuna selected
print("\nOPTIMAL vs MANUAL PARAMETERS (detailed comparison):")
print("-" * 60)
print(f"{'Parameter':<20} {'Manual':<12} {'Optuna':<12} {'Difference':<15} {'% Change'}")
print("-" * 60)

key_params = ['max_depth', 'learning_rate', 'n_estimators', 'subsample',
              'reg_alpha', 'reg_lambda', 'gamma', 'scale_pos_weight']

for param in key_params:
    # Skip anything that is missing from either configuration
    if param not in optuna_best_params or param not in manual_params:
        continue

    manual_val = manual_params[param]
    optimal_val = optuna_best_params[param]
    diff = optimal_val - manual_val
    # A zero manual value has no meaningful relative change; report infinity instead
    if manual_val != 0:
        pct_change = (diff / manual_val) * 100
    else:
        pct_change = float('inf')

    print(f"{param:<20} {manual_val:<12.3f} {optimal_val:<12.3f} {diff:<15.3f} {pct_change:>+7.1f}%")

# ========================
# 8. TRIAL ANALYSIS
# ========================
print(f"\n8. DETAILED TRIAL ANALYSIS")
print("-" * 50)

print("OPTIMIZATION PROGRESSION:")
# Score statistics over COMPLETE trials only. Pruned trials also carry a value (their
# last reported fold score), so filtering on `value is not None` would let
# single-fold scores leak into the "completed" statistics below.
completed_values = [t.value for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if completed_values:
    print(f"✓ Best score: {max(completed_values):.4f}")
    print(f"✓ Worst score: {min(completed_values):.4f}")
    print(f"✓ Mean score: {np.mean(completed_values):.4f}")
    print(f"✓ Standard deviation: {np.std(completed_values):.4f}")

    # Ask the study for its best trial instead of scanning all trials for a float
    # equal to best_value (a scan could also match a pruned trial's stored value)
    best_trial_number = study.best_trial.number

    print(f"✓ Best score found at trial: {best_trial_number + 1}/{len(study.trials)}")
    # Position of the best trial within the run, as a fraction of all trials
    convergence_point = (best_trial_number + 1) / len(study.trials)
    if convergence_point < 0.5:
        print(f"✓ Early convergence: Found best solution in first {convergence_point*100:.1f}% of trials")
    elif convergence_point < 0.8:
        print(f"✓ Good convergence: Found best solution at {convergence_point*100:.1f}% of trials")
    else:
        print(f"✓ Late convergence: Found best solution at {convergence_point*100:.1f}% of trials")

print(f"\nPRUNING ANALYSIS:")
if len(pruned_trials) > 0:
    # Number of values each pruned trial had reported before it was stopped
    pruned_steps = [len(t.intermediate_values) for t in pruned_trials]
    if pruned_steps:
        print(f"✓ Average steps before pruning: {np.mean(pruned_steps):.1f}")
        print(f"✓ Earliest pruning at step: {min(pruned_steps)}")
        print(f"✓ Latest pruning at step: {max(pruned_steps)}")

    # Analyze pruning effectiveness
    if completed_values:
        pruning_threshold = np.median(completed_values)
        print(f"✓ Median score of completed trials: {pruning_threshold:.4f}")
        print(f"✓ Pruning likely saved time on {len(pruned_trials)} unpromising trials")



# Pick the method with the highest test accuracy. max() returns the first maximal
# entry, so ties resolve in the listed order: Optuna, then Manual, then RandomSearch.
best_method_name, best_method_accuracy = max(
    (
        ('Optuna', optuna_test_accuracy),
        ('Manual', manual_test_accuracy),
        ('RandomSearch', random_test_accuracy),
    ),
    key=lambda entry: entry[1],
)

# Closing summary of the whole experiment
print(
    "\n" + "=" * 60,
    "EXPERIMENT SUMMARY:",
    f"Dataset: Forest Covertype ({X.shape[0]:,} samples, {X.shape[1]} features)",
    "Task: Binary classification with class imbalance",
    f"Hyperparameters tuned: {len(PARAM_RANGES)}",
    f"Total optimization budget: {N_TRIALS} trials",
    f"Best method: {best_method_name}",
    f"Best accuracy: {best_method_accuracy:.4f}",
    f"Pruning effectiveness: {len(pruned_trials)}/{len(study.trials)} trials pruned",
    "=" * 60,
    sep="\n",
)

Loading Forest Covertype Dataset...
This is a REAL complex dataset from UCI repository
Predicting forest cover type from cartographic variables
Original classes: [1 2 3 4 5 6 7] with distribution: [     0 211840 283301  35754   2747   9493  17367  20510]
Binary classes distribution: [495141  85871]

DATASET COMPLEXITY:
- Samples: 581,012
- Features: 54 (mix of continuous and categorical)
- Train set: 464,809 samples
- Test set: 116,203 samples
- Class imbalance: 495,141 vs 85,871
- Real-world geographic/environmental data with complex interactions
OPTIMIZATION CHALLENGE:
- 12 hyperparameters to tune
- Complex parameter interactions
- Large dataset requiring longer training times
- 100 trials budget
- Real-world data with noise and complexity

1. MANUAL HYPERPARAMETERS (Expert Intuition)
--------------------------------------------------
Manual Parameters: 13 parameters set
Cross-validation Accuracy: 0.9850 (+/- 0.0006)
Test Accuracy: 0.9850
Training Time: 91.9 seconds
Number of combina