In [1]:
# """
# Test Decision Tree Regressor with Optuna Hyperparameter Optimization
# =====================================================================
# """

# import torch
# import numpy as np
# import optuna
# from sklearn.datasets import load_diabetes, make_regression
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# # Assuming your DecisionTreeRegressor is importable
# from lightning_ml.tree import DecisionTreeRegressor


# def suggest_decision_tree_regressor_params(trial):
#     """Decision Tree Regressor - Optuna parameter suggestions"""
#     use_max_depth = trial.suggest_categorical('use_max_depth', [True, False])
#     max_depth = trial.suggest_int('max_depth', 3, 30) if use_max_depth else None
    
#     return {
#         'max_depth': max_depth,
#         'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
#         'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
#         'max_features': trial.suggest_categorical('max_features', [None, 'sqrt', 'log2']),
#         'criterion': trial.suggest_categorical('criterion', ['mae', 'mse']),  
#         'random_state': 42,
#         'device': 'cuda' if torch.cuda.is_available() else 'cpu'
#     }


# def objective(trial, X_train, X_val, y_train, y_val, DecisionTreeRegressor):
#     """
#     Objective function for Optuna optimization.
    
#     Args:
#         trial: Optuna trial object
#         X_train, X_val: Training and validation features
#         y_train, y_val: Training and validation targets
#         DecisionTreeRegressor: Your model class
        
#     Returns:
#         Validation MSE (lower is better; the study minimizes it), or
#         float('inf') if the trial raised an exception
#     """
#     # Get hyperparameters from trial
#     params = suggest_decision_tree_regressor_params(trial)
    
#     try:
#         # Create and train model
#         model = DecisionTreeRegressor(**params)
#         model.fit(X_train, y_train, verbose=False)
        
#         # Evaluate on validation set
#         y_pred = model.predict(X_val)
        
#         # Calculate metrics
#         mse = mean_squared_error(y_val, y_pred)
#         r2 = r2_score(y_val, y_pred)
#         mae = mean_absolute_error(y_val, y_pred)
        
#         # Store additional metrics in trial
#         trial.set_user_attr('r2_score', r2)
#         trial.set_user_attr('mae', mae)
#         trial.set_user_attr('tree_depth', model.get_depth())
#         trial.set_user_attr('n_leaves', model.get_n_leaves())
        
#         # Return MSE (Optuna minimizes by default)
#         return mse
        
#     except Exception as e:
#         print(f"Trial failed with error: {e}")
#         # Return a large value to indicate failure
#         return float('inf')


# def run_optuna_optimization(DecisionTreeRegressor, 
#                             n_trials=50, 
#                             dataset='diabetes',
#                             test_size=0.2,
#                             val_size=0.2):
#     """
#     Run Optuna hyperparameter optimization for Decision Tree Regressor.
    
#     Args:
#         DecisionTreeRegressor: Your model class
#         n_trials: Number of optimization trials
#         dataset: 'diabetes' or 'synthetic'
#         test_size: Test set proportion
#         val_size: Validation set proportion, taken from the data that remains after the test split
        
#     Returns:
#         Dictionary with optimization results
#     """
#     print("=" * 70)
#     print("Decision Tree Regressor - Optuna Hyperparameter Optimization")
#     print("=" * 70)
    
#     # Load dataset
#     if dataset == 'diabetes':
#         data = load_diabetes()
#         X, y = data.data, data.target
#         print(f"\nDataset: Diabetes (n_samples={len(X)}, n_features={X.shape[1]})")
#     else:
#         X, y = make_regression(n_samples=1000, n_features=20, 
#                                n_informative=15, noise=10, random_state=42)
#         print(f"\nDataset: Synthetic (n_samples={len(X)}, n_features={X.shape[1]})")
    
#     # Split data: Train / Val / Test
#     X_temp, X_test, y_temp, y_test = train_test_split(
#         X, y, test_size=test_size, random_state=42
#     )
#     X_train, X_val, y_train, y_val = train_test_split(
#         X_temp, y_temp, test_size=val_size, random_state=42
#     )
    
#     print(f"Train samples: {len(X_train)}")
#     print(f"Validation samples: {len(X_val)}")
#     print(f"Test samples: {len(X_test)}")
    
#     # Create Optuna study
#     print(f"\nStarting optimization with {n_trials} trials...")
#     study = optuna.create_study(
#         direction='minimize',  # Minimize MSE
#         study_name='decision_tree_regressor_optimization',
#         sampler=optuna.samplers.TPESampler(seed=42)
#     )
    
#     # Run optimization
#     study.optimize(
#         lambda trial: objective(trial, X_train, X_val, y_train, y_val, DecisionTreeRegressor),
#         n_trials=n_trials,
#         show_progress_bar=True
#     )
    
#     # Get best parameters
#     best_params = study.best_params
#     best_value = study.best_value
    
#     print("\n" + "=" * 70)
#     print("OPTIMIZATION RESULTS")
#     print("=" * 70)
#     print(f"\nBest Validation MSE: {best_value:.4f}")
#     print(f"\nBest Parameters:")
#     for param, value in best_params.items():
#         print(f"  {param}: {value}")
    
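#     # Get additional metrics from best trial.
#     # NOTE(review): the ':.4f' format specs below raise ValueError if an
#     # attribute is missing and the 'N/A' string fallback is used.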
#     best_trial = study.best_trial
#     print(f"\nBest Trial Additional Metrics:")
#     print(f"  R² Score: {best_trial.user_attrs.get('r2_score', 'N/A'):.4f}")
#     print(f"  MAE: {best_trial.user_attrs.get('mae', 'N/A'):.4f}")
#     print(f"  Tree Depth: {best_trial.user_attrs.get('tree_depth', 'N/A')}")
#     print(f"  Number of Leaves: {best_trial.user_attrs.get('n_leaves', 'N/A')}")
    
#     # Train final model with best parameters
#     print("\n" + "=" * 70)
#     print("FINAL MODEL EVALUATION")
#     print("=" * 70)
    
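#     # Prepare best params for model initialization.
#     # NOTE(review): study.best_params contains only values suggested through
#     # trial.suggest_*, so the fixed 'random_state' and 'device' settings used
#     # during the search are not included here — confirm this is intended.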
#     final_params = best_params.copy()
#     if 'use_max_depth' in final_params:
#         del final_params['use_max_depth']
    
#     final_model = DecisionTreeRegressor(**final_params)
    
#     # Train on combined train+val data
#     X_train_full = np.vstack([X_train, X_val])
#     y_train_full = np.concatenate([y_train, y_val])
    
#     print("\nTraining final model on combined train+val data...")
#     final_model.fit(X_train_full, y_train_full, verbose=True)
    
#     # Evaluate on test set
#     y_pred_test = final_model.predict(X_test)
#     test_mse = mean_squared_error(y_test, y_pred_test)
#     test_r2 = r2_score(y_test, y_pred_test)
#     test_mae = mean_absolute_error(y_test, y_pred_test)
#     test_rmse = np.sqrt(test_mse)
    
#     print(f"\nTest Set Performance:")
#     print(f"  MSE:  {test_mse:.4f}")
#     print(f"  RMSE: {test_rmse:.4f}")
#     print(f"  MAE:  {test_mae:.4f}")
#     print(f"  R²:   {test_r2:.4f}")
    
#     # Feature importances
#     print(f"\nTop 5 Feature Importances:")
#     importances = final_model.feature_importances_
#     top_indices = np.argsort(importances)[-5:][::-1]
#     for idx in top_indices:
#         print(f"  Feature {idx}: {importances[idx]:.4f}")
    
#     # Return results
#     return {
#         'study': study,
#         'best_params': best_params,
#         'best_validation_mse': best_value,
#         'final_model': final_model,
#         'test_metrics': {
#             'mse': test_mse,
#             'rmse': test_rmse,
#             'mae': test_mae,
#             'r2': test_r2
#         }
#     }


# def plot_optimization_history(study):
#     """
#     Plot optimization history (requires plotly).
    
#     Args:
#         study: Optuna study object
#     """
#     try:
#         import plotly.graph_objects as go
        
#         # Create figure
#         fig = go.Figure()
        
#         # Add optimization history
#         trials = study.trials
#         values = [trial.value for trial in trials if trial.value != float('inf')]
        
#         fig.add_trace(go.Scatter(
#             y=values,
#             mode='lines+markers',
#             name='Trial MSE',
#             line=dict(color='blue', width=2),
#             marker=dict(size=6)
#         ))
        
#         # Add best value line
#         best_values = [min(values[:i+1]) for i in range(len(values))]
#         fig.add_trace(go.Scatter(
#             y=best_values,
#             mode='lines',
#             name='Best MSE',
#             line=dict(color='red', width=2, dash='dash')
#         ))
        
#         fig.update_layout(
#             title='Optimization History',
#             xaxis_title='Trial',
#             yaxis_title='MSE',
#             hovermode='x unified'
#         )
        
#         fig.show()
        
#     except ImportError:
#         print("Plotly not available for visualization")


# # Example usage
# if __name__ == "__main__":
#     # DecisionTreeRegressor is expected to be imported at the top of this
#     # file (from lightning_ml.tree import DecisionTreeRegressor); replace
#     # that import if your model class lives elsewhere.

    
#     # Run the optimization

#     results = run_optuna_optimization(
#         DecisionTreeRegressor=DecisionTreeRegressor,
#         n_trials=10,
#         dataset='diabetes',
#         test_size=0.2,
#         val_size=0.2
#     )
    
#     # Optionally plot optimization history
#     plot_optimization_history(results['study'])
    
#     # Access results
#     print("\nOptimization Complete!")
#     print(f"Best validation MSE: {results['best_validation_mse']:.4f}")
#     print(f"Test R² score: {results['test_metrics']['r2']:.4f}")
    