In [None]:
"""
Run Optuna Hyperparameter Optimization for ALL Lightning ML Models
===================================================================
Complete notebook-ready script with all functions included
"""

import optuna
import inspect
from sklearn.datasets import load_iris, load_diabetes, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score, mean_squared_error, accuracy_score, r2_score

# Import all model classes
from lightning_ml.regression import (
    LinearRegression,
    LogisticRegression,
    RidgeRegression,
    LassoRegression
)

from lightning_ml.tree import (
    DecisionTreeClassifier,
    DecisionTreeRegressor
)

from lightning_ml.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor,
    BaggingClassifier
)

from lightning_ml.svm import (
    SVMClassifier,
    SVMRegressor
)

from lightning_ml.neighbours import (
    KNNClassifier,
    KNNRegressor
)

from lightning_ml.cluster import (
    KMeans,
    DBSCAN
)

# Import suggest functions from your optuna_optimizer module
from lightning_ml.optuna_optimizer import (
    suggest_linear_regression_params,
    suggest_logistic_regression_params,
    suggest_ridge_regression_params,
    suggest_lasso_regression_params,
    suggest_svm_classifier_params,
    suggest_svm_regressor_params,
    suggest_decision_tree_classifier_params,
    suggest_decision_tree_regressor_params,
    suggest_random_forest_classifier_params,
    suggest_random_forest_regressor_params,
    suggest_bagging_classifier_params,
    suggest_knn_classifier_params,
    suggest_knn_regressor_params,
    suggest_kmeans_params,
    suggest_dbscan_params,
    suggest_apriori_params
)

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def filter_model_params(params, model_class):
    """Drop every entry of ``params`` that ``model_class.__init__`` does not name.

    Args:
        params: Dict of candidate keyword arguments.
        model_class: Class whose ``__init__`` signature decides which keys
            survive. ``self`` is never treated as an accepted key.

    Returns:
        A new dict holding the accepted key/value pairs, in the order they
        appear in ``params``.
    """
    init_signature = inspect.signature(model_class.__init__)
    accepted = {name for name in init_signature.parameters if name != 'self'}

    kept = {}
    for key, value in params.items():
        if key in accepted:
            kept[key] = value
    return kept


def create_optimizer(model_class, suggest_params_func, X_train, y_train,
                     X_test, y_test, metric='accuracy', n_trials=10,
                     timeout=None, direction='maximize'):
    """Tune ``model_class`` with Optuna, then refit it with the best trial's settings.

    Each trial builds a model from ``suggest_params_func(trial)`` (filtered
    through ``filter_model_params``), fits it on the training split and scores
    its predictions on ``X_test``/``y_test``.

    Args:
        model_class: Estimator class; instances must provide ``fit(X, y)`` and
            ``predict(X)``.
        suggest_params_func: Callable taking an Optuna trial and returning a
            dict of constructor keyword arguments.
        X_train, y_train: Data every trial model (and the final model) is
            fitted on.
        X_test, y_test: Data used to score each trial.
        metric: ``'accuracy'``, ``'mse'`` or ``'r2'``. ``'mse'`` is reported
            negated, so it is only meaningful with ``direction='maximize'``.
        n_trials: Number of trials handed to ``study.optimize``.
        timeout: Time limit in seconds handed to ``study.optimize``
            (``None`` disables it).
        direction: Optimization direction passed to ``optuna.create_study``.

    Returns:
        ``(best_model, study)``: a model refitted on the training split with
        the best trial's constructor arguments, and the finished study.

    Raises:
        ValueError: If ``metric`` is not one of the supported names.
    """
    # Reject a bad metric up front instead of after the first model fit.
    if metric not in ('accuracy', 'mse', 'r2'):
        raise ValueError(f"Unknown metric: {metric}")

    def objective(trial):
        params = suggest_params_func(trial)
        model_params = filter_model_params(params, model_class)
        # Remember the exact constructor kwargs of this trial. The values in
        # ``trial.params`` are the raw sampled ones and need not match what the
        # suggest function returned (e.g. helper flags or derived values).
        trial.set_user_attr('model_params', model_params)

        model = model_class(**model_params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        if metric == 'accuracy':
            return accuracy_score(y_test, y_pred)
        if metric == 'mse':
            # Negated so that a larger value means a better model.
            return -mean_squared_error(y_test, y_pred)
        return r2_score(y_test, y_pred)

    study = optuna.create_study(
        direction=direction,
        sampler=optuna.samplers.TPESampler(seed=42)
    )

    study.optimize(objective, n_trials=n_trials, timeout=timeout, show_progress_bar=True)

    # Rebuild the winner from the kwargs it was actually evaluated with; fall
    # back to the raw sampled parameters if they were not recorded.
    best_params = study.best_trial.user_attrs.get('model_params')
    if best_params is None:
        best_params = filter_model_params(study.best_params, model_class)
    best_model = model_class(**best_params)
    best_model.fit(X_train, y_train)

    return best_model, study


def print_optimization_results(study, model_name):
    """Write a short report about a finished study to stdout.

    Args:
        study: Object exposing ``best_value``, ``best_params`` (a dict) and
            ``trials`` (a sized collection).
        model_name: Label shown in the report header.
    """
    rule = '=' * 70

    # Header
    print(f"\n{rule}")
    print(f"✅ {model_name} Optimization Complete!")
    print(rule)

    # Best score and the parameters that produced it
    print(f"Best Score: {study.best_value:.4f}")
    print("\nBest Hyperparameters:")
    for name, setting in study.best_params.items():
        print(f"  • {name}: {setting}")

    # Footer: trial count, closing rule and one trailing blank line
    print(f"\nTotal Trials: {len(study.trials)}\n{rule}\n")


# =====================================================================
# MODEL GROUPS
# =====================================================================
# Each registry maps a display name to (model class, Optuna suggest function).
regression_models = dict(
    LinearRegression=(LinearRegression, suggest_linear_regression_params),
    RidgeRegression=(RidgeRegression, suggest_ridge_regression_params),
    LassoRegression=(LassoRegression, suggest_lasso_regression_params),
    SVMRegressor=(SVMRegressor, suggest_svm_regressor_params),
    DecisionTreeRegressor=(DecisionTreeRegressor, suggest_decision_tree_regressor_params),
    RandomForestRegressor=(RandomForestRegressor, suggest_random_forest_regressor_params),
    KNNRegressor=(KNNRegressor, suggest_knn_regressor_params),
)

classification_models = dict(
    LogisticRegression=(LogisticRegression, suggest_logistic_regression_params),
    SVMClassifier=(SVMClassifier, suggest_svm_classifier_params),
    DecisionTreeClassifier=(DecisionTreeClassifier, suggest_decision_tree_classifier_params),
    RandomForestClassifier=(RandomForestClassifier, suggest_random_forest_classifier_params),
    BaggingClassifier=(BaggingClassifier, suggest_bagging_classifier_params),
    KNNClassifier=(KNNClassifier, suggest_knn_classifier_params),
)

clustering_models = dict(
    KMeans=(KMeans, suggest_kmeans_params),
    DBSCAN=(DBSCAN, suggest_dbscan_params),
)

# Unlike the registries above, this entry holds the bare suggest function
# (no model class, no tuple).
apriori_model = dict(Apriori=suggest_apriori_params)



# =====================================================================
# RUNNER FUNCTIONS
# =====================================================================

def run_all_regressors(n_trials=10, timeout=120):
    """Tune every entry of ``regression_models`` on the diabetes dataset.

    The data is split 80/20 with ``random_state=42``; each model is optimized
    for R² through ``create_optimizer`` and then re-scored on the held-out part.

    Args:
        n_trials: Trials per model, forwarded to ``create_optimizer``.
        timeout: Per-model time limit in seconds, forwarded to
            ``create_optimizer``.

    Returns:
        Dict keyed by model name. Each value holds ``model``, ``study``,
        ``best_params``, ``best_score``, ``test_r2`` and ``test_mse``. A model
        whose run raised is reported on stdout and left out of the dict.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("🔍 REGRESSION MODELS - HYPERPARAMETER TUNING")
    print(rule)

    features, targets = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    results = {}
    for name, (model_cls, suggest_func) in regression_models.items():
        print(f"\n{rule}")
        print(f"🚀 Running: {name}")
        print(rule)
        try:
            best_model, study = create_optimizer(
                model_class=model_cls,
                suggest_params_func=suggest_func,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                metric='r2',
                n_trials=n_trials,
                timeout=timeout,
                direction='maximize',
            )
            print_optimization_results(study, name)

            # Score the refitted winner on the held-out split
            predictions = best_model.predict(X_test)
            test_r2 = r2_score(y_test, predictions)
            test_mse = mean_squared_error(y_test, predictions)
            print("📊 Test Set Performance:")
            print(f"  • R² Score: {test_r2:.4f}")
            print(f"  • MSE: {test_mse:.4f}")

            results[name] = dict(
                model=best_model,
                study=study,
                best_params=study.best_params,
                best_score=study.best_value,
                test_r2=test_r2,
                test_mse=test_mse,
            )
        except Exception as err:
            # Best effort: one failing model must not stop the remaining ones.
            print(f"❌ [ERROR] {name} failed: {err}")
            import traceback
            traceback.print_exc()

    return results


def run_all_classifiers(n_trials=10, timeout=120):
    """Tune every entry of ``classification_models`` on the iris dataset.

    The data is split 80/20 with ``random_state=42``; each model is optimized
    for accuracy through ``create_optimizer`` and then re-scored on the
    held-out part.

    Args:
        n_trials: Trials per model, forwarded to ``create_optimizer``.
        timeout: Per-model time limit in seconds, forwarded to
            ``create_optimizer``.

    Returns:
        Dict keyed by model name. Each value holds ``model``, ``study``,
        ``best_params``, ``best_score`` and ``test_accuracy``. A model whose
        run raised is reported on stdout and left out of the dict.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("🔍 CLASSIFICATION MODELS - HYPERPARAMETER TUNING")
    print(rule)

    features, targets = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    results = {}
    for name, (model_cls, suggest_func) in classification_models.items():
        print(f"\n{rule}")
        print(f"🚀 Running: {name}")
        print(rule)
        try:
            best_model, study = create_optimizer(
                model_class=model_cls,
                suggest_params_func=suggest_func,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                metric='accuracy',
                n_trials=n_trials,
                timeout=timeout,
                direction='maximize',
            )
            print_optimization_results(study, name)

            # Score the refitted winner on the held-out split
            test_accuracy = accuracy_score(y_test, best_model.predict(X_test))
            print(f"📊 Test Set Accuracy: {test_accuracy:.4f}")

            results[name] = dict(
                model=best_model,
                study=study,
                best_params=study.best_params,
                best_score=study.best_value,
                test_accuracy=test_accuracy,
            )
        except Exception as err:
            # Best effort: one failing model must not stop the remaining ones.
            print(f"❌ [ERROR] {name} failed: {err}")
            import traceback
            traceback.print_exc()

    return results


def run_all_clustering(n_trials=10, timeout=120):
    """Tune every entry of ``clustering_models`` on synthetic blob data.

    300 two-dimensional points around 3 centers are generated and split 80/20
    (both with ``random_state=42``). Each trial fits on the training part and
    is scored by the silhouette of its predicted labels on the held-out part;
    a labelling with fewer than two distinct labels scores ``-1.0``.

    Args:
        n_trials: Trials per model, handed to ``study.optimize``.
        timeout: Per-model time limit in seconds, handed to ``study.optimize``.

    Returns:
        Dict keyed by model name. Each value holds ``model``, ``study``,
        ``best_params``, ``best_score`` and ``test_silhouette``. A model whose
        run raised is reported on stdout and left out of the dict.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("🔍 CLUSTERING MODELS - HYPERPARAMETER TUNING")
    print(rule)

    X, y = make_blobs(n_samples=300, centers=3, n_features=2, random_state=42)
    # The label halves of the split are not used by the clustering runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    results = {}
    for name, (cls, suggest_func) in clustering_models.items():
        print(f"\n{rule}")
        print(f"🚀 Running: {name}")
        print(rule)
        try:
            def objective(trial):
                suggested = suggest_func(trial)
                candidate = cls(**filter_model_params(suggested, cls))
                candidate.fit(X_train)
                labels = candidate.predict(X_test)

                # Only score labellings with at least two distinct labels
                if len(set(labels)) >= 2:
                    return silhouette_score(X_test, labels)
                return -1.0

            sampler = optuna.samplers.TPESampler(seed=42)
            study = optuna.create_study(direction='maximize', sampler=sampler)
            study.optimize(objective, n_trials=n_trials, timeout=timeout, show_progress_bar=True)

            best_model = cls(**filter_model_params(study.best_params, cls))
            best_model.fit(X_train)

            print_optimization_results(study, name)

            # Score the refitted winner on the held-out split
            test_labels = best_model.predict(X_test)
            if len(set(test_labels)) < 2:
                test_silhouette = -1.0
                print(f"⚠️  Test Set: Only 1 cluster found")
            else:
                test_silhouette = silhouette_score(X_test, test_labels)
                print(f"📊 Test Set Silhouette Score: {test_silhouette:.4f}")

            results[name] = dict(
                model=best_model,
                study=study,
                best_params=study.best_params,
                best_score=study.best_value,
                test_silhouette=test_silhouette,
            )
        except Exception as err:
            # Best effort: one failing model must not stop the remaining ones.
            print(f"❌ [ERROR] {name} failed: {err}")
            import traceback
            traceback.print_exc()

    return results


# =====================================================================
# SUMMARY FUNCTION
# =====================================================================

def print_summary(reg_results, cls_results, clu_results):
    """Print one table row per tuned model, grouped by task type.

    Args:
        reg_results: Dict of model name -> result with ``test_r2`` and
            ``test_mse``.
        cls_results: Dict of model name -> result with ``test_accuracy``.
        clu_results: Dict of model name -> result with ``test_silhouette``.

    A group whose dict is empty (or otherwise falsy) is omitted entirely.
    """
    rule = "=" * 70
    divider = "-" * 70

    # (results, section heading, row formatter) for each task type, in print order
    sections = (
        (
            reg_results,
            "\n🔹 REGRESSION MODELS:",
            lambda name, res: f"{name:30s} | R²: {res['test_r2']:6.4f} | MSE: {res['test_mse']:8.2f}",
        ),
        (
            cls_results,
            "\n🔹 CLASSIFICATION MODELS:",
            lambda name, res: f"{name:30s} | Accuracy: {res['test_accuracy']:6.4f}",
        ),
        (
            clu_results,
            "\n🔹 CLUSTERING MODELS:",
            lambda name, res: f"{name:30s} | Silhouette: {res['test_silhouette']:6.4f}",
        ),
    )

    print(f"\n{rule}")
    print("📋 HYPERPARAMETER TUNING SUMMARY")
    print(rule)

    for group, heading, render_row in sections:
        if not group:
            continue
        print(heading)
        print(divider)
        for name, result in group.items():
            print(render_row(name, result))

    print(f"\n{rule}")


# =====================================================================
# MAIN EXECUTION
# =====================================================================

if __name__ == "__main__":
    # Opening banner
    print(
        "\n" + "=" * 70,
        "⚡ LIGHTNING ML - HYPERPARAMETER OPTIMIZATION",
        "=" * 70,
        "Running Optuna hyperparameter tuning for all models...",
        sep="\n",
    )

    # Tune each model family in turn: 10 trials, 120 s limit per model
    reg_results = run_all_regressors(10, 120)
    cls_results = run_all_classifiers(10, 120)
    clu_results = run_all_clustering(10, 120)

    # One combined table across the three families
    print_summary(reg_results, cls_results, clu_results)

    # Closing banner
    print("\n✅ ALL HYPERPARAMETER TUNING COMPLETE!", "=" * 70, sep="\n")

[I 2025-10-07 19:37:22,950] A new study created in memory with name: no-name-8fbafed6-b214-47da-be04-af5e88490355



⚡ LIGHTNING ML - HYPERPARAMETER OPTIMIZATION
Running Optuna hyperparameter tuning for all models...

🔍 REGRESSION MODELS - HYPERPARAMETER TUNING

🚀 Running: LinearRegression


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:37:24,313] A new study created in memory with name: no-name-34899789-36ea-4dd0-bf1a-3b326461c43b


[I 2025-10-07 19:37:24,233] Trial 0 finished with value: -4.277644824214061 and parameters: {'fit_intercept': False}. Best is trial 0 with value: -4.277644824214061.
[I 2025-10-07 19:37:24,246] Trial 1 finished with value: 0.4526029062055733 and parameters: {'fit_intercept': True}. Best is trial 1 with value: 0.4526029062055733.
[I 2025-10-07 19:37:24,254] Trial 2 finished with value: 0.4526029062055733 and parameters: {'fit_intercept': True}. Best is trial 1 with value: 0.4526029062055733.
[I 2025-10-07 19:37:24,261] Trial 3 finished with value: -4.277644824214061 and parameters: {'fit_intercept': False}. Best is trial 1 with value: 0.4526029062055733.
[I 2025-10-07 19:37:24,267] Trial 4 finished with value: -4.277644824214061 and parameters: {'fit_intercept': False}. Best is trial 1 with value: 0.4526029062055733.
[I 2025-10-07 19:37:24,275] Trial 5 finished with value: -4.277644824214061 and parameters: {'fit_intercept': False}. Best is trial 1 with value: 0.4526029062055733.
[I 202

  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:38:02,392] Trial 0 finished with value: 0.29397378336612734 and parameters: {'alpha': 0.0745934328572655, 'epochs': 500, 'lr': 0.015702970884055395, 'batch_size': 16, 'optimizer': 'adam'}. Best is trial 0 with value: 0.29397378336612734.
[I 2025-10-07 19:38:06,556] Trial 1 finished with value: -1.853561770774018 and parameters: {'alpha': 0.001267425589893723, 'epochs': 500, 'lr': 0.03142880890840111, 'batch_size': 128, 'optimizer': 'adam'}. Best is trial 0 with value: 0.29397378336612734.
[I 2025-10-07 19:38:07,861] Trial 2 finished with value: -0.5572912814803299 and parameters: {'alpha': 1.1462107403425035, 'epochs': 100, 'lr': 0.0007523742884534858, 'batch_size': 64, 'optimizer': 'sgd'}. Best is trial 0 with value: 0.29397378336612734.
[I 2025-10-07 19:38:10,297] Trial 3 finished with value: -1.8010020452948354 and parameters: {'alpha': 1.0907475835157696, 'epochs': 100, 'lr': 0.00015673095467235422, 'batch_size': 32, 'optimizer': 'sgd'}. Best is trial 0 with value:

[I 2025-10-07 19:39:14,187] A new study created in memory with name: no-name-385290d1-337b-4fc7-98ac-ee660965bec7



✅ RidgeRegression Optimization Complete!
Best Score: 0.2940

Best Hyperparameters:
  • alpha: 0.0745934328572655
  • epochs: 500
  • lr: 0.015702970884055395
  • batch_size: 16
  • optimizer: adam

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.2928
  • MSE: 3746.7400

🚀 Running: LassoRegression


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:39:41,281] Trial 0 finished with value: 0.29072790527784664 and parameters: {'alpha': 0.0745934328572655, 'epochs': 500, 'lr': 0.015702970884055395, 'batch_size': 16, 'optimizer': 'adam'}. Best is trial 0 with value: 0.29072790527784664.
[I 2025-10-07 19:39:47,055] Trial 1 finished with value: -1.8519591598800962 and parameters: {'alpha': 0.001267425589893723, 'epochs': 500, 'lr': 0.03142880890840111, 'batch_size': 128, 'optimizer': 'adam'}. Best is trial 0 with value: 0.29072790527784664.
[I 2025-10-07 19:39:48,746] Trial 2 finished with value: -0.5598507654381542 and parameters: {'alpha': 1.1462107403425035, 'epochs': 100, 'lr': 0.0007523742884534858, 'batch_size': 64, 'optimizer': 'sgd'}. Best is trial 0 with value: 0.29072790527784664.
[I 2025-10-07 19:39:51,364] Trial 3 finished with value: -1.8094531603416342 and parameters: {'alpha': 1.0907475835157696, 'epochs': 100, 'lr': 0.00015673095467235422, 'batch_size': 32, 'optimizer': 'sgd'}. Best is trial 0 with value

[I 2025-10-07 19:41:01,830] A new study created in memory with name: no-name-a7470ee4-7417-4de6-a676-10ad021f999c



✅ LassoRegression Optimization Complete!
Best Score: 0.2907

Best Hyperparameters:
  • alpha: 0.0745934328572655
  • epochs: 500
  • lr: 0.015702970884055395
  • batch_size: 16
  • optimizer: adam

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.2898
  • MSE: 3762.5241

🚀 Running: SVMRegressor


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:41:08,155] Trial 0 finished with value: -4.010839262947069 and parameters: {'kernel': 'rbf', 'C': 0.2938027938703535, 'epsilon': 0.020511104188433976, 'epochs': 100, 'lr': 0.005399484409787433, 'batch_size': 32, 'optimizer': 'adam', 'gamma_type': 'manual', 'gamma': 0.016480446427978974}. Best is trial 0 with value: -4.010839262947069.
[I 2025-10-07 19:41:35,448] Trial 1 finished with value: -4.01092731292504 and parameters: {'kernel': 'sigmoid', 'C': 0.262108787826544, 'epsilon': 0.03839629299804171, 'epochs': 250, 'lr': 0.000816845589476017, 'batch_size': 16, 'optimizer': 'rmsprop', 'gamma': 0.004809461967501573, 'coef0': 0.06505159298527952}. Best is trial 0 with value: -4.010839262947069.
[I 2025-10-07 19:41:43,153] Trial 2 finished with value: -4.010899091733475 and parameters: {'kernel': 'rbf', 'C': 0.1963434157293333, 'epsilon': 0.23359635026261596, 'epochs': 250, 'lr': 0.00017541893487450815, 'batch_size': 64, 'optimizer': 'sgd', 'gamma_type': 'manual', 'gamma':

[I 2025-10-07 19:43:08,880] A new study created in memory with name: no-name-2211433b-cbf0-4bb2-a942-9b9120eee6b8



✅ SVMRegressor Optimization Complete!
Best Score: 0.4787

Best Hyperparameters:
  • kernel: poly
  • C: 61.53085601625307
  • epsilon: 0.41327654594663626
  • epochs: 350
  • lr: 0.005532496914298506
  • batch_size: 64
  • optimizer: rmsprop
  • degree: 3
  • gamma: 0.002755546207779663
  • coef0: 0.22793516254194168

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.4957
  • MSE: 2671.7217

🚀 Running: DecisionTreeRegressor


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:43:12,495] Trial 0 finished with value: 0.2663759162909659 and parameters: {'use_max_depth': False, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': None, 'criterion': 'mae'}. Best is trial 0 with value: 0.2663759162909659.
[I 2025-10-07 19:43:13,883] Trial 1 finished with value: 0.3562406824906095 and parameters: {'use_max_depth': True, 'max_depth': 30, 'min_samples_split': 17, 'min_samples_leaf': 3, 'max_features': 'log2', 'criterion': 'mae'}. Best is trial 1 with value: 0.3562406824906095.
[I 2025-10-07 19:43:16,017] Trial 2 finished with value: 0.21182901831684497 and parameters: {'use_max_depth': False, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_features': 'log2', 'criterion': 'mse'}. Best is trial 1 with value: 0.3562406824906095.
[I 2025-10-07 19:43:20,828] Trial 3 finished with value: -0.04230998530012142 and parameters: {'use_max_depth': True, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'crite

[I 2025-10-07 19:43:39,250] A new study created in memory with name: no-name-815e589b-c3ae-4220-95ba-57538f022cb3



✅ DecisionTreeRegressor Optimization Complete!
Best Score: 0.3796

Best Hyperparameters:
  • use_max_depth: True
  • max_depth: 30
  • min_samples_split: 16
  • min_samples_leaf: 10
  • max_features: log2
  • criterion: mse

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.3945
  • MSE: 3207.8784

🚀 Running: RandomForestRegressor


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:43:40,300] Trial 0 finished with value: 0.47999813466117414 and parameters: {'use_max_depth': False, 'bootstrap': True, 'use_max_samples': True, 'max_samples': 0.5290418060840998, 'n_estimators': 440, 'min_samples_split': 13, 'min_samples_leaf': 8, 'max_features': 'log2', 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.47999813466117414.
[I 2025-10-07 19:43:40,831] Trial 1 finished with value: 0.45299599332015705 and parameters: {'use_max_depth': True, 'max_depth': 18, 'bootstrap': True, 'use_max_samples': False, 'n_estimators': 230, 'min_samples_split': 16, 'min_samples_leaf': 2, 'max_features': 0.8, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.47999813466117414.
[I 2025-10-07 19:43:41,428] Trial 2 finished with value: 0.45792040672684686 and parameters: {'use_max_depth': True, 'max_depth': 19, 'bootstrap': False, 'n_estimators': 230, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 'log2', 'criterion': 'absolute_error'}. B

[I 2025-10-07 19:43:45,671] A new study created in memory with name: no-name-e1b09a24-2b4b-4367-8069-a87a12b3799b



✅ RandomForestRegressor Optimization Complete!
Best Score: 0.4802

Best Hyperparameters:
  • use_max_depth: True
  • max_depth: 34
  • bootstrap: True
  • use_max_samples: False
  • n_estimators: 270
  • min_samples_split: 17
  • min_samples_leaf: 9
  • max_features: 0.6
  • criterion: squared_error

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.4797
  • MSE: 2756.4608

🚀 Running: KNNRegressor


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:43:45,718] Trial 0 finished with value: 0.42685542815532496 and parameters: {'n_neighbors': 19, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.42685542815532496.
[I 2025-10-07 19:43:45,725] Trial 1 finished with value: 0.38145251845799466 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 0 with value: 0.42685542815532496.
[I 2025-10-07 19:43:45,757] Trial 2 finished with value: 0.40184902691365276 and parameters: {'n_neighbors': 42, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 0 with value: 0.42685542815532496.
[I 2025-10-07 19:43:45,840] Trial 3 finished with value: 0.43649141112727463 and parameters: {'n_neighbors': 22, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.43649141112727463.
[I 2025-10-07 19:43:45,859] Trial 4 finished with value: 0.383199940632142 and parameters: {'n_neighbors': 23, 'weights': 'uniform', 'metric': 'manhattan'}. Best is trial 3 with

[I 2025-10-07 19:43:45,926] A new study created in memory with name: no-name-ff644c92-4181-4dcc-9da0-e7a6a030e02d


[I 2025-10-07 19:43:45,901] Trial 8 finished with value: 0.41269373976168233 and parameters: {'n_neighbors': 28, 'weights': 'distance', 'metric': 'manhattan'}. Best is trial 6 with value: 0.458366791825463.
[I 2025-10-07 19:43:45,910] Trial 9 finished with value: 0.4163773702474983 and parameters: {'n_neighbors': 30, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 6 with value: 0.458366791825463.

✅ KNNRegressor Optimization Complete!
Best Score: 0.4584

Best Hyperparameters:
  • n_neighbors: 16
  • weights: distance
  • metric: cosine

Total Trials: 10

📊 Test Set Performance:
  • R² Score: 0.4584
  • MSE: 2869.6549

🔍 CLASSIFICATION MODELS - HYPERPARAMETER TUNING

🚀 Running: LogisticRegression


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:43:51,392] Trial 0 finished with value: 0.9666666666666667 and parameters: {'epochs': 200, 'lr': 0.07114476009343425, 'batch_size': 16, 'optimizer': 'sgd'}. Best is trial 0 with value: 0.9666666666666667.
[I 2025-10-07 19:43:57,000] Trial 1 finished with value: 0.8333333333333334 and parameters: {'epochs': 400, 'lr': 0.00011527987128232407, 'batch_size': 16, 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.9666666666666667.
[I 2025-10-07 19:43:59,576] Trial 2 finished with value: 0.7 and parameters: {'epochs': 250, 'lr': 0.0007476312062252305, 'batch_size': 16, 'optimizer': 'sgd'}. Best is trial 0 with value: 0.9666666666666667.
[I 2025-10-07 19:44:01,219] Trial 3 finished with value: 0.9666666666666667 and parameters: {'epochs': 300, 'lr': 0.005987474910461402, 'batch_size': 32, 'optimizer': 'sgd'}. Best is trial 0 with value: 0.9666666666666667.
[I 2025-10-07 19:44:02,899] Trial 4 finished with value: 0.7666666666666667 and parameters: {'epochs': 200, 'lr': 0.0

[I 2025-10-07 19:44:08,204] A new study created in memory with name: no-name-246b8199-0fdb-45cf-9229-c8d87fb35aa6



✅ LogisticRegression Optimization Complete!
Best Score: 0.9667

Best Hyperparameters:
  • epochs: 200
  • lr: 0.07114476009343425
  • batch_size: 16
  • optimizer: sgd

Total Trials: 10

📊 Test Set Accuracy: 1.0000

🚀 Running: SVMClassifier


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:44:14,963] Trial 0 finished with value: 0.7 and parameters: {'kernel': 'rbf', 'C': 0.2938027938703535, 'epochs': 150, 'lr': 0.00013066739238053285, 'batch_size': 16, 'optimizer': 'sgd', 'gamma': 'scale'}. Best is trial 0 with value: 0.7.
[I 2025-10-07 19:44:21,579] Trial 1 finished with value: 0.7 and parameters: {'kernel': 'poly', 'C': 0.7476312062252299, 'epochs': 350, 'lr': 0.00019010245319870352, 'batch_size': 64, 'optimizer': 'adam', 'gamma': 'scale', 'degree': 4, 'coef0': 0.17052412368729153}. Best is trial 0 with value: 0.7.
[I 2025-10-07 19:44:22,635] Trial 2 finished with value: 0.7666666666666667 and parameters: {'kernel': 'poly', 'C': 0.8200518402245829, 'epochs': 100, 'lr': 0.0023359635026261607, 'batch_size': 64, 'optimizer': 'sgd', 'gamma': 'scale', 'degree': 4, 'coef0': 0.5467102793432796}. Best is trial 2 with value: 0.7666666666666667.
[I 2025-10-07 19:44:30,052] Trial 3 finished with value: 1.0 and parameters: {'kernel': 'rbf', 'C': 48.35952776465949,

[I 2025-10-07 19:45:46,192] A new study created in memory with name: no-name-f57f6b66-117f-4c1b-bf29-d6319b4bc2dc



✅ SVMClassifier Optimization Complete!
Best Score: 1.0000

Best Hyperparameters:
  • kernel: rbf
  • C: 48.35952776465949
  • epochs: 350
  • lr: 0.0069782812651260325
  • batch_size: 32
  • optimizer: sgd
  • gamma: scale

Total Trials: 10

📊 Test Set Accuracy: 1.0000

🚀 Running: DecisionTreeClassifier


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:45:46,557] Trial 0 finished with value: 1.0 and parameters: {'use_max_depth': False, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': None, 'criterion': 'gini'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:46,728] Trial 1 finished with value: 1.0 and parameters: {'use_max_depth': True, 'max_depth': 30, 'min_samples_split': 17, 'min_samples_leaf': 3, 'max_features': 'log2', 'criterion': 'gini'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:46,932] Trial 2 finished with value: 1.0 and parameters: {'use_max_depth': False, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_features': 'log2', 'criterion': 'entropy'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:47,109] Trial 3 finished with value: 1.0 and parameters: {'use_max_depth': True, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'criterion': 'gini'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:47,350] Trial 4 finished with val

[I 2025-10-07 19:45:48,507] A new study created in memory with name: no-name-4250d0ff-f355-4352-9b51-1e4dcb4929a9


[I 2025-10-07 19:45:48,336] Trial 9 finished with value: 1.0 and parameters: {'use_max_depth': True, 'max_depth': 27, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'criterion': 'gini'}. Best is trial 0 with value: 1.0.

✅ DecisionTreeClassifier Optimization Complete!
Best Score: 1.0000

Best Hyperparameters:
  • use_max_depth: False
  • min_samples_split: 15
  • min_samples_leaf: 6
  • max_features: None
  • criterion: gini

Total Trials: 10

📊 Test Set Accuracy: 1.0000

🚀 Running: RandomForestClassifier


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:45:49,322] Trial 0 finished with value: 1.0 and parameters: {'use_max_depth': False, 'n_estimators': 370, 'criterion': 'gini', 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:49,638] Trial 1 finished with value: 1.0 and parameters: {'use_max_depth': True, 'max_depth': 13, 'n_estimators': 160, 'criterion': 'gini', 'min_samples_split': 7, 'min_samples_leaf': 7, 'max_features': 0.6, 'bootstrap': True}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:49,763] Trial 2 finished with value: 1.0 and parameters: {'use_max_depth': False, 'n_estimators': 30, 'criterion': 'gini', 'min_samples_split': 3, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:49,886] Trial 3 finished with value: 1.0 and parameters: {'use_max_depth': False, 'n_estimators': 20, 'criterion': 'gini', 'min_samples_split': 14, 'min_samples_le

[I 2025-10-07 19:45:52,554] A new study created in memory with name: no-name-071858d9-1f0d-4ea2-a0a0-d2026e2913d4



✅ RandomForestClassifier Optimization Complete!
Best Score: 1.0000

Best Hyperparameters:
  • use_max_depth: False
  • n_estimators: 370
  • criterion: gini
  • min_samples_split: 4
  • min_samples_leaf: 1
  • max_features: sqrt
  • bootstrap: True

Total Trials: 10

📊 Test Set Accuracy: 1.0000

🚀 Running: BaggingClassifier


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:45:55,643] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 40, 'max_samples': 0.9753571532049581, 'max_features': 0.8659969709057025, 'bootstrap': True, 'bootstrap_features': True}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:45:57,763] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 90, 'max_samples': 0.8005575058716043, 'max_features': 0.8540362888980227, 'bootstrap': False, 'bootstrap_features': True}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:00,037] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 20, 'max_samples': 0.5917022549267169, 'max_features': 0.6521211214797689, 'bootstrap': True, 'bootstrap_features': False}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:02,374] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 20, 'max_samples': 0.6460723242676091, 'max_features': 0.6831809216468459, 'bootstrap': False, 'bootstrap_features': False}. Best is trial 0 with value: 1

[I 2025-10-07 19:46:21,618] A new study created in memory with name: no-name-39ab265d-6f4f-4fea-af35-a25af274f8c1



✅ BaggingClassifier Optimization Complete!
Best Score: 1.0000

Best Hyperparameters:
  • n_estimators: 40
  • max_samples: 0.9753571532049581
  • max_features: 0.8659969709057025
  • bootstrap: True
  • bootstrap_features: True

Total Trials: 10

📊 Test Set Accuracy: 1.0000

🚀 Running: KNNClassifier


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:46:21,875] Trial 0 finished with value: 1.0 and parameters: {'n_neighbors': 19, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:21,914] Trial 1 finished with value: 0.9666666666666667 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:22,194] Trial 2 finished with value: 0.9666666666666667 and parameters: {'n_neighbors': 42, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:22,299] Trial 3 finished with value: 1.0 and parameters: {'n_neighbors': 22, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:22,637] Trial 4 finished with value: 1.0 and parameters: {'n_neighbors': 23, 'weights': 'uniform', 'metric': 'manhattan'}. Best is trial 0 with value: 1.0.
[I 2025-10-07 19:46:22,783] Trial 5 finished with value: 1.0 and parameters: {'n_neighbors': 31, 'w

[I 2025-10-07 19:46:23,388] A new study created in memory with name: no-name-738277d9-bf65-412c-bebf-f7d8dbdb8903


📊 Test Set Accuracy: 1.0000

🔍 CLUSTERING MODELS - HYPERPARAMETER TUNING

🚀 Running: KMeans


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:46:24,247] Trial 0 finished with value: 0.3492229946215037 and parameters: {'n_clusters': 9, 'max_iter': 1000, 'tol': 0.0008471801418819979, 'n_init': 14, 'init': 'k-means++'}. Best is trial 0 with value: 0.3492229946215037.
[I 2025-10-07 19:46:24,475] Trial 1 finished with value: 0.8598282581279164 and parameters: {'n_clusters': 3, 'max_iter': 900, 'tol': 0.00025378155082656634, 'n_init': 16, 'init': 'random'}. Best is trial 1 with value: 0.8598282581279164.
[I 2025-10-07 19:46:25,314] Trial 2 finished with value: 0.3399124758701325 and parameters: {'n_clusters': 17, 'max_iter': 300, 'tol': 5.337032762603957e-06, 'n_init': 7, 'init': 'random'}. Best is trial 1 with value: 0.8598282581279164.
[I 2025-10-07 19:46:25,607] Trial 3 finished with value: 0.2645567328569067 and parameters: {'n_clusters': 10, 'max_iter': 300, 'tol': 0.0002801635158716264, 'n_init': 7, 'init': 'random'}. Best is trial 1 with value: 0.8598282581279164.
[I 2025-10-07 19:46:26,582] Trial 4 finishe

[I 2025-10-07 19:46:31,152] A new study created in memory with name: no-name-c9c2568e-187f-41ef-85d2-f75b3534baf6


[I 2025-10-07 19:46:31,011] Trial 9 finished with value: 0.32709947546691487 and parameters: {'n_clusters': 13, 'max_iter': 1000, 'tol': 2.259279742015697e-06, 'n_init': 8, 'init': 'random'}. Best is trial 1 with value: 0.8598282581279164.

✅ KMeans Optimization Complete!
Best Score: 0.8598

Best Hyperparameters:
  • n_clusters: 3
  • max_iter: 900
  • tol: 0.00025378155082656634
  • n_init: 16
  • init: random

Total Trials: 10

📊 Test Set Silhouette Score: 0.8598

🚀 Running: DBSCAN


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-10-07 19:46:31,545] Trial 0 finished with value: 0.5381981195209474 and parameters: {'eps': 1.9352465823520764, 'min_samples': 20, 'metric': 'euclidean'}. Best is trial 0 with value: 0.5381981195209474.
[I 2025-10-07 19:46:31,751] Trial 1 finished with value: -0.42001988702941406 and parameters: {'eps': 0.864373149647393, 'min_samples': 3, 'metric': 'euclidean'}. Best is trial 0 with value: 0.5381981195209474.
[I 2025-10-07 19:46:31,838] Trial 2 finished with value: -1.0 and parameters: {'eps': 0.200864022049432, 'min_samples': 20, 'metric': 'euclidean'}. Best is trial 0 with value: 0.5381981195209474.
[I 2025-10-07 19:46:31,961] Trial 3 finished with value: -0.153518615631543 and parameters: {'eps': 0.9986820982818257, 'min_samples': 7, 'metric': 'euclidean'}. Best is trial 0 with value: 0.5381981195209474.
[I 2025-10-07 19:46:32,205] Trial 4 finished with value: 0.5329120807450787 and parameters: {'eps': 3.0980791841396598, 'min_samples': 4, 'metric': 'cosine'}. Best is trial

In [11]:
if __name__ == "__main__":
    # Smoke test: tune Apriori's hyperparameters with Optuna on a tiny,
    # hand-written basket dataset and report the best trial.

    # Suppress every warning category so the trial log stays readable.
    # Note that this filter is process-wide and stays active afterwards.
    import warnings
    warnings.filterwarnings("ignore")  # Clean output

    # Banner: a blank line, then the title framed by two 70-character rules.
    print(
        "\n" + "=" * 70,
        "⚡ LIGHTNING ML - APRIORI OPTUNA TEST",
        "=" * 70,
        sep="\n",
    )

    # Third-party packages first, then the project's own modules.
    import numpy as np  # not referenced in this cell; kept so `np` stays bound
    import pandas as pd
    import optuna
    from lightning_ml import Apriori  # association-rule model under test
    from lightning_ml.optuna_optimizer import suggest_apriori_params  # search-space helper

    # One-hot transaction table: each of the 8 rows is a basket, each column
    # an item, and 1 marks that the item is present in that basket.
    data = {
        'Milk': [1, 0, 1, 1, 0, 1, 1, 0],
        'Bread': [1, 1, 1, 1, 0, 0, 1, 1],
        'Butter': [1, 0, 1, 0, 0, 1, 1, 0],
        'Eggs': [0, 1, 1, 1, 1, 0, 1, 1],
        'Cheese': [0, 0, 1, 1, 0, 1, 0, 0]
    }
    df = pd.DataFrame(data)

    def objective(trial):
        """Fit Apriori on ``df`` with trial-suggested parameters and return its score.

        NOTE(review): the original author describes ``Apriori.score`` as the
        average lift of the top 10 rules; that cannot be confirmed from this
        cell -- verify against the Apriori implementation.
        """
        model = Apriori(**suggest_apriori_params(trial))
        model.fit(df)
        return model.score(df)

    # Maximize the score; stop after 10 trials or 60 seconds, whichever
    # comes first.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10, timeout=60)

    # Summary of the winning trial.
    print("\n✅ Optuna optimization completed!")
    print(f"Best parameters: {study.best_params}")
    print(f"Best score (avg lift): {study.best_value:.4f}\n")


[I 2025-10-07 20:26:35,790] A new study created in memory with name: no-name-1071f3a5-8669-457e-9141-237dca0a944b
[I 2025-10-07 20:26:35,794] Trial 0 finished with value: 0.0 and parameters: {'use_max_length': False, 'min_support': 0.3761512710010716, 'min_confidence': 0.5049574728808788, 'min_lift': 2.930724414633814}. Best is trial 0 with value: 0.0.
[I 2025-10-07 20:26:35,795] Trial 1 finished with value: 0.0 and parameters: {'use_max_length': False, 'min_support': 0.2961692239929821, 'min_confidence': 0.8137449152492808, 'min_lift': 4.414664410423578}. Best is trial 0 with value: 0.0.
[I 2025-10-07 20:26:35,798] Trial 2 finished with value: 0.0 and parameters: {'use_max_length': False, 'min_support': 0.47296720530573905, 'min_confidence': 0.5090553327247854, 'min_lift': 4.126752017259424}. Best is trial 0 with value: 0.0.
[I 2025-10-07 20:26:35,801] Trial 3 finished with value: 0.0 and parameters: {'use_max_length': True, 'max_length': 9, 'min_support': 0.026160278217937843, 'min_c


⚡ LIGHTNING ML - APRIORI OPTUNA TEST

✅ Optuna optimization completed!
Best parameters: {'use_max_length': False, 'min_support': 0.2519486041154769, 'min_confidence': 0.4322436828302182, 'min_lift': 1.4326634897921595}
Best score (avg lift): 1.5750

