# 03 — Experiments (with/without PCA) + Optional Hyperparameter Tuning

This notebook runs the **tuned experiment grid**:

1. Logistic Regression, Ridge, HistGradientBoosting (HGB), and Random Forest, each **with and without PCA** (8 experiments)
2. Every experiment is run **with hyperparameter tuning** (Optuna); untuned baselines are not run in this notebook

We evaluate using:
- **5-fold stratified CV F1** on the training set
- F1 on the held-out test set

> Note: This notebook logs every run to MLflow through DagsHub (`dagshub.init(...)` is called in the first code cell), so DagsHub credentials and network access are required to run it as written.


In [3]:
# 03 — Train Models (With Optuna) + Log 8 Experiments to DagsHub/MLflow

import json
import sqlite3
from datetime import datetime, timezone
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
import joblib

import optuna

# DagsHub + MLflow
import dagshub
dagshub.init(repo_owner='Aayushnepal09', repo_name='my-first-repo', mlflow=True)

import mlflow

# Fixed seed so that NumPy-based randomness is repeatable across runs.
SEED = 42
np.random.seed(SEED)

# Resolve the project root: when the kernel was started inside `notebooks/`,
# step up one directory; otherwise use the current working directory as-is.
PROJECT_ROOT = Path.cwd()
PROJECT_ROOT = PROJECT_ROOT.parent if PROJECT_ROOT.name == "notebooks" else PROJECT_ROOT

# Put the project root on the import path so modules stored there can be imported.
import sys
sys.path.append(str(PROJECT_ROOT))

# Input database and output folders (metrics live underneath the models folder).
DB_PATH = PROJECT_ROOT / "data" / "airline.db"
MODELS_DIR = PROJECT_ROOT / "models"
METRICS_DIR = MODELS_DIR / "metrics"

# Create the output folders up front; both calls are no-ops if they already exist.
METRICS_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

print("PROJECT_ROOT:", PROJECT_ROOT)
print("DB_PATH:", DB_PATH, "exists=", DB_PATH.exists())

PROJECT_ROOT: c:\Users\nepal\OneDrive\Desktop\airline_satisfaction_appp
DB_PATH: c:\Users\nepal\OneDrive\Desktop\airline_satisfaction_appp\data\airline.db exists= True


## Load data and create the train/test split

In [4]:
# Load the modelling dataframe from the normalized DB (same join as NB02).
# Fail early with an actionable message if the database has not been built yet.
if not DB_PATH.exists():
    raise FileNotFoundError(
        f"Database not found at {DB_PATH}. Run 01_create_database.ipynb first."
    )

# Join each trip to its passenger, service ratings, delays and satisfaction label.
# All joins are inner joins, so a trip missing from any table is dropped.
query = '''
SELECT
    p.gender,
    p.customer_type,
    p.age,
    t.type_of_travel,
    t.travel_class,
    t.flight_distance,
    s.inflight_wifi_service,
    s.departure_arrival_time_convenient,
    s.ease_of_online_booking,
    s.gate_location,
    s.food_and_drink,
    s.online_boarding,
    s.seat_comfort,
    s.inflight_entertainment,
    s.on_board_service,
    s.leg_room_service,
    s.baggage_handling,
    s.checkin_service,
    s.inflight_service,
    s.cleanliness,
    d.departure_delay_minutes,
    d.arrival_delay_minutes,
    sat.satisfaction_binary
FROM trip t
JOIN passenger p ON t.passenger_id = p.passenger_id
JOIN service_rating s ON s.trip_id = t.trip_id
JOIN delay d ON d.trip_id = t.trip_id
JOIN satisfaction sat ON sat.trip_id = t.trip_id
;
'''

# Always release the SQLite handle, even when the query itself fails
# (e.g. a missing table); otherwise the connection stays open in the kernel.
conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Separate the binary label (cast to int) from the predictor columns.
TARGET = "satisfaction_binary"
y = df[TARGET].astype(int)
X = df.drop(columns=[TARGET])

# Seeded 80/20 hold-out split, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

# Shuffled, seeded 5-fold stratified splitter reused by every tuning run.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

display(df.head())
print("Train:", X_train.shape, "Test:", X_test.shape)


Unnamed: 0,gender,customer_type,age,type_of_travel,travel_class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,...,inflight_entertainment,on_board_service,leg_room_service,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_minutes,arrival_delay_minutes,satisfaction_binary
0,Male,Loyal Customer,13,Personal Travel,Eco Plus,460,3,4,3,1,...,5,4,3,4,4,5,5,25,18,0
1,Male,disloyal Customer,25,Business travel,Business,235,3,2,3,3,...,1,1,5,3,1,4,1,1,6,0
2,Female,Loyal Customer,26,Business travel,Business,1142,2,2,2,2,...,5,4,3,4,4,4,5,0,0,1
3,Female,Loyal Customer,25,Business travel,Business,562,2,5,5,5,...,2,2,5,3,1,4,2,11,9,0
4,Male,Loyal Customer,61,Business travel,Business,214,3,3,3,3,...,3,3,4,4,3,3,3,0,0,1


Train: (83123, 22) Test: (20781, 22)


## Pipeline builder and Optuna search spaces

In [5]:
# Preprocessing + estimator builders
from housing_pipeline import build_preprocessing

def build_pipeline_for_params(model_name: str, use_pca: bool, params: dict):
    """Assemble a preprocessing + estimator pipeline for one experiment.

    Args:
        model_name: One of "logreg", "ridge", "hgb" or "rf".
        use_pca: Forwarded to ``build_preprocessing`` to switch PCA on or off.
        params: Keyword arguments passed straight to the estimator constructor.

    Returns:
        A ``Pipeline`` with the steps "preprocess" and "model".

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    preprocess = build_preprocessing(use_pca=use_pca)

    # Factories are lazy so only the requested estimator is ever constructed.
    estimator_factories = {
        "logreg": lambda: LogisticRegression(max_iter=3000, random_state=SEED, **params),
        "ridge": lambda: RidgeClassifier(random_state=SEED, **params),
        "hgb": lambda: HistGradientBoostingClassifier(random_state=SEED, **params),
        "rf": lambda: RandomForestClassifier(random_state=SEED, n_jobs=-1, **params),
    }

    make_estimator = estimator_factories.get(model_name)
    if make_estimator is None:
        raise ValueError(f"Unknown model_name: {model_name}")

    return Pipeline([("preprocess", preprocess), ("model", make_estimator())])

def suggest_params(trial: optuna.Trial, model_name: str) -> dict:
    """Sample one hyperparameter configuration for ``model_name`` from ``trial``.

    Args:
        trial: Optuna trial used to draw the values.
        model_name: One of "logreg", "ridge", "hgb" or "rf".

    Returns:
        A dict of estimator keyword arguments for the chosen model.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == "logreg":
        solver = trial.suggest_categorical("solver", ["liblinear", "lbfgs"])
        # liblinear supports l1/l2, lbfgs supports l2 only: the penalty is
        # searched for liblinear and pinned (not sampled) for lbfgs.
        penalty = (
            trial.suggest_categorical("penalty", ["l1", "l2"])
            if solver == "liblinear"
            else "l2"
        )
        return {
            "solver": solver,
            "penalty": penalty,
            "C": trial.suggest_float("C", 1e-3, 50.0, log=True),
        }

    if model_name == "ridge":
        return {"alpha": trial.suggest_float("alpha", 1e-3, 50.0, log=True)}

    if model_name == "hgb":
        # Dict entries are evaluated top to bottom, which fixes the sampling order.
        return {
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "max_depth": trial.suggest_int("max_depth", 2, 10),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 15, 255),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 10, 200),
            "l2_regularization": trial.suggest_float("l2_regularization", 1e-6, 10.0, log=True),
            "max_iter": trial.suggest_int("max_iter", 100, 500),
        }

    if model_name == "rf":
        return {
            "n_estimators": trial.suggest_int("n_estimators", 150, 600),
            "max_depth": trial.suggest_int("max_depth", 3, 30),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        }

    raise ValueError(f"Unknown model_name: {model_name}")


## Run the 8 tuned experiments and save the best pipeline as `global_best_model_optuna.pkl`
(Notebook name kept from the starter repo; tuning here uses Optuna.)

In [6]:
# Run 8 tuned experiments:
# 4 algorithms × (PCA on/off) × (Optuna tuning)

mlflow.set_experiment("airline_satisfaction_with_optuna")

# One entry per (algorithm, PCA flag) pair, ordered algorithm-first with
# the no-PCA variant ahead of the PCA variant.
EXPERIMENTS = [
    {"model_name": model_name, "use_pca": use_pca, "tuned": True}
    for model_name in ("logreg", "ridge", "hgb", "rf")
    for use_pca in (False, True)
]

results = []
best = None  # track best by test_f1

def objective(trial: optuna.Trial, model_name: str, use_pca: bool) -> float:
    """Optuna objective: mean cross-validated F1 of one sampled configuration.

    Args:
        trial: Optuna trial the hyperparameters are drawn from.
        model_name: Algorithm key understood by ``suggest_params``.
        use_pca: Whether the preprocessing step should include PCA.

    Returns:
        The mean F1 over the folds of the notebook-level ``cv`` splitter,
        computed on ``X_train`` / ``y_train``.
    """
    candidate = build_pipeline_for_params(
        model_name, use_pca, suggest_params(trial, model_name)
    )
    fold_scores = cross_val_score(
        candidate, X_train, y_train, scoring="f1", cv=cv, n_jobs=-1
    )
    return float(fold_scores.mean())

# Keep trials reasonable for grading/runtime
TRIALS_BY_MODEL = {"logreg": 25, "ridge": 20, "hgb": 25, "rf": 20}

# One MLflow run per experiment: tune with Optuna, refit the best configuration
# on the full training split, score it on the hold-out split, persist artifacts.
for exp in EXPERIMENTS:
    model_name = exp["model_name"]
    use_pca = exp["use_pca"]
    n_trials = TRIALS_BY_MODEL.get(model_name, 20)

    run_name = f"{model_name}__pca_{int(use_pca)}__optuna"
    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(
            {
                "model_name": model_name,
                "use_pca": int(use_pca),
                "tuned": 1,
                "optuna_trials": n_trials,
                "cv_folds": cv.get_n_splits(),
                "seed": SEED,
            }
        )

        # Seed the sampler so the search is repeatable; without it the logged
        # "seed" parameter would not actually govern which trials are drawn.
        study = optuna.create_study(
            direction="maximize",
            sampler=optuna.samplers.TPESampler(seed=SEED),
        )
        study.optimize(lambda t: objective(t, model_name, use_pca), n_trials=n_trials)

        best_params = study.best_params
        cv_best_f1 = float(study.best_value)

        # Refit the winning configuration on the whole training split, then
        # score once on the held-out test split.
        best_pipe = build_pipeline_for_params(model_name, use_pca, best_params)
        best_pipe.fit(X_train, y_train)
        y_pred = best_pipe.predict(X_test)
        test_f1 = float(f1_score(y_test, y_pred))

        mlflow.log_metrics({"cv_f1_best": cv_best_f1, "test_f1": test_f1})
        mlflow.log_params({f"best__{k}": v for k, v in best_params.items()})

        # Save artifacts. The timestamp is taken from an explicitly UTC-aware
        # clock (datetime.utcnow() is deprecated); the rendered string is unchanged.
        stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        model_path = MODELS_DIR / f"{model_name}__pca_{int(use_pca)}__optuna.pkl"
        metrics_path = METRICS_DIR / f"{model_name}__pca_{int(use_pca)}__optuna.metrics.json"

        joblib.dump(best_pipe, model_path)

        metrics = {
            "model_name": model_name,
            "use_pca": bool(use_pca),
            "tuned": True,
            "optuna_trials": n_trials,
            "cv_f1_best": cv_best_f1,
            "test_f1": test_f1,
            "best_params": best_params,
            "model_path": str(model_path),
            "created_utc": stamp,
        }
        with open(metrics_path, "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)

        mlflow.log_artifact(str(model_path))
        mlflow.log_artifact(str(metrics_path))

        row = {
            "model_name": model_name,
            "use_pca": use_pca,
            "tuned": True,
            "cv_f1_best": cv_best_f1,
            "test_f1": test_f1,
            "model_path": str(model_path),
        }
        results.append(row)

        # NOTE(review): the overall winner is picked on hold-out test F1, so the
        # winner's reported test score is optimistically biased; consider
        # selecting on cv_f1_best and reporting test_f1 only for the final model.
        if best is None or test_f1 > best["test_f1"]:
            best = row

results_df = pd.DataFrame(results).sort_values(["test_f1"], ascending=False)
display(results_df)
print("Best (optuna):", best)

# Save best tuned model in the same filename used by the API
if best is None:
    # Nothing was trained (empty experiment list); fail with a clear message
    # instead of an opaque TypeError on the lookup below.
    raise RuntimeError("No experiment results available; run the tuning loop first.")

best_model_path = Path(best["model_path"])
best_pipe = joblib.load(best_model_path)

GLOBAL_BEST_PATH = MODELS_DIR / "global_best_model_optuna.pkl"
joblib.dump(best_pipe, GLOBAL_BEST_PATH)

print("✅ Saved global best (optuna) to:", GLOBAL_BEST_PATH)

# Optional: save summary
summary_path = METRICS_DIR / "summary_optuna.json"
summary = {
    "best_model_path": str(GLOBAL_BEST_PATH),
    "best_test_f1": float(best["test_f1"]),
    "n_experiments": len(results_df),
}
with open(summary_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=2)

# Log the summary inside an explicit, named run. Calling log_artifact with no
# active run would implicitly start an unnamed run that is never ended.
with mlflow.start_run(run_name="summary__optuna"):
    mlflow.log_artifact(str(summary_path))

# Quick check: the reloaded pipeline should reproduce the winner's test F1.
pred = best_pipe.predict(X_test)
print("Global best optuna test F1:", f1_score(y_test, pred))


2025/12/19 20:50:22 INFO mlflow.tracking.fluent: Experiment with name 'airline_satisfaction_with_optuna' does not exist. Creating a new experiment.
[I 2025-12-19 20:50:23,614] A new study created in memory with name: no-name-96e7ecfa-a4fc-4489-a74d-193e99e79c39
[I 2025-12-19 20:50:28,150] Trial 0 finished with value: 0.8504176329961755 and parameters: {'solver': 'lbfgs', 'C': 0.0012717161160094024}. Best is trial 0 with value: 0.8504176329961755.
[I 2025-12-19 20:50:31,997] Trial 1 finished with value: 0.8530778738600006 and parameters: {'solver': 'lbfgs', 'C': 13.679121661996287}. Best is trial 1 with value: 0.8530778738600006.
[I 2025-12-19 20:50:34,937] Trial 2 finished with value: 0.8530828106682169 and parameters: {'solver': 'lbfgs', 'C': 0.41662217254022155}. Best is trial 2 with value: 0.8530828106682169.
[I 2025-12-19 20:50:39,900] Trial 3 finished with value: 0.8530295679371822 and parameters: {'solver': 'liblinear', 'penalty': 'l1', 'C': 0.29853584548441897}. Best is trial 2 

üèÉ View run logreg__pca_0__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/749e7cc07eef450fbd67379a542e68ee
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:51:17,610] A new study created in memory with name: no-name-4d7f060b-54cc-4553-88de-61021d2b2517
[I 2025-12-19 20:51:20,312] Trial 0 finished with value: 0.8507761562593238 and parameters: {'solver': 'liblinear', 'penalty': 'l1', 'C': 0.1902526619226148}. Best is trial 0 with value: 0.8507761562593238.
[I 2025-12-19 20:51:21,558] Trial 1 finished with value: 0.8509576396186119 and parameters: {'solver': 'lbfgs', 'C': 0.010636262964791036}. Best is trial 1 with value: 0.8509576396186119.
[I 2025-12-19 20:51:24,804] Trial 2 finished with value: 0.8506987736259383 and parameters: {'solver': 'liblinear', 'penalty': 'l1', 'C': 0.2822121658271086}. Best is trial 1 with value: 0.8509576396186119.
[I 2025-12-19 20:51:26,299] Trial 3 finished with value: 0.8507392604866647 and parameters: {'solver': 'liblinear', 'penalty': 'l2', 'C': 0.006909850955308336}. Best is trial 1 with value: 0.8509576396186119.
[I 2025-12-19 20:51:27,490] Trial 4 finished with value: 0.8507267457256527

üèÉ View run logreg__pca_1__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/a6148e9ca6a447bd94d62d4040211d54
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:51:57,703] A new study created in memory with name: no-name-daa96ced-a257-47f4-af1b-8eada1273e62
[I 2025-12-19 20:51:58,877] Trial 0 finished with value: 0.8495901908322552 and parameters: {'alpha': 0.0019106300209339548}. Best is trial 0 with value: 0.8495901908322552.
[I 2025-12-19 20:52:00,152] Trial 1 finished with value: 0.8495901908322552 and parameters: {'alpha': 0.0013485374937700445}. Best is trial 0 with value: 0.8495901908322552.
[I 2025-12-19 20:52:01,262] Trial 2 finished with value: 0.8495901908322552 and parameters: {'alpha': 0.005584189427653593}. Best is trial 0 with value: 0.8495901908322552.
[I 2025-12-19 20:52:02,349] Trial 3 finished with value: 0.8495901908322552 and parameters: {'alpha': 0.024181545369804772}. Best is trial 0 with value: 0.8495901908322552.
[I 2025-12-19 20:52:03,447] Trial 4 finished with value: 0.849578249190045 and parameters: {'alpha': 1.3040372189107128}. Best is trial 0 with value: 0.8495901908322552.
[I 2025-12-19 20:52:04

üèÉ View run ridge__pca_0__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/911f5561bb6d41f7a4acb2bfa4e6ffe0
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:52:23,413] A new study created in memory with name: no-name-cb96f291-f516-4610-8c24-06e3758c36a6
[I 2025-12-19 20:52:24,799] Trial 0 finished with value: 0.8468168606518403 and parameters: {'alpha': 34.76570682465232}. Best is trial 0 with value: 0.8468168606518403.
[I 2025-12-19 20:52:25,892] Trial 1 finished with value: 0.8469103362141513 and parameters: {'alpha': 0.0901238369590444}. Best is trial 1 with value: 0.8469103362141513.
[I 2025-12-19 20:52:27,030] Trial 2 finished with value: 0.8469061730333542 and parameters: {'alpha': 8.630752772862683}. Best is trial 1 with value: 0.8469103362141513.
[I 2025-12-19 20:52:28,252] Trial 3 finished with value: 0.8469223482099405 and parameters: {'alpha': 0.10643386007286507}. Best is trial 3 with value: 0.8469223482099405.
[I 2025-12-19 20:52:29,433] Trial 4 finished with value: 0.8469223482099405 and parameters: {'alpha': 0.606332318648398}. Best is trial 3 with value: 0.8469223482099405.
[I 2025-12-19 20:52:30,549] Trial

üèÉ View run ridge__pca_1__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/81709c68d8a5409c932952a6dc804a72
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:52:48,660] A new study created in memory with name: no-name-30b73a29-1dcd-4986-b54c-2e71479dbef7
[I 2025-12-19 20:52:53,495] Trial 0 finished with value: 0.9277356263989953 and parameters: {'learning_rate': 0.031486260730097644, 'max_depth': 3, 'max_leaf_nodes': 212, 'min_samples_leaf': 172, 'l2_regularization': 1.2809991240945977, 'max_iter': 294}. Best is trial 0 with value: 0.9277356263989953.
[I 2025-12-19 20:53:04,783] Trial 1 finished with value: 0.9545625695722759 and parameters: {'learning_rate': 0.020793644617702847, 'max_depth': 7, 'max_leaf_nodes': 210, 'min_samples_leaf': 108, 'l2_regularization': 4.424881463471817, 'max_iter': 495}. Best is trial 1 with value: 0.9545625695722759.
[I 2025-12-19 20:53:15,049] Trial 2 finished with value: 0.9555327452009402 and parameters: {'learning_rate': 0.033143813446725935, 'max_depth': 7, 'max_leaf_nodes': 157, 'min_samples_leaf': 22, 'l2_regularization': 4.23746874916919e-06, 'max_iter': 294}. Best is trial 2 with valu

üèÉ View run hgb__pca_0__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/f086324ffb134e90ba82252e79635353
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:55:28,404] A new study created in memory with name: no-name-2e1beb1d-db54-459f-b2d7-029955653c3c
[I 2025-12-19 20:55:31,364] Trial 0 finished with value: 0.9182573103838412 and parameters: {'learning_rate': 0.13378591638763093, 'max_depth': 5, 'max_leaf_nodes': 152, 'min_samples_leaf': 106, 'l2_regularization': 8.450338583358269e-06, 'max_iter': 106}. Best is trial 0 with value: 0.9182573103838412.
[I 2025-12-19 20:55:37,812] Trial 1 finished with value: 0.8779851465810526 and parameters: {'learning_rate': 0.02988391162698395, 'max_depth': 2, 'max_leaf_nodes': 195, 'min_samples_leaf': 93, 'l2_regularization': 0.19610015555448682, 'max_iter': 459}. Best is trial 0 with value: 0.9182573103838412.
[I 2025-12-19 20:55:50,737] Trial 2 finished with value: 0.9145689952063162 and parameters: {'learning_rate': 0.012392040214111348, 'max_depth': 8, 'max_leaf_nodes': 154, 'min_samples_leaf': 146, 'l2_regularization': 0.5278440061224791, 'max_iter': 350}. Best is trial 0 with val

üèÉ View run hgb__pca_1__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/1740c0d2db0e4678bf6004e67d7acfe8
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 20:59:30,798] A new study created in memory with name: no-name-f5dfff51-70da-4843-9858-d28dfdd1ac71
[I 2025-12-19 20:59:46,796] Trial 0 finished with value: 0.948415742458278 and parameters: {'n_estimators': 244, 'max_depth': 13, 'min_samples_split': 19, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.948415742458278.
[I 2025-12-19 21:00:20,077] Trial 1 finished with value: 0.9462938443105248 and parameters: {'n_estimators': 564, 'max_depth': 21, 'min_samples_split': 2, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 0 with value: 0.948415742458278.
[I 2025-12-19 21:00:38,941] Trial 2 finished with value: 0.9444398956368241 and parameters: {'n_estimators': 324, 'max_depth': 14, 'min_samples_split': 14, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 0 with value: 0.948415742458278.
[I 2025-12-19 21:01:08,655] Trial 3 finished with value: 0.9530427867858 and parameters: {'n_estimators': 474, 'max_depth': 20, 'min_sa

üèÉ View run rf__pca_0__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/f283c2ba78ce42f5950a5b706ce5ff92
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


[I 2025-12-19 21:18:44,714] A new study created in memory with name: no-name-be9b097f-5a87-4828-a3f6-312592edda09
[I 2025-12-19 21:20:11,228] Trial 0 finished with value: 0.9136749896430232 and parameters: {'n_estimators': 494, 'max_depth': 24, 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 0 with value: 0.9136749896430232.
[I 2025-12-19 21:23:58,094] Trial 1 finished with value: 0.9231289109594712 and parameters: {'n_estimators': 225, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': None}. Best is trial 1 with value: 0.9231289109594712.
[I 2025-12-19 21:28:08,954] Trial 2 finished with value: 0.9203392048407384 and parameters: {'n_estimators': 249, 'max_depth': 26, 'min_samples_split': 17, 'min_samples_leaf': 9, 'max_features': None}. Best is trial 1 with value: 0.9231289109594712.
[I 2025-12-19 21:29:00,434] Trial 3 finished with value: 0.8072075728504909 and parameters: {'n_estimators': 211, 'max_depth': 3, 'min

üèÉ View run rf__pca_1__optuna at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1/runs/9ba7ce25660d4c08a65ef5295faebc2c
üß™ View experiment at: https://dagshub.com/Aayushnepal09/my-first-repo.mlflow/#/experiments/1


Unnamed: 0,model_name,use_pca,tuned,cv_f1_best,test_f1,model_path
4,hgb,False,True,0.957351,0.959686,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
6,rf,False,True,0.954006,0.956487,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
5,hgb,True,True,0.937413,0.939292,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
7,rf,True,True,0.923739,0.927166,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
0,logreg,False,True,0.853111,0.855662,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
2,ridge,False,True,0.84959,0.852969,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
1,logreg,True,True,0.850958,0.852298,c:\Users\nepal\OneDrive\Desktop\airline_satisf...
3,ridge,True,True,0.846922,0.849952,c:\Users\nepal\OneDrive\Desktop\airline_satisf...


Best (optuna): {'model_name': 'hgb', 'use_pca': False, 'tuned': True, 'cv_f1_best': 0.9573509785980608, 'test_f1': 0.959685569904548, 'model_path': 'c:\\Users\\nepal\\OneDrive\\Desktop\\airline_satisfaction_appp\\models\\hgb__pca_0__optuna.pkl'}
‚úÖ Saved global best (optuna) to: c:\Users\nepal\OneDrive\Desktop\airline_satisfaction_appp\models\global_best_model_optuna.pkl
Global best optuna test F1: 0.959685569904548
