In [5]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read project files
# from it. Per the captured output, re-running this cell while the drive is
# already mounted is a no-op unless force_remount=True is passed.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Error in callback <bound method IPythonTraceDisplayHandler._display_traces_post_run of <mlflow.tracing.display.display_handler.IPythonTraceDisplayHandler object at 0x7b53a8e45760>> (for post_run_cell):


AttributeError: module 'mlflow' has no attribute 'get_tracking_uri'

In [1]:
!pip install optuna xgboost lightgbm "mlflow<3"

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting mlflow<3
  Downloading mlflow-2.22.4-py3-none-any.whl.metadata (30 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting mlflow-skinny==2.22.4 (from mlflow<3)
  Downloading mlflow_skinny-2.22.4-py3-none-any.whl.metadata (31 kB)
Collecting docker<8,>=4.0.0 (from mlflow<3)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow<3)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow<3)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting cachetools<6,>=5.0.0 (from mlflow-skinny==2.22.4->mlflow<3)
  Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.22.4->mlflow<3)
  Downloading databricks_sdk-0.74.0-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━

In [2]:
# Project root on the mounted Google Drive; the data-loading cell builds the
# database path from it.
base_folder = "/content/drive/MyDrive/Colab Notebooks/housing_fall2025"
# Make the project root the working directory ({base_folder} is expanded by IPython).
%cd "{base_folder}"

/content/drive/MyDrive/Colab Notebooks/housing_fall2025


In [3]:
import sqlite3
import pandas as pd

# Join every block with its housing statistics and the name of its
# ocean-proximity category; rows come back ordered by block_id.
housing_query = """
    SELECT
        b.block_id,
        b.longitude,
        b.latitude,
        s.housing_median_age,
        s.total_rooms,
        s.total_bedrooms,
        s.population,
        s.households,
        s.median_income,
        s.median_house_value,
        op.name AS ocean_proximity
    FROM block AS b
    JOIN block_housing_stats AS s
        ON s.block_id = b.block_id
    JOIN ocean_proximity AS op
        ON op.ocean_proximity_id = b.ocean_proximity_id
    ORDER BY b.block_id
    """

conn = sqlite3.connect(f"{base_folder}/data/housing.db")
try:
    housing = pd.read_sql_query(housing_query, conn)
finally:
    # Close the handle even when the query raises (e.g. a missing table), so a
    # failed run does not leave the database file open.
    conn.close()

housing.head()

Unnamed: 0,block_id,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,0,-122.23,37.88,41.0,880,129.0,322,126,8.3252,452600.0,NEAR BAY
1,1,-122.22,37.86,21.0,7099,1106.0,2401,1138,8.3014,358500.0,NEAR BAY
2,2,-122.24,37.85,52.0,1467,190.0,496,177,7.2574,352100.0,NEAR BAY
3,3,-122.25,37.85,52.0,1274,235.0,558,219,5.6431,341300.0,NEAR BAY
4,4,-122.25,37.85,52.0,1627,280.0,565,259,3.8462,342200.0,NEAR BAY


In [7]:
# notebooks/03_train_models_with_optuna.ipynb

import sys
import os
import sqlite3
import pandas as pd
import joblib
import mlflow
import optuna
from pathlib import Path
from optuna.samplers import TPESampler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score

# -----------------------------------------------------------------------------
# 1. SETUP & ROBUST IMPORTS
# -----------------------------------------------------------------------------
# Ensure we can find housing_pipeline.py from the parent directory
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Put the parent folder and then the working folder at the front of sys.path
# (skipping any that are already listed), so housing_pipeline.py is importable
# whether the kernel was started from notebooks/ or from the project root.
for candidate in (parent_dir, current_dir):
    if candidate not in sys.path:
        sys.path.insert(0, candidate)

try:
    from housing_pipeline import build_preprocessing, make_estimator_for_name
except ImportError as import_error:
    print(f"❌ CRITICAL ERROR: Could not import housing_pipeline. {import_error}")
    # Nothing below can run without these helpers, so re-raise to stop here
    raise
else:
    print("✓ Successfully imported housing_pipeline components.")

# -----------------------------------------------------------------------------
# 2. ROBUST LOAD DATA
# -----------------------------------------------------------------------------
# Try finding the DB in standard locations
db_paths = [
    os.path.join(current_dir, '..', 'data', 'churn.db'), # From notebooks folder
    os.path.join(current_dir, 'data', 'churn.db'),       # From project root
]

# First candidate that exists on disk, or None when neither location has the file
db_path = next((path for path in db_paths if os.path.exists(path)), None)

if db_path is None:
    raise FileNotFoundError("❌ Could not find churn.db. Did you run Notebook 01?")

print(f"✓ Database found at: {db_path}")

# Resolve the geography and gender ids to their names so the frame carries
# readable categorical columns alongside the numeric ones and the Exited target.
query = """
SELECT c.CreditScore, g.Name AS Geography, gen.Name AS Gender, c.Age, c.Tenure, c.Balance, 
       c.NumOfProducts, c.HasCrCard, c.IsActiveMember, c.EstimatedSalary, c.Exited
FROM customer c 
JOIN geography g ON c.GeographyID = g.GeographyID 
JOIN gender gen ON c.GenderID = gen.GenderID
"""
conn = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query(query, conn)
finally:
    # Close the handle even when the query raises, so a failed run does not
    # leave the database file open.
    conn.close()

# Prepare Split
# 80/20 split, stratified on the target so both parts keep the same class ratio;
# the fixed random_state makes the split repeatable.
X = df.drop("Exited", axis=1)
y = df["Exited"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
print(f"✓ Data Loaded: {len(X_train)} training samples.")

# -----------------------------------------------------------------------------
# 3. DEFINE OPTUNA OBJECTIVE
# -----------------------------------------------------------------------------
def objective(trial, algo, use_pca):
    """
    Optuna objective: mean 3-fold cross-validated F1 of one candidate pipeline.

    Builds preprocessing -> (optional PCA) -> estimator with hyperparameters
    sampled from ``trial`` and scores it on the module-level ``X_train`` /
    ``y_train``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters.
    algo : str
        Estimator name passed to ``make_estimator_for_name``. Search spaces are
        defined for "logistic", "random_forest", "xgboost" and "lightgbm"; for
        any other name the estimator is used exactly as returned.
    use_pca : bool
        When true, a PCA step is added and its ``n_components`` is tuned under
        the parameter name "pca_n".

    Returns
    -------
    float
        Mean F1 over the folds, or 0.0 when fitting or scoring raised (a
        RuntimeWarning describing the failure is emitted in that case).
    """
    import warnings  # local import keeps this definition self-contained

    steps = [build_preprocessing()]

    # --- PCA TUNING ---
    if use_pca:
        # A float in (0, 1) asks PCA to keep enough components to explain that
        # fraction of the variance.
        n_components = trial.suggest_float("pca_n", 0.80, 0.99)
        steps.append(PCA(n_components=n_components))

    # --- MODEL TUNING ---
    model = make_estimator_for_name(algo)

    if algo == "logistic":
        C = trial.suggest_float("C", 0.01, 10.0, log=True)
        model.set_params(C=C)

    elif algo == "random_forest":
        n_est = trial.suggest_int("n_estimators", 50, 200)
        depth = trial.suggest_int("max_depth", 5, 30)
        model.set_params(n_estimators=n_est, max_depth=depth)

    elif algo == "xgboost":
        lr = trial.suggest_float("learning_rate", 0.01, 0.3, log=True)
        depth = trial.suggest_int("max_depth", 3, 10)
        model.set_params(learning_rate=lr, max_depth=depth)

    elif algo == "lightgbm":
        lr = trial.suggest_float("learning_rate", 0.01, 0.3, log=True)
        leaves = trial.suggest_int("num_leaves", 20, 100)
        model.set_params(learning_rate=lr, num_leaves=leaves)

    steps.append(model)
    pipeline = make_pipeline(*steps)

    # Cross Validation (3-fold for speed)
    # error_score="raise" lets a failing fit reach the handler below. With
    # scikit-learn's default (NaN) the fold error is only warned about and the
    # mean comes back as NaN instead of the intended 0.
    try:
        scores = cross_val_score(
            pipeline, X_train, y_train, cv=3, scoring="f1", error_score="raise"
        )
    except Exception as exc:
        # Keep the study running, but say why this configuration scored 0 so a
        # broken pipeline is not mistaken for a merely bad one.
        warnings.warn(
            f"Trial failed for algo={algo!r}, use_pca={use_pca!r}; "
            f"scoring it as 0.0: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return 0.0
    return scores.mean()

# -----------------------------------------------------------------------------
# 4. RUN 8 TUNED EXPERIMENTS
# -----------------------------------------------------------------------------
# Configure MLflow
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("churn_experiments")

algorithms = ["logistic", "random_forest", "xgboost", "lightgbm"]
results = []

print(f"\n{'Run Name':<45} {'Best CV F1':<12} {'Test F1':<12}")
print("-" * 75)

for algo in algorithms:
    for use_pca in [False, True]:
        run_name = f"{algo}_{'pca' if use_pca else 'no_pca'}_tuned"
        
        # 1. OPTIMIZE
        study = optuna.create_study(direction="maximize", sampler=TPESampler(seed=42))
        # Running 5 trials for speed (increase to 20+ for real usage)
        study.optimize(lambda t: objective(t, algo, use_pca), n_trials=5)
        
        best_params = study.best_params
        best_cv_score = study.best_value
        
        # 2. REBUILD BEST MODEL
        # We must manually map the best params back to the model
        steps = [build_preprocessing()]
        
        if use_pca:
            steps.append(PCA(n_components=best_params['pca_n']))
            
        model = make_estimator_for_name(algo)
        
        # Apply specific params found by Optuna
        if algo == "logistic":
            model.set_params(C=best_params["C"])
        elif algo == "random_forest":
            model.set_params(n_estimators=best_params["n_estimators"], max_depth=best_params["max_depth"])
        elif algo == "xgboost":
            model.set_params(learning_rate=best_params["learning_rate"], max_depth=best_params["max_depth"])
        elif algo == "lightgbm":
            model.set_params(learning_rate=best_params["learning_rate"], num_leaves=best_params["num_leaves"])
            
        steps.append(model)
        final_pipeline = make_pipeline(*steps)
        
        # 3. TRAIN ON FULL TRAIN SET
        final_pipeline.fit(X_train, y_train)
        
        # 4. EVALU

[I 2025-12-18 14:57:11,314] A new study created in memory with name: no-name-915c901a-15c2-4015-9320-2640a22a7bec
[I 2025-12-18 14:57:11,363] Trial 0 finished with value: 0.409993656370468 and parameters: {'C': 0.13292918943162169}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:57:11,397] Trial 1 finished with value: 0.38416422287390023 and parameters: {'C': 7.114476009343418}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:57:11,430] Trial 2 finished with value: 0.38416422287390023 and parameters: {'C': 1.5702970884055385}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:57:11,463] Trial 3 finished with value: 0.38895790200138025 and parameters: {'C': 0.6251373574521749}. Best is trial 0 with value: 0.409993656370468.


✓ Successfully imported housing_pipeline components.
✓ Database found at: /Users/nayanpaliwal/Desktop/Eas_final_project/housing_app_fall25/data/churn.db
✓ Data Loaded: 80 training samples.

Run Name                                      Best CV F1   Test F1     
---------------------------------------------------------------------------


[I 2025-12-18 14:57:11,497] Trial 4 finished with value: 0.41394716394716397 and parameters: {'C': 0.029380279387035364}. Best is trial 4 with value: 0.41394716394716397.
[I 2025-12-18 14:57:11,504] A new study created in memory with name: no-name-77329682-c27a-4cde-89ce-7e9552d811b8
[I 2025-12-18 14:57:11,544] Trial 0 finished with value: 0.34409017713365536 and parameters: {'pca_n': 0.8711626225809989, 'C': 7.114476009343418}. Best is trial 0 with value: 0.34409017713365536.
[I 2025-12-18 14:57:11,577] Trial 1 finished with value: 0.47222222222222227 and parameters: {'pca_n': 0.9390788489441669, 'C': 0.6251373574521749}. Best is trial 1 with value: 0.47222222222222227.
[I 2025-12-18 14:57:11,611] Trial 2 finished with value: 0.4122174122174122 and parameters: {'pca_n': 0.829643541684063, 'C': 0.029375384576328302}. Best is trial 1 with value: 0.47222222222222227.
[I 2025-12-18 14:57:11,644] Trial 3 finished with value: 0.37192437192437194 and parameters: {'pca_n': 0.8110358863119579,

In [10]:
# -----------------------------------------------------------------------------
# 4. RUN 8 TUNED EXPERIMENTS
# -----------------------------------------------------------------------------
# For each (algorithm, PCA on/off) pair: tune with Optuna, refit the best
# configuration on the training split, score it once on the test split, then
# record the run in MLflow and in `results`.

# Configure MLflow
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("churn_experiments")

algorithms = ["logistic", "random_forest", "xgboost", "lightgbm"]
results = []

print(f"\n{'Run Name':<45} {'Best CV F1':<12} {'Test F1':<12}")
print("-" * 75)

for algo in algorithms:
    for use_pca in [False, True]:
        run_name = f"{algo}_{'pca' if use_pca else 'no_pca'}_tuned"

        # 1. OPTIMIZE
        # The fixed sampler seed keeps the sequence of suggested values repeatable.
        study = optuna.create_study(direction="maximize", sampler=TPESampler(seed=42))
        # Running 5 trials for speed (increase to 20+ for real usage)
        study.optimize(lambda t: objective(t, algo, use_pca), n_trials=5)

        best_params = study.best_params
        best_cv_score = study.best_value

        # 2. REBUILD BEST MODEL
        steps = [build_preprocessing()]

        if use_pca:
            steps.append(PCA(n_components=best_params["pca_n"]))

        model = make_estimator_for_name(algo)

        # Every tuned name other than "pca_n" is an estimator parameter with the
        # same name in `objective`, so the best values can be applied directly
        # instead of repeating a per-algorithm mapping that has to stay in sync.
        model_params = {k: v for k, v in best_params.items() if k != "pca_n"}
        if model_params:
            model.set_params(**model_params)

        steps.append(model)
        final_pipeline = make_pipeline(*steps)

        # 3. TRAIN ON FULL TRAIN SET
        final_pipeline.fit(X_train, y_train)

        # 4. EVALUATE ON TEST SET
        y_pred = final_pipeline.predict(X_test)
        test_f1 = f1_score(y_test, y_pred)

        print(f"{run_name:<45} {best_cv_score:<12.4f} {test_f1:<12.4f}")

        # 5. LOG TO MLFLOW
        # Record the cross-validation score and the PCA flag next to the test
        # score so runs can be compared without consulting the test set.
        with mlflow.start_run(run_name=run_name):
            mlflow.log_params({**best_params, "algo": algo, "use_pca": use_pca, "tuned": True})
            mlflow.log_metric("f1", test_f1)
            mlflow.log_metric("cv_f1", best_cv_score)
            mlflow.sklearn.log_model(final_pipeline, "model")

        results.append(
            {"name": run_name, "f1": test_f1, "cv_f1": best_cv_score, "model": final_pipeline}
        )

[I 2025-12-18 14:58:42,535] A new study created in memory with name: no-name-3a5b3421-b70f-4094-b44a-ee050a8a98f0
[I 2025-12-18 14:58:42,641] Trial 0 finished with value: 0.409993656370468 and parameters: {'C': 0.13292918943162169}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:58:42,688] Trial 1 finished with value: 0.38416422287390023 and parameters: {'C': 7.114476009343418}. Best is trial 0 with value: 0.409993656370468.



Run Name                                      Best CV F1   Test F1     
---------------------------------------------------------------------------


[I 2025-12-18 14:58:42,743] Trial 2 finished with value: 0.38416422287390023 and parameters: {'C': 1.5702970884055385}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:58:42,787] Trial 3 finished with value: 0.38895790200138025 and parameters: {'C': 0.6251373574521749}. Best is trial 0 with value: 0.409993656370468.
[I 2025-12-18 14:58:42,835] Trial 4 finished with value: 0.41394716394716397 and parameters: {'C': 0.029380279387035364}. Best is trial 4 with value: 0.41394716394716397.


logistic_no_pca_tuned                         0.4139       0.2500      


[I 2025-12-18 14:58:47,744] A new study created in memory with name: no-name-ec29cf00-ca94-4970-a010-3936ddaa312c
[I 2025-12-18 14:58:47,806] Trial 0 finished with value: 0.34409017713365536 and parameters: {'pca_n': 0.8711626225809989, 'C': 7.114476009343418}. Best is trial 0 with value: 0.34409017713365536.
[I 2025-12-18 14:58:47,856] Trial 1 finished with value: 0.47222222222222227 and parameters: {'pca_n': 0.9390788489441669, 'C': 0.6251373574521749}. Best is trial 1 with value: 0.47222222222222227.
[I 2025-12-18 14:58:47,933] Trial 2 finished with value: 0.4122174122174122 and parameters: {'pca_n': 0.829643541684063, 'C': 0.029375384576328302}. Best is trial 1 with value: 0.47222222222222227.
[I 2025-12-18 14:58:47,996] Trial 3 finished with value: 0.37192437192437194 and parameters: {'pca_n': 0.8110358863119579, 'C': 3.9676050770529874}. Best is trial 1 with value: 0.47222222222222227.
[I 2025-12-18 14:58:48,073] Trial 4 finished with value: 0.4537037037037037 and parameters: {'p

logistic_pca_tuned                            0.4722       0.2500      


[I 2025-12-18 14:58:51,292] A new study created in memory with name: no-name-e2fce9f5-2f0d-494a-9ee2-ebc75c227ce6
[I 2025-12-18 14:58:51,590] Trial 0 finished with value: 0.3593073593073593 and parameters: {'n_estimators': 106, 'max_depth': 29}. Best is trial 0 with value: 0.3593073593073593.
[I 2025-12-18 14:58:52,106] Trial 1 finished with value: 0.3683707596751075 and parameters: {'n_estimators': 160, 'max_depth': 20}. Best is trial 1 with value: 0.3683707596751075.
[I 2025-12-18 14:58:52,368] Trial 2 finished with value: 0.3703703703703704 and parameters: {'n_estimators': 73, 'max_depth': 9}. Best is trial 2 with value: 0.3703703703703704.
[I 2025-12-18 14:58:52,525] Trial 3 finished with value: 0.3826086956521739 and parameters: {'n_estimators': 58, 'max_depth': 27}. Best is trial 3 with value: 0.3826086956521739.
[I 2025-12-18 14:58:52,982] Trial 4 finished with value: 0.34272973403408186 and parameters: {'n_estimators': 140, 'max_depth': 23}. Best is trial 3 with value: 0.382608

random_forest_no_pca_tuned                    0.3826       0.5882      


[I 2025-12-18 14:58:56,211] A new study created in memory with name: no-name-10d09b95-387a-4521-a184-967a2c932c31
[I 2025-12-18 14:58:56,683] Trial 0 finished with value: 0.3747863247863248 and parameters: {'pca_n': 0.8711626225809989, 'n_estimators': 193, 'max_depth': 24}. Best is trial 0 with value: 0.3747863247863248.
[I 2025-12-18 14:58:56,949] Trial 1 finished with value: 0.3651903651903652 and parameters: {'pca_n': 0.913745111997437, 'n_estimators': 73, 'max_depth': 9}. Best is trial 0 with value: 0.3747863247863248.
[I 2025-12-18 14:58:57,359] Trial 2 finished with value: 0.33149758454106276 and parameters: {'pca_n': 0.8110358863119579, 'n_estimators': 180, 'max_depth': 20}. Best is trial 0 with value: 0.3747863247863248.
[I 2025-12-18 14:58:57,525] Trial 3 finished with value: 0.29806763285024157 and parameters: {'pca_n': 0.9345337897812487, 'n_estimators': 53, 'max_depth': 30}. Best is trial 0 with value: 0.3747863247863248.
[I 2025-12-18 14:58:57,756] Trial 4 finished with va

random_forest_pca_tuned                       0.3748       0.3750      


[I 2025-12-18 14:59:01,111] A new study created in memory with name: no-name-2905907a-c300-42ad-ac91-5e149866f769
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-12-18 14:59:01,732] Trial 0 finished with value: 0.3651903651903652 and parameters: {'learning_rate': 0.03574712922600244, 'max_depth': 10}. Best is trial 0 with value: 0.3651903651903652.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-12-18 14:59:02,234] Trial 1 finished with value: 0.3108138238573021 and parameters: {'learning_rate': 0.1205712

xgboost_no_pca_tuned                          0.3652       0.6250      


[I 2025-12-18 14:59:07,850] A new study created in memory with name: no-name-78d8a36e-335b-473c-aec9-c95ccc78bfa0
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-12-18 14:59:08,365] Trial 0 finished with value: 0.39677230981578804 and parameters: {'pca_n': 0.8711626225809989, 'learning_rate': 0.2536999076681771, 'max_depth': 8}. Best is trial 0 with value: 0.39677230981578804.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-12-18 14:59:08,882] Trial 1 finished with value: 0.34432234432234426 and parameter

xgboost_pca_tuned                             0.4093       0.4706      


[I 2025-12-18 14:59:13,883] A new study created in memory with name: no-name-d460adba-285e-4cda-9a1a-3700d2a47aca
[I 2025-12-18 14:59:14,018] Trial 0 finished with value: 0.48937198067632853 and parameters: {'learning_rate': 0.03574712922600244, 'num_leaves': 97}. Best is trial 0 with value: 0.48937198067632853.
[I 2025-12-18 14:59:14,138] Trial 1 finished with value: 0.5005483405483405 and parameters: {'learning_rate': 0.1205712628744377, 'num_leaves': 68}. Best is trial 1 with value: 0.5005483405483405.
[I 2025-12-18 14:59:14,255] Trial 2 finished with value: 0.3674603174603175 and parameters: {'learning_rate': 0.01700037298921102, 'num_leaves': 32}. Best is trial 1 with value: 0.5005483405483405.
[I 2025-12-18 14:59:14,374] Trial 3 finished with value: 0.30000000000000004 and parameters: {'learning_rate': 0.012184186502221764, 'num_leaves': 90}. Best is trial 1 with value: 0.5005483405483405.
[I 2025-12-18 14:59:14,491] Trial 4 finished with value: 0.47222222222222227 and parameters

lightgbm_no_pca_tuned                         0.5005       0.5333      


[I 2025-12-18 14:59:17,384] A new study created in memory with name: no-name-76b130e9-0481-4022-8d63-91543df15fd3
[I 2025-12-18 14:59:17,518] Trial 0 finished with value: 0.40080267558528426 and parameters: {'pca_n': 0.8711626225809989, 'learning_rate': 0.2536999076681771, 'num_leaves': 79}. Best is trial 0 with value: 0.40080267558528426.
[I 2025-12-18 14:59:17,640] Trial 1 finished with value: 0.33296172426607207 and parameters: {'pca_n': 0.913745111997437, 'learning_rate': 0.01700037298921102, 'num_leaves': 32}. Best is trial 0 with value: 0.40080267558528426.
[I 2025-12-18 14:59:17,759] Trial 2 finished with value: 0.3882608695652174 and parameters: {'pca_n': 0.8110358863119579, 'learning_rate': 0.19030368381735815, 'num_leaves': 68}. Best is trial 0 with value: 0.40080267558528426.
[I 2025-12-18 14:59:17,882] Trial 3 finished with value: 0.37719298245614036 and parameters: {'pca_n': 0.9345337897812487, 'learning_rate': 0.010725209743171997, 'num_leaves': 98}. Best is trial 0 with 

lightgbm_pca_tuned                            0.4780       0.4762      


In [11]:
# Emergency Save Script
# Persists the best pipeline still held in the kernel's `results` list, without
# re-running the training cell.
import joblib
import os

# 1. Define the correct path
# This puts it in housing_app_fall25/models/
save_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'models', 'global_best_model_classification.pkl'))

# If that path doesn't look right (e.g. if you are running from root), try this:
if not os.path.exists(os.path.dirname(save_path)):
    save_path = os.path.abspath(os.path.join(os.getcwd(), 'models', 'global_best_model_classification.pkl'))

print(f"Attempting to save to: {save_path}")

# 2. Grab the best model from memory
# (This assumes you still have the 'results' list from running the notebook)
if 'results' in locals() and len(results) > 0:
    # NOTE(review): 'f1' is the held-out test F1 recorded by the training cell,
    # so picking the winner with it makes that score optimistic for the saved
    # model. Consider selecting on a cross-validation score instead.
    global_best = max(results, key=lambda x: x['f1'])
    # Create the target folder when neither candidate location exists yet;
    # otherwise joblib.dump fails with FileNotFoundError.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    joblib.dump(global_best['model'], save_path)
    print("✅ Success! Model saved.")
    print(f"File created at: {save_path}")
else:
    print("❌ Error: 'results' variable not found.")
    print("Please RE-RUN the training cell (Step 4) in this notebook first.")

Attempting to save to: /Users/nayanpaliwal/Desktop/Eas_final_project/housing_app_fall25/models/global_best_model_classification.pkl
✅ Success! Model saved.
File created at: /Users/nayanpaliwal/Desktop/Eas_final_project/housing_app_fall25/models/global_best_model_classification.pkl
