In [3]:
# ===============================
# 1. IMPORTS
# ===============================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import mlflow
import mlflow.sklearn

# ===============================
# 2. MLFLOW CONFIG (CRITICAL)
# ===============================
# Point MLflow at the local file-based tracking store, then select the
# experiment that every run below is recorded under.
# NOTE(review): the store lives under a OneDrive folder and the path contains
# a space -- confirm MLflow can read and write this directory reliably.
tracking_uri = (
    "file:///C:/Users/asmis/OneDrive/Desktop/EMI_Predict V1/EMI_Predict/mlruns"
)
experiment_name = "Max_EMI_Regression"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

# ===============================
# 3. LOAD DATA
# ===============================
# Read the feature-engineered dataset from the working directory.
df = pd.read_csv("emi_feature_engineered.csv")

# Regression target is "max_monthly_emi". Both label columns are removed from
# the feature matrix so neither of them is available to the model as an input.
y = df["max_monthly_emi"]
X = df.drop(columns=["emi_eligibility", "max_monthly_emi"])

# ===============================
# 4. SPLIT
# ===============================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from one execution to the next.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# ===============================
# 5. PREPROCESSOR
# ===============================
# Partition the feature columns by dtype so each group gets the right
# transformer. ColumnTransformer discards every column that is not listed in
# one of its transformers (remainder="drop" is the default), so the two groups
# must cover all columns. Selecting "number" and its complement avoids
# silently losing columns whose dtype is not exactly int64, float64 or object
# (for example int32, float32, bool, category or pandas string dtypes).
# NOTE(review): the complement would also pick up datetime columns if X ever
# contains any -- confirm none are present before one-hot encoding them.
num_features = X.select_dtypes(include="number").columns
cat_features = X.select_dtypes(exclude="number").columns

preprocessor = ColumnTransformer(
    transformers=[
        # Standardise numeric columns to zero mean and unit variance.
        ("num", StandardScaler(), num_features),
        # One-hot encode everything else; categories not seen during fit are
        # encoded as all zeros instead of raising at predict time.
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ]
)

# ===============================
# 6. LINEAR REGRESSION
# ===============================
with mlflow.start_run(run_name="Linear_Regression"):
    # Baseline model: ordinary least squares on the preprocessed features.
    steps = [
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
    model = Pipeline(steps)
    model.fit(X_train, y_train)

    # Score on the held-out split and record each metric on the active run.
    y_pred = model.predict(X_test)
    scores = {
        "rmse": np.sqrt(mean_squared_error(y_test, y_pred)),
        "mae": mean_absolute_error(y_test, y_pred),
        "r2": r2_score(y_test, y_pred),
    }
    for metric_name, metric_value in scores.items():
        mlflow.log_metric(metric_name, metric_value)

    # Save the fitted pipeline (preprocessing + regressor) with the run and
    # register it under its registry name.
    mlflow.sklearn.log_model(
        model,
        name="linear_regression_model",
        registered_model_name="Max_EMI_Linear",
    )

# ===============================
# 7. RANDOM FOREST REGRESSOR
# ===============================
with mlflow.start_run(run_name="Random_Forest_Regressor"):
    # Keep the hyperparameters in a single dict so the values recorded on the
    # run are exactly the ones used to build the estimator. Without logging
    # them, tracked runs cannot be compared or reproduced from MLflow alone.
    rf_params = {
        "n_estimators": 200,
        "max_depth": 12,
        "random_state": 42,
    }
    mlflow.log_params(rf_params)

    model = Pipeline([
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(**rf_params)),
    ])

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluation metrics on the held-out split.
    mlflow.log_metric("rmse", np.sqrt(mean_squared_error(y_test, y_pred)))
    mlflow.log_metric("mae", mean_absolute_error(y_test, y_pred))
    mlflow.log_metric("r2", r2_score(y_test, y_pred))

    # Save the fitted pipeline (preprocessing + regressor) with the run and
    # register it under its registry name.
    mlflow.sklearn.log_model(
        model,
        name="random_forest_regressor",
        registered_model_name="Max_EMI_RF",
    )

# ===============================
# 8. XGBOOST REGRESSOR
# ===============================
with mlflow.start_run(run_name="XGBoost_Regressor"):
    # Keep the hyperparameters in a single dict so the values recorded on the
    # run are exactly the ones used to build the estimator. Without logging
    # them, tracked runs cannot be compared or reproduced from MLflow alone.
    xgb_params = {
        "n_estimators": 300,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
    }
    mlflow.log_params(xgb_params)

    model = Pipeline([
        ("preprocessor", preprocessor),
        ("regressor", XGBRegressor(**xgb_params)),
    ])

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluation metrics on the held-out split.
    mlflow.log_metric("rmse", np.sqrt(mean_squared_error(y_test, y_pred)))
    mlflow.log_metric("mae", mean_absolute_error(y_test, y_pred))
    mlflow.log_metric("r2", r2_score(y_test, y_pred))

    # Save the fitted pipeline (preprocessing + regressor) with the run and
    # register it under its registry name.
    mlflow.sklearn.log_model(
        model,
        name="xgboost_regressor",
        registered_model_name="Max_EMI_XGB",
    )


Traceback (most recent call last):
  File "C:\Users\asmis\AppData\Roaming\Python\Python311\site-packages\mlflow\store\tracking\file_store.py", line 379, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\asmis\AppData\Roaming\Python\Python311\site-packages\mlflow\store\tracking\file_store.py", line 477, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\asmis\AppData\Roaming\Python\Python311\site-packages\mlflow\store\tracking\file_store.py", line 1662, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\asmis\AppData\Roaming\Python\Python311\site-packages\mlflow\store\tracking\file_store.py", line 1655, in _read_helper
    result = read_yaml(r