In [None]:
!pip install mlflow
!pip install -U scikit-learn pandas joblib


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from pathlib import Path
import numpy as np

# Fetch the California housing data as a DataFrame and separate the
# "MedHouseVal" target column from the remaining feature columns.
housing = fetch_california_housing(as_frame=True)
df = housing.frame
y = df["MedHouseVal"]
X = df.drop(columns=["MedHouseVal"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Local folder (under the current working directory) for pickled models.
REPO_ROOT = Path.cwd()
MODELS_DIR = REPO_ROOT.joinpath("models")
MODELS_DIR.mkdir(parents=True, exist_ok=True)
print(f"Artifacts will be stored in: {MODELS_DIR}")

# Create the "Default" experiment only when the tracking store does not have it.
default_experiment = mlflow.get_experiment_by_name("Default")
if not default_experiment:
    mlflow.create_experiment(name="Default")

# ------------------------

#input signature


# -------------

# ------------------------
# Linear Regression - MLflow Run
# ------------------------
# Fits a LinearRegression baseline on the training split, evaluates it on the
# test split, saves a local pickle, and logs + registers the model and its
# metrics in MLflow.

# Optional: Check where it's tracking
print("Tracking URI:", mlflow.get_tracking_uri())

mlflow.set_experiment("LinearRegressionExperiment")

with mlflow.start_run(run_name="LinearRegressionRun") as run1:
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)
    lr_preds = lr_model.predict(X_test)

    # Seed the sample so the logged input example is the same on every re-run.
    input_example = X_test.sample(5, random_state=42)
    predicted = lr_model.predict(input_example)

    # Infer model signature from the example inputs and this model's outputs
    signature = infer_signature(input_example, predicted)

    # mean_squared_error returns the MSE; take the square root to obtain RMSE.
    lr_rmse = np.sqrt(mean_squared_error(y_test, lr_preds))
    lr_r2 = r2_score(y_test, lr_preds)

    print("🔹 Linear Regression:")
    print(f"RMSE: {lr_rmse:.4f}")
    print(f"R² Score: {lr_r2:.4f}")

    # Save model locally
    joblib.dump(lr_model, MODELS_DIR / "linear_regression.pkl")

    # Log model as an artifact of this run
    mlflow.sklearn.log_model(
        lr_model,
        name="model",
        input_example=input_example,
        signature=signature,
    )

    # Register model.
    # NOTE: the registry name was previously "LogisticRegression", which
    # mislabels this estimator (it is a LinearRegression regressor).
    mlflow.register_model(
        model_uri=f"runs:/{run1.info.run_id}/model",
        name="LinearRegression",
    )

    # Log metrics
    mlflow.log_metric("rmse", lr_rmse)
    mlflow.log_metric("r2", lr_r2)


# ------------------------
# Decision Tree - MLflow Run
# ------------------------
# Fits a depth-limited DecisionTreeRegressor on the training split, evaluates
# it on the test split, saves a local pickle, and logs + registers the model
# and its metrics in MLflow.
mlflow.set_experiment("DecisionTreeExperiment")
with mlflow.start_run(run_name="DecisionTreeRun") as run2:
    dt_model = DecisionTreeRegressor(random_state=42, max_depth=10)
    dt_model.fit(X_train, y_train)
    dt_preds = dt_model.predict(X_test)

    dt_rmse = np.sqrt(mean_squared_error(y_test, dt_preds))
    dt_r2 = r2_score(y_test, dt_preds)

    # Seed the sample so the logged input example is the same on every re-run.
    input_example = X_test.sample(5, random_state=42)
    # The signature must describe THIS model's output, so predict with the
    # decision tree (not with a model fitted in another run).
    predicted = dt_model.predict(input_example)

    # Infer model signature
    signature = infer_signature(input_example, predicted)

    print("\n🔹 Decision Tree Regressor:")
    print(f"RMSE: {dt_rmse:.4f}")
    print(f"R² Score: {dt_r2:.4f}")

    # Save model locally
    joblib.dump(dt_model, MODELS_DIR / "decision_tree_model.pkl")

    # Log model as an artifact of this run
    mlflow.sklearn.log_model(
        dt_model,
        name="model",
        input_example=input_example,
        signature=signature,
    )

    # Register model
    mlflow.register_model(
        model_uri=f"runs:/{run2.info.run_id}/model",
        name="DecisionTreeRegressor",
    )

    # Log metrics
    mlflow.log_metric("rmse", dt_rmse)
    mlflow.log_metric("r2", dt_r2)

🔹 Linear Regression:
RMSE: 0.7456
R² Score: 0.5758

🔹 Decision Tree Regressor:
RMSE: 0.6446
R² Score: 0.6829


['C:\\Users\\kushl\\Documents\\MLOpsAssignment\\mlops-assignmentOne\\models\\decision_tree_model.pkl']