##### Train the models

In [1]:
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

##### Set the tracking location & experiment name

In [2]:
# --------------------------------------------------
# 1) Point MLflow at its tracking store and choose the experiment
# --------------------------------------------------
# Relative file store: this creates/uses a local ./mlruns folder.
TRACKING_URI = "file:./mlruns"
# Name under which the runs are grouped in the MLflow UI.
EXPERIMENT_NAME = "Housing Price Prediction"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the cell's final bare expression so the notebook displays the
# returned Experiment object.
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='file:///d:/MLOpsSAssignment/mlops-housing-project/notebooks/mlruns/900441419412460389', creation_time=1754225402719, experiment_id='900441419412460389', last_update_time=1754225402719, lifecycle_stage='active', name='Housing Price Prediction', tags={}>

##### Loading the data

In [3]:
# --------------------------------------------------
# 2) Load the California housing data and hold out a test set
# --------------------------------------------------
data = fetch_california_housing(as_frame=True)

# Single frame holding the feature columns plus the target, renamed "MedHouseVal".
df = pd.concat([data.data, data.target.rename("MedHouseVal")], axis=1)

# Separate predictors from the target before splitting.
features = df.drop(columns="MedHouseVal")
target = df["MedHouseVal"]

# 80/20 split; the fixed seed keeps the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

##### Train & log both models

In [5]:
# --------------------------------------------------
# 3) Train & log both models
# --------------------------------------------------
# Maps model name -> (test RMSE, MLflow run id); the model-selection step
# reads this to find the best run.
results = {}
for model_name, model in {
    "LinearRegression": LinearRegression(),
    # Seeded so the fitted tree -- and therefore the RMSE comparison and the
    # model that ends up registered -- is reproducible on Restart & Run All.
    # 42 matches the seed used for the train/test split.
    "DecisionTree": DecisionTreeRegressor(random_state=42),
}.items():

    # One MLflow run per model; the context manager closes the run on exit.
    with mlflow.start_run(run_name=model_name) as run:
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        rmse = root_mean_squared_error(y_test, preds)

        # logging
        mlflow.log_param("model_type", model_name)
        mlflow.log_metric("rmse", rmse)
        # --> IMPORTANT: artifact_path MUST be "model", because the selection
        # step builds its URI as runs:/<run_id>/model
        mlflow.sklearn.log_model(model, artifact_path="model")

        print(f"{model_name}: RMSE = {rmse:.4f}")
        # Take the id from the run object yielded by start_run instead of
        # re-querying mlflow.active_run().
        results[model_name] = (rmse, run.info.run_id)



LinearRegression: RMSE = 0.7456
🏃 View run LinearRegression at: http://localhost:5000/#/experiments/473694890830989136/runs/dac06447269e44a7ade68f695dfbb35e
🧪 View experiment at: http://localhost:5000/#/experiments/473694890830989136




DecisionTree: RMSE = 0.7141
🏃 View run DecisionTree at: http://localhost:5000/#/experiments/473694890830989136/runs/98f4a30856964acfb3bcf4a91acafada
🧪 View experiment at: http://localhost:5000/#/experiments/473694890830989136


##### Select the better of the two models

In [6]:
# --------------------------------------------------
# 4) Pick the run with the lowest RMSE
# --------------------------------------------------
# Each entry of `results` is name -> (rmse, run_id); rank the entries by the
# rmse component and unpack the winner in one step.
best_model_name, (best_rmse, best_run_id) = min(
    results.items(), key=lambda entry: entry[1][0]
)

print(f"\nBest model: {best_model_name} with RMSE = {best_rmse:.4f}")

# URI of the model artifact logged under "model" in the winning run.
model_uri = f"runs:/{best_run_id}/model"


Best model: DecisionTree with RMSE = 0.7141


##### MLflow Model Registry

In [7]:

# --------------------------------------------------
# 5) Register the best model in the Model Registry
# --------------------------------------------------
# NOTE(review): `client` is not used in this cell -- presumably kept for
# later registry calls; confirm before removing.
client = MlflowClient()

# Registry entry that receives the model selected above.
registered_name = "BestHousingModel"

model_version = mlflow.register_model(model_uri=model_uri, name=registered_name)

print(f"Registered {registered_name} as version {model_version.version}")

Registered model 'BestHousingModel' already exists. Creating a new version of this model...
2025/08/03 20:25:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: BestHousingModel, version 1
Created version '1' of model 'BestHousingModel'.


Registered BestHousingModel as version 1
