## Learning to use MlflowClient

In [1]:
from mlflow.tracking import MlflowClient

In [2]:
# Tracking/registry backend: a SQLite database file named mlflow.db.
# NOTE(review): the path in the URI is relative, so it presumably resolves against the
# kernel's working directory — confirm the notebook is started from the right folder.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
# Client bound explicitly to that store; used by the cells below.
client = MlflowClient(tracking_uri = MLFLOW_TRACKING_URI)

In [3]:
# List the experiments stored in this tracking backend.
client.search_experiments()

[<Experiment: artifact_location='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/2', creation_time=1747427071090, experiment_id='2', last_update_time=1747427071090, lifecycle_stage='active', name='my-cool-experiment', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/1', creation_time=1747076093313, experiment_id='1', last_update_time=1747076093313, lifecycle_stage='active', name='nyc_taxi_experiment', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/0', creation_time=1747076093301, experiment_id='0', last_update_time=1747076093301, lifecycle_stage='active', name='Default', tags={}>]

In [4]:
# Left commented out on purpose: 'my-cool-experiment' already exists in this tracking
# store (it appears in the search_experiments output), so it does not need to be created again.
#client.create_experiment(name = 'my-cool-experiment')

In [5]:
from mlflow.entities import ViewType

# Five active runs of experiment '1' with the lowest RMSE (ascending order => best first).
runs = client.search_runs(
    experiment_ids="1",
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [6]:
# One line per run: its id and its RMSE rounded to 4 decimals.
for run in runs:
    print(f"run id: {run.info.run_id}", f"rmse: {round(run.data.metrics['rmse'], 4)}")

run id: 55140c2782a24b869b3d9d84ff7ef79c rmse: 6.3268
run id: 37ecb3e987924fa796fa4b769bd958b7 rmse: 6.3281
run id: d47858d78c204939a1bd9a9be0019664 rmse: 6.3281
run id: 9e250d49e34745af87534a451467bbef rmse: 6.3281
run id: 1772eccf0f87494f9c9fbc81e14473b6 rmse: 6.3281


In [7]:
import mlflow

In [8]:
# Point the module-level mlflow API at the same store as `client`; the later
# mlflow.register_model(...) call and the argument-less MlflowClient() inside
# test_model do not receive the URI explicitly.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [9]:
# Register the "model" artifact of a specific run under the name "nyc-taxi-regressor".
# NOTE(review): the run id is hard-coded; it is one of the runs listed by the search above.
# `run_id` is reused further down to download the matching preprocessor artifact.
run_id = "9e250d49e34745af87534a451467bbef"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri = model_uri, name = "nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '5' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1747432214196, current_stage='None', description=None, last_updated_timestamp=1747432214196, name='nyc-taxi-regressor', run_id='9e250d49e34745af87534a451467bbef', run_link=None, source='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/1/9e250d49e34745af87534a451467bbef/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=5>

In [10]:
# List the registered models (the repr includes their aliases and latest versions).
client.search_registered_models()

[<RegisteredModel: aliases={'production': 3, 'staging': 1, 'stating': 2}, creation_timestamp=1747425297451, description='This is the new NTC Taxi predictor for Trip Duration', last_updated_timestamp=1747432214196, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1747432214196, current_stage='None', description=None, last_updated_timestamp=1747432214196, name='nyc-taxi-regressor', run_id='9e250d49e34745af87534a451467bbef', run_link=None, source='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/1/9e250d49e34745af87534a451467bbef/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=5>], name='nyc-taxi-regressor', tags={}>]

In [11]:
model_name = "nyc-taxi-regressor"
# NOTE(review): get_latest_versions belongs to the legacy stage-based registry workflow and
# appears to emit a deprecation warning in recent MLflow releases — TODO confirm and migrate.
latest_versions = client.get_latest_versions(name=model_name)
for version in latest_versions:
    print(f"version: {version.version} ,  alias: {version.aliases}")

version: 5 ,  alias: []


  latest_versions = client.get_latest_versions(name = model_name)


In [12]:
# Update the description of an existing model version with the date of this run.
# Only the description text is changed here; the call does not assign an alias.
from datetime import datetime
date = datetime.today().date()
model_version = 3
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to production on {date}",
)

<ModelVersion: aliases=['production'], creation_timestamp=1747427946224, current_stage='None', description='The model version 3 was transitioned to production on 2025-05-16', last_updated_timestamp=1747432214546, name='nyc-taxi-regressor', run_id='9e250d49e34745af87534a451467bbef', run_link=None, source='file:///c:/Users/mokon/Documents/Mlops-Zoomcamp/experiment_tracking/mlruns/1/9e250d49e34745af87534a451467bbef/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [71]:
from sklearn.metrics import mean_squared_error
import pandas as pd

def read_dataframe(filename):
    """Load a green-taxi trip parquet file and derive the trip duration.

    Parameters
    ----------
    filename : str or path-like
        Parquet file handed straight to ``pd.read_parquet``. It must provide the
        ``lpep_pickup_datetime``, ``lpep_dropoff_datetime``, ``PULocationID`` and
        ``DOLocationID`` columns.

    Returns
    -------
    pandas.DataFrame
        Only the trips lasting between 1 and 60 minutes (both inclusive), with an
        added ``duration`` column in minutes and both location-ID columns cast to ``str``.
    """
    df = pd.read_parquet(filename)

    # Vectorised timedelta -> minutes conversion (no per-row Python lambda).
    trip_time = df["lpep_dropoff_datetime"] - df["lpep_pickup_datetime"]
    df["duration"] = trip_time.dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the original frame, so the column
    # assignment below does not trigger pandas' SettingWithCopyWarning.
    df = df[(df["duration"] >= 1) & (df["duration"] <= 60)].copy()

    categorical = ["PULocationID", "DOLocationID"]
    df[categorical] = df[categorical].astype(str)

    return df

def preprocess(df, dv):
    """Convert the model's feature columns into whatever ``dv.transform`` returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``PULocationID``, ``DOLocationID`` and ``trip_distance``.
    dv : object
        Anything exposing ``transform(list_of_dicts)``; presumably the fitted
        vectorizer saved with the training run (verify against the caller).

    Returns
    -------
    The value returned by ``dv.transform`` for one record (dict) per row.
    """
    # Categorical columns first, then the numerical one.
    feature_columns = ["PULocationID", "DOLocationID", "trip_distance"]
    records = df[feature_columns].to_dict(orient="records")
    return dv.transform(records)

def test_model(name, alias, X_test, y_test):
    """Score the registered model version behind ``alias`` on a held-out set.

    Parameters
    ----------
    name : str
        Registered model name.
    alias : str
        Alias of the model version to evaluate (e.g. "production").
    X_test
        Features, passed unchanged to the loaded model's ``predict``.
    y_test
        True target values, compared against the predictions.

    Returns
    -------
    dict
        ``{"rmse": <root mean squared error>}``.
    """
    # No tracking_uri is passed, so this client relies on the globally configured
    # MLflow tracking URI (mlflow.set_tracking_uri must have been called beforehand).
    client = MlflowClient()
    version_info = client.get_model_version_by_alias(name=name, alias=alias)
    model = mlflow.pyfunc.load_model(version_info.source)
    y_pred = model.predict(X_test)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {"rmse": rmse}

In [14]:
# Evaluation data: green-taxi trips for March 2021.
# NOTE(review): absolute, machine-specific path — this cell only runs as-is on the
# author's machine; consider deriving it from a configurable data directory.
df = read_dataframe(r"C:\Users\mokon\Documents\Mlops-Zoomcamp\green_tripdata_2021-03.parquet")

In [15]:
# Download the "preprocessor" artifact folder of the registered run into the current
# directory (the model itself is loaded later, inside test_model).
client.download_artifacts(run_id = run_id, path = "preprocessor", dst_path=".")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'c:\\Users\\mokon\\Documents\\Mlops-Zoomcamp\\experiment_tracking\\preprocessor'

In [16]:
import pickle
# Load the preprocessor object from the artifact downloaded above.
# NOTE: pickle.load can execute arbitrary code while unpickling — only open
# artifacts that come from a run you trust.
with open("preprocessor/preprocessor.b","rb") as f_in:
    dv = pickle.load(f_in)

In [17]:
# Feature matrix for the evaluation data, built with the run's own preprocessor.
X_test = preprocess(df, dv)

In [18]:
# Ground truth: the trip duration in minutes computed by read_dataframe.
target = "duration"
y_test = df[target].values

In [72]:
# Evaluate the version currently behind the "production" alias; %time reports how long
# model loading plus prediction takes.
%time test_model(name = model_name, alias = "production", X_test = X_test, y_test = y_test )

CPU times: total: 1min 35s
Wall time: 16.1 s


{'rmse': 6.78649006142896}