Until now, we've run experiments and managed models through the MLflow UI.

Now we'll interact with MLflow programmatically, using its Python API.

In [1]:
import mlflow

### Connecting

In [2]:
from mlflow.tracking import MlflowClient

# SQLite database file (relative to the working directory) used as the MLflow backend.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client handle bound to that backend; the cells below use it for all tracking and registry calls.
client=MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

### Creating an experiment

In [5]:
# create_experiment raises if an experiment with this name already exists, which
# breaks "Restart & Run All". Look the experiment up first and only create it when
# it is missing; either way the cell displays the experiment id.
EXPERIMENT_NAME="test-experiment"
existing_experiment=client.get_experiment_by_name(EXPERIMENT_NAME)
(
    existing_experiment.experiment_id
    if existing_experiment is not None
    else client.create_experiment(name=EXPERIMENT_NAME)
)

'2'

### Deep dive into experiments

#### Getting details of runs

In [13]:
from mlflow.entities.view_type import ViewType

# Up to 5 active runs of experiment '1' whose rmse is below 6.15, lowest rmse first.
runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 6.15",
    run_view_type=ViewType.ACTIVE_ONLY,  # skip deleted runs
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [14]:
# Raw Run entities (metrics, params and tags per run); the loop in the next cell prints a compact summary.
runs

[<Run: data=<RunData: metrics={'rmse': 6.149702691101375}, params={'learning_rate': '0.1197695995106272',
  'max_depth': '6',
  'min_child_weight': '1.9909299773230704',
  'objective': 'reg:linear',
  'reg_alpha': '0.007622823128939152',
  'reg_lambda': '0.05883529019487995',
  'seed': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "ed52e50ff78e46d3aa0ebef6b663a20d", '
                              '"artifact_path": "models_mlflow", '
                              '"utc_time_created": "2025-03-10 '
                              '14:18:39.777441", "model_uuid": '
                              '"377f6af9e03a44e69faec1ba44a8998b", "flavors": '
                              '{"python_function": {"loader_module": '
                              '"mlflow.xgboost", "python_version": "3.12.7", '
                              '"data": "model.xgb", "env": {"conda": '
                              '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                              '"xgboost":

In [15]:
# One line per matching run: its id and the rmse metric it logged.
for run in runs:
    print(
        f"run id: {run.info.run_id}; "
        f"rmse: {run.data.metrics['rmse']}"
    )

run id: ed52e50ff78e46d3aa0ebef6b663a20d; rmse: 6.149702691101375
run id: 9d3499d15365492bb9301a971cb18271; rmse: 6.149702691101375


#### Registering models

Note: If a registered model with this name already exists, a new version of that model is created instead.

In [16]:
import mlflow

# The module-level mlflow.* functions used below (e.g. mlflow.register_model) do not go
# through `client`, so point them at the same backend explicitly.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [17]:
# Register the artifact stored under "model" in this run as a version of 'nyc-taxi-regressor'.
run_id = '9d3499d15365492bb9301a971cb18271'
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(name='nyc-taxi-regressor', model_uri=model_uri)

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1741620639096, current_stage='None', description=None, last_updated_timestamp=1741620639096, name='nyc-taxi-regressor', run_id='9d3499d15365492bb9301a971cb18271', run_link=None, source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/9d3499d15365492bb9301a971cb18271/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

#### Transition a model from one stage to another

##### Method 1 (deprecated method) 

In [21]:
# Fetch the registered-model entity (aliases, description, latest versions) by name.
model_name = 'nyc-taxi-regressor'
model_info = client.get_registered_model(name=model_name)
model_info

<RegisteredModel: aliases={'prod': 1, 'stage': 2}, creation_timestamp=1741618421334, description='The NYC Taxi Prediction for Trip duration', last_updated_timestamp=1741620639096, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1741620639096, current_stage='None', description=None, last_updated_timestamp=1741620639096, name='nyc-taxi-regressor', run_id='9d3499d15365492bb9301a971cb18271', run_link=None, source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/9d3499d15365492bb9301a971cb18271/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='nyc-taxi-regressor', tags={}>

In [23]:
# ModelVersion entries the registry reports as "latest" for this registered model.
latest_versions=model_info.latest_versions
latest_versions

[<ModelVersion: aliases=[], creation_timestamp=1741620639096, current_stage='None', description=None, last_updated_timestamp=1741620639096, name='nyc-taxi-regressor', run_id='9d3499d15365492bb9301a971cb18271', run_link=None, source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/9d3499d15365492bb9301a971cb18271/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>]

In [25]:
# Show which stage each of the latest versions currently sits in.
for version in latest_versions:
    print(
        f"version: {version.version}; "
        f"stage: {version.current_stage}"
    )

version: 3; stage: None


In [None]:
#transitioning model (stage-based workflow; this is the deprecated method)
model_version = 3
new_stage = "Staging"

client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False,
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1741620639096, current_stage='Staging', description=None, last_updated_timestamp=1741622183190, name='nyc-taxi-regressor', run_id='9d3499d15365492bb9301a971cb18271', run_link=None, source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/9d3499d15365492bb9301a971cb18271/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [30]:
from datetime import datetime

# Record the transition (version, target stage, today's date) in the version's description.
date = datetime.today().date()

client.update_model_version(
    name=model_name,
    version=model_version,
    description=(
        f"The model version {model_version} was transitioned "
        f"to {new_stage} on {date}"
    ),
)

<ModelVersion: aliases=[], creation_timestamp=1741620639096, current_stage='Staging', description='The model version 3 was transitioned to Staging on 2025-03-10', last_updated_timestamp=1741622213211, name='nyc-taxi-regressor', run_id='9d3499d15365492bb9301a971cb18271', run_link=None, source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/9d3499d15365492bb9301a971cb18271/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

##### Method 2

In [None]:
# Look up the model version that currently carries the 'stage' alias.
model_name = 'nyc-taxi-regressor'
model_info = client.get_model_version_by_alias(name=model_name, alias='stage')
model_info

<ModelVersion: aliases=['stage'], creation_timestamp=1741618549842, current_stage='None', description='', last_updated_timestamp=1741618549842, name='nyc-taxi-regressor', run_id='2a96f7f7df7449d79da04cd52fe099e7', run_link='', source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/2a96f7f7df7449d79da04cd52fe099e7/artifacts/model', status='READY', status_message=None, tags={'model': 'xgboost_v2'}, user_id=None, version=2>

In [36]:
# Attach the 'prod' alias to version 2 of the registered model.
model_version = 2
new_alias = "prod"
client.set_registered_model_alias(
    name=model_name,
    alias=new_alias,
    version=model_version,
)

In [37]:
# Read the 'prod' alias back to confirm which version it now resolves to.
model_name = 'nyc-taxi-regressor'
model_info = client.get_model_version_by_alias(name=model_name, alias='prod')
model_info

<ModelVersion: aliases=['prod', 'stage'], creation_timestamp=1741618549842, current_stage='None', description='', last_updated_timestamp=1741618549842, name='nyc-taxi-regressor', run_id='2a96f7f7df7449d79da04cd52fe099e7', run_link='', source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/2a96f7f7df7449d79da04cd52fe099e7/artifacts/model', status='READY', status_message=None, tags={'model': 'xgboost_v2'}, user_id=None, version=2>

The 'prod' alias was successfully added to the v2 model. However, an alias can point to only one version at a time, so 'prod' was removed from the v1 model in the process.

Let's move the 'prod' alias back to the v1 model.

In [38]:
# Point 'prod' at version 1, then resolve the alias again to confirm the move.
model_version = 1
new_alias = "prod"
client.set_registered_model_alias(
    name=model_name,
    alias=new_alias,
    version=model_version,
)

model_info = client.get_model_version_by_alias(name=model_name, alias='prod')
model_info

<ModelVersion: aliases=['prod'], creation_timestamp=1741618421364, current_stage='None', description='', last_updated_timestamp=1741618421364, name='nyc-taxi-regressor', run_id='ed52e50ff78e46d3aa0ebef6b663a20d', run_link='', source='/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/ed52e50ff78e46d3aa0ebef6b663a20d/artifacts/models_mlflow', status='READY', status_message=None, tags={'model': 'xgboost'}, user_id=None, version=1>

In [39]:
# Artifact location recorded on the model version; the next cell loads the model from it.
model_info.source

'/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/mlruns/1/ed52e50ff78e46d3aa0ebef6b663a20d/artifacts/models_mlflow'

In [41]:
# Load the model with MLflow's xgboost flavor from the version's artifact location.
# NOTE(review): `source` is an absolute path on the machine that logged the model, so
# this only works where that path is reachable — confirm before running elsewhere.
model=mlflow.xgboost.load_model(model_info.source)
model

<xgboost.core.Booster at 0x14e3e29c0>

#### Prediction pipeline

In [65]:
from sklearn.metrics import root_mean_squared_error
import pandas as pd
import xgboost as xgb

def read_dataframe(filename):
    """Load a green-taxi trip parquet file and prepare it for scoring.

    Adds a ``duration`` column (trip length in minutes, computed from the
    ``lpep_pickup_datetime`` / ``lpep_dropoff_datetime`` columns), keeps only
    trips lasting between 1 and 60 minutes inclusive, and casts the
    pickup/dropoff location IDs to strings.

    Parameters
    ----------
    filename : str or path-like
        Parquet file handed to ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        The filtered trips as an independent frame (not a view of the
        unfiltered data), including the new ``duration`` column.
    """
    df=pd.read_parquet(filename)

    #getting trip duration in minutes (vectorised instead of a per-row lambda)
    df['duration']=(df.lpep_dropoff_datetime-df.lpep_pickup_datetime).dt.total_seconds()/60

    #filtering; .copy() makes the result an independent frame so the column
    #assignment below does not write into a slice (SettingWithCopyWarning)
    df=df[(df.duration>=1) & (df.duration<=60)].copy()

    categorical=['PULocationID','DOLocationID']
    df[categorical]=df[categorical].astype(str)  #converting to string

    return df


def preprocess(df,dv):
    """Turn the model's feature columns into the vectorizer's output format.

    Selects ``PULocationID``, ``DOLocationID`` and ``trip_distance`` from
    ``df``, converts the rows to a list of per-row dicts and returns whatever
    ``dv.transform`` produces for them.
    """
    feature_columns=['PULocationID','DOLocationID','trip_distance']
    records=df[feature_columns].to_dict(orient='records')
    return dv.transform(records)


def test_model(X_test, y_test,model_name='nyc-taxi-regressor', alias='prod'):
    """Score the aliased registry model on a test set via MLflow's xgboost flavor.

    Resolves ``alias`` of ``model_name`` through the notebook-level ``client``,
    loads the model from the version's ``source`` location, predicts on
    ``X_test`` wrapped in a ``DMatrix`` and returns ``{'rmse': <value>}``
    computed against ``y_test``.
    """
    aliased_version=client.get_model_version_by_alias(name=model_name, alias=alias)
    booster=mlflow.xgboost.load_model(aliased_version.source)

    #the features go through a DMatrix object before predicting
    predictions=booster.predict(xgb.DMatrix(X_test,label=y_test))

    return {'rmse':root_mean_squared_error(y_test,predictions)}


def test_model_v2(X_test, y_test,model_name='nyc-taxi-regressor', alias='prod'):
    """Score the aliased registry model on a test set via MLflow's pyfunc flavor.

    Resolves ``alias`` of ``model_name`` through the notebook-level ``client``,
    loads the model from the version's ``source`` location as a generic pyfunc
    model, predicts directly on ``X_test`` and returns ``{'rmse': <value>}``
    computed against ``y_test``.
    """
    aliased_version=client.get_model_version_by_alias(name=model_name, alias=alias)
    pyfunc_model=mlflow.pyfunc.load_model(aliased_version.source)
    predictions=pyfunc_model.predict(X_test)

    return {'rmse':root_mean_squared_error(y_test,predictions)}

In [57]:
# Load the March 2021 green-taxi trips as the test set (path is relative to the working directory).
df_test=read_dataframe('./data/green_tripdata_2021-03.parquet')
df_test.head()

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2021-03-01 00:05:42,2021-03-01 00:14:03,N,1.0,83,129,1.0,1.56,7.5,...,0.5,0.0,0.0,,0.3,8.8,1.0,1.0,0.0,8.35
1,2,2021-03-01 00:21:03,2021-03-01 00:26:17,N,1.0,243,235,1.0,0.96,6.0,...,0.5,0.0,0.0,,0.3,7.3,2.0,1.0,0.0,5.233333
2,2,2021-03-01 00:02:06,2021-03-01 00:22:26,N,1.0,75,242,1.0,9.93,28.0,...,0.5,2.0,0.0,,0.3,31.3,1.0,1.0,0.0,20.333333
3,2,2021-03-01 00:24:03,2021-03-01 00:31:43,N,1.0,242,208,1.0,2.57,9.5,...,0.5,0.0,0.0,,0.3,10.8,2.0,1.0,0.0,7.666667
4,1,2021-03-01 00:11:10,2021-03-01 00:14:46,N,1.0,41,151,1.0,0.8,5.0,...,0.5,1.85,0.0,,0.3,8.15,1.0,1.0,0.0,3.6


In [46]:
#getting dictvectorizer: download the run's 'preprocessor' artifact directory into the
#current working directory; the call returns the local path it was written to
client.download_artifacts(run_id='ed52e50ff78e46d3aa0ebef6b663a20d',path='preprocessor',dst_path='.')

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 2359.00it/s]


'/Users/mahmudhasan/Desktop/others/personal_projects/mlops_zoomcamp_solution/02-experiment-tracking/preprocessor'

In [48]:
import pickle

# Load the preprocessor downloaded by the previous cell (presumably the fitted
# DictVectorizer from training — confirm against the training notebook).
# NOTE(review): pickle.load can execute arbitrary code; only unpickle artifacts that
# come from a tracking store you trust.
with open("preprocessor/preprocessor.b",'rb') as f_in:
    dv=pickle.load(f_in)

In [None]:
# Vectorize the test trips with the downloaded preprocessor (transform only, no re-fitting).
X_test= preprocess(df_test,dv)
X_test

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 241099 stored elements and shape (80372, 507)>

In [None]:
# Ground truth: the trip duration in minutes computed by read_dataframe.
target='duration'
y_test=df_test[target]

In [64]:
#method 1 (xgboost flavor; features are wrapped in a DMatrix object before predicting)
%time test_model(X_test,y_test)

CPU times: user 2.66 s, sys: 13.5 ms, total: 2.67 s
Wall time: 397 ms


{'rmse': 6.076809458527859}

In [66]:
#method 2 (pyfunc flavor; the loaded model predicts on X_test directly)
%time test_model_v2(X_test,y_test)

CPU times: user 2.81 s, sys: 40.1 ms, total: 2.85 s
Wall time: 575 ms


{'rmse': 6.076809458527859}