In [1]:
from mlflow.tracking import MlflowClient

In [2]:
# SQLite-backed tracking store. The same URI is passed to
# mlflow.set_tracking_uri() further down in this notebook.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client object used by the following cells for experiment, run and
# model-registry calls.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [4]:
# Show the experiments known to the tracking store.
# NOTE(review): MlflowClient.list_experiments() is not available in MLflow 2.x
# (search_experiments() replaces it) — confirm which MLflow version this
# notebook is meant to run against.
client.list_experiments()

[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>]

In [5]:
from mlflow.entities import ViewType

# Fetch at most five active runs of experiment "1", ordered by ascending
# logged rmse (lowest error first); the empty filter string applies no
# additional condition.
runs = client.search_runs(experiment_ids="1", 
                         filter_string="",
                         run_view_type=ViewType.ACTIVE_ONLY,
                         max_results=5,
                         order_by=["metrics.rmse ASC"])

In [10]:
# Print each returned run's id next to its rmse, formatted to four decimals.
# NOTE(review): metrics['rmse'] raises KeyError for a run that never logged
# an rmse metric.
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: df48ac6dbaf14cc7aab8206aeb51c314, rmse: 6.2890
run id: 7e5e17784e8d41a1b000f731ae3e97d5, rmse: 6.2932
run id: 13220f9d6dd14a65a51b09dae07c4605, rmse: 6.2951
run id: 5da6357c33b4499fb96c5f8c6bb2b608, rmse: 6.3058
run id: 76a5f00236124be9b343061b621254c9, rmse: 6.3080


### Register a model

In [11]:
import mlflow

# Point the module-level mlflow API at the same tracking store the client uses.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [12]:
# Register the "model" artifact of one run as a new version of the
# "nyc-taxi-regressor" registered model.
# The run id is hard-coded (copied by hand from the run listing).
run_id = "76a5f00236124be9b343061b621254c9"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2023/07/12 18:22:29 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 4
Created version '4' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1689182549181, current_stage='None', description=None, last_updated_timestamp=1689182549181, name='nyc-taxi-regressor', run_id='76a5f00236124be9b343061b621254c9', run_link=None, source='./mlruns/1/76a5f00236124be9b343061b621254c9/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [15]:
# Name of the registered model; reused by the transition, update and
# evaluation cells that follow.
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

# Print the version number and current stage of every entry returned.
for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Production
version: 4, stage: Staging


In [16]:
# Transition one model version to a new stage.
# model_version and new_stage are reused by the description update cell.
model_version = 4
new_stage = "Staging"
# archive_existing_versions=False: versions already sitting in the target
# stage are left where they are.
client.transition_model_version_stage(name=model_name, 
                                     version=model_version,
                                     stage=new_stage,
                                     archive_existing_versions=False)

<ModelVersion: creation_timestamp=1689182549181, current_stage='Staging', description=None, last_updated_timestamp=1689183091500, name='nyc-taxi-regressor', run_id='76a5f00236124be9b343061b621254c9', run_link=None, source='./mlruns/1/76a5f00236124be9b343061b621254c9/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [17]:
# Update the model version's description to record the stage transition
# and the date on which it was made.
from datetime import datetime
date = datetime.today().date()  # today's local calendar date, without a time part

client.update_model_version(name=model_name,
                           version=model_version,
                           description=f"The model version {model_version} was transitioned to {new_stage} on {date}")

<ModelVersion: creation_timestamp=1689182549181, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2023-07-12', last_updated_timestamp=1689183222915, name='nyc-taxi-regressor', run_id='76a5f00236124be9b343061b621254c9', run_link=None, source='./mlruns/1/76a5f00236124be9b343061b621254c9/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

### Compare versions and select the new "production" model

In [20]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    """Load a green-taxi trip parquet file and prepare it for modelling.

    Steps:
      1. Parse the pickup / drop-off columns as datetimes.
      2. Add a ``duration`` column holding the trip length in minutes.
      3. Keep only trips lasting between 1 and 60 minutes (inclusive).
      4. Cast the pickup / drop-off location ids to strings so they can be
         used as categorical values.

    Args:
        filename: Path (or anything ``pd.read_parquet`` accepts) of the
            parquet file to read.

    Returns:
        A new ``pandas.DataFrame`` containing the filtered trips with the
        extra ``duration`` column.
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion (no per-row Python lambda).
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # Take an explicit copy of the filtered rows: the column assignment below
    # must write to an independent frame, not to a slice of the unfiltered
    # one (which triggers SettingWithCopyWarning on older pandas).
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Turn trip rows into the feature matrix produced by a fitted vectoriser.

    A combined pickup/drop-off feature ``PU_DO`` (``"<PULocationID>_<DOLocationID>"``)
    is built and, together with ``trip_distance``, converted to a list of
    per-row dicts which is handed to ``dv.transform``.

    The input frame is NOT modified: ``PU_DO`` is added to a new frame, so
    calling this on a filtered slice does not write into that slice.

    Args:
        df: DataFrame with string columns ``PULocationID`` / ``DOLocationID``
            and a ``trip_distance`` column.
        dv: Already-fitted object exposing ``transform(list_of_dicts)``.

    Returns:
        Whatever ``dv.transform`` returns for the feature dicts.
    """
    categorical = ['PU_DO']
    numerical = ['trip_distance']

    # assign() returns a new DataFrame, leaving the caller's frame untouched.
    features = df.assign(PU_DO=df['PULocationID'] + '_' + df['DOLocationID'])
    feature_dicts = features[categorical + numerical].to_dict(orient='records')
    return dv.transform(feature_dicts)


def test_model(name, stage, X_test, y_test):
    """Score the registered model that currently sits in ``stage``.

    The model is loaded from the registry URI ``models:/{name}/{stage}`` via
    ``mlflow.pyfunc`` and evaluated on the supplied test set.

    Args:
        name: Registered model name.
        stage: Registry stage to load (the notebook uses "Production" and
            "Staging").
        X_test: Feature matrix passed to the model's ``predict``.
        y_test: True target values (1-D).

    Returns:
        ``{"rmse": <float>}`` — root mean squared error of the predictions.
    """
    from math import sqrt  # local import keeps this cell self-contained

    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # Take the root of the plain MSE instead of passing squared=False: that
    # keyword is deprecated in recent scikit-learn releases and later removed.
    # For a single-output target the value is the same.
    return {"rmse": sqrt(mean_squared_error(y_test, y_pred))}

In [21]:
# Load the March 2021 green-taxi trips; used below as the test set when
# comparing registered model versions.
df = read_dataframe("data/green_tripdata_2021-03.parquet")

In [24]:
# The model to test: download this run's 'preprocessor' artifact directory
# into the current working directory.
# NOTE(review): this run id differs from the one registered earlier in the
# notebook — confirm it is the intended run.
# NOTE(review): MlflowClient.download_artifacts() is not available in
# MLflow 2.x (mlflow.artifacts.download_artifacts replaces it) — confirm the
# MLflow version in use.
run_id = "a753a490971d41c08186635fccb9a338"
client.download_artifacts(run_id=run_id, path='preprocessor', dst_path='.')

'C:\\Users\\RD_3\\Documents\\Python\\MLOPS\\mlops\\experiment_tracking\\preprocessor'

In [25]:
import pickle

# Load the pickled preprocessor downloaded above; it is passed to
# preprocess() as `dv`.
# NOTE: pickle.load can execute arbitrary code — only unpickle artifacts that
# come from runs you trust.
with open("preprocessor/preprocessor.b", "rb") as f:
    dv = pickle.load(f)

In [26]:
# Encode the test trips with the loaded preprocessor.
X_test = preprocess(df, dv)

In [27]:
# Ground truth: the trip duration in minutes computed by read_dataframe().
target = "duration"
y_test = df[target].values

In [32]:
# Time and score the model version currently in the Production stage.
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=y_test)

CPU times: total: 46.7 s
Wall time: 6.08 s


{'rmse': 6.287683986165473}

In [33]:
# Time and score the model version currently in the Staging stage, on the
# same test set, for comparison with Production.
%time test_model(name=model_name, stage="Staging", X_test=X_test, y_test=y_test)

CPU times: total: 2.36 s
Wall time: 330 ms


{'rmse': 8.294535949238488}

In [34]:
# TODO: build a helper function that transitions a model version to a given
# stage and updates its description accordingly.