In [7]:
from mlflow.tracking import MlflowClient

# Tracking store used throughout the notebook: a SQLite database file
# named mlflow.db.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client bound to that store; the cells below use it to query experiments
# and runs and to manage registered model versions.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [8]:
# List the experiments the tracking store returns for a default search.
client.search_experiments()

[<Experiment: artifact_location='/workspaces/MLOps-Zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1719736280654, experiment_id='1', last_update_time=1719736280654, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1719735940979, experiment_id='0', last_update_time=1719735940979, lifecycle_stage='active', name='Default', tags={}>]

In [9]:
# Create the demo experiment only if it does not exist yet, so the cell can be
# re-run (Restart & Run All) without failing on a duplicate experiment name.
experiment_name = 'my-cool-experiment'
existing_experiment = client.get_experiment_by_name(experiment_name)

# Either way the cell displays the experiment id, as create_experiment did.
(
  existing_experiment.experiment_id
  if existing_experiment is not None
  else client.create_experiment(name=experiment_name)
)

'2'

In [15]:
from mlflow.entities import ViewType

# Up to five active runs of experiment '1' whose RMSE is below 6.8, ordered
# from lowest to highest RMSE.
# `experiment_ids` is documented as a list of ids, so pass a list explicitly
# rather than relying on a bare string being coerced.
runs = client.search_runs(
  experiment_ids=['1'],
  filter_string="metrics.rmse < 6.8",
  run_view_type=ViewType.ACTIVE_ONLY,
  max_results=5,
  order_by=['metrics.rmse ASC']
)

In [16]:
# One line per run returned by the search: its id and RMSE to 4 decimals.
for run in runs:
  print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: d9b941243a424584b8103197c97f5b82, rmse: 6.3345
run id: 494a1a10629d42c5aad6e5fd71851dbf, rmse: 6.3536
run id: 4a62cb02581b4b37a3db96cfef6c1f06, rmse: 6.3610
run id: ed343f11aa67484d9aca3f97f08bd07b, rmse: 6.3645
run id: d799804d00cf42bf809530d8c460ee03, rmse: 6.3648


In [17]:
import mlflow

# Point the module-level mlflow API at the same store the client uses.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [19]:
# Register the model logged by one of the runs listed above under the
# registry name 'nyc_taxi_regressor'.
# NOTE(review): this run's model lives in the 'model' artifact folder, while
# test_model() further down loads from 'models_mlflow' — the folder name
# appears to differ between runs; confirm before reusing this run_id there.
run_id = "494a1a10629d42c5aad6e5fd71851dbf"
model_uri = f'runs:/{run_id}/model'

mlflow.register_model(model_uri=model_uri, name='nyc_taxi_regressor')

Registered model 'nyc_taxi_regressor' already exists. Creating a new version of this model...
Created version '1' of model 'nyc_taxi_regressor'.


<ModelVersion: aliases=[], creation_timestamp=1719824284651, current_stage='None', description=None, last_updated_timestamp=1719824284651, name='nyc_taxi_regressor', run_id='494a1a10629d42c5aad6e5fd71851dbf', run_link=None, source='/workspaces/MLOps-Zoomcamp/02-experiment-tracking/mlruns/1/494a1a10629d42c5aad6e5fd71851dbf/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [22]:
model_name = 'nyc_taxi_regressor'
# NOTE(review): the recorded cell output contains the tail of a warning that
# quotes this line — presumably a deprecation of the stage-based registry API;
# confirm against the installed MLflow version and consider migrating.
latest_versions = client.get_latest_versions(name=model_name)

# Print the version number and stage of every model version returned.
for version in latest_versions:
  print(f"version: {version.version}, stage: {version.current_stage}")

version: 2, stage: None


  latest_versions = client.get_latest_versions(name=model_name)


In [26]:
# Attach the 'Staging' label to version 2 of the registered model.
# Although the variable is called new_stage, it is applied as a registry
# *alias* (set_registered_model_alias), not as a stage transition.
model_version = 2
new_stage = 'Staging'

client.set_registered_model_alias(name=model_name,
                                  alias=new_stage,
                                  version=model_version,)

In [25]:
from datetime import datetime

In [27]:
# Record in the model version's description which label it was given and on
# which day. The call's return value is the cell output.
date = datetime.today().date()
description = f"The model version {model_version} was transitioned to {new_stage} on {date}"

client.update_model_version(
  name=model_name,
  version=model_version,
  description=description,
)

<ModelVersion: aliases=['Staging'], creation_timestamp=1719824412489, current_stage='None', description='The model version 2 was transitioned to Staging on 2024-07-01', last_updated_timestamp=1719825409236, name='nyc_taxi_regressor', run_id='4a62cb02581b4b37a3db96cfef6c1f06', run_link='', source='/workspaces/MLOps-Zoomcamp/02-experiment-tracking/mlruns/1/4a62cb02581b4b37a3db96cfef6c1f06/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [40]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
  """Load a trip parquet file and prepare it for modelling.

  Adds a ``duration`` column (drop-off minus pickup, in minutes), keeps only
  trips lasting between 1 and 60 minutes inclusive, and casts the pickup and
  drop-off location ids to strings.

  Args:
    filename: Location of the parquet file, passed to ``pd.read_parquet``.

  Returns:
    A DataFrame holding the filtered trips. It is an independent copy, so it
    can be modified without touching the unfiltered data.
  """
  df = pd.read_parquet(filename)

  df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
  df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

  # Vectorised timedelta -> minutes instead of a Python call per row.
  trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
  df['duration'] = trip_time.dt.total_seconds() / 60

  # between() is inclusive on both ends, i.e. 1 <= duration <= 60.
  # .copy() detaches the filtered rows from the full frame so the column
  # assignment below does not write into a slice (SettingWithCopyWarning).
  df = df[df['duration'].between(1, 60)].copy()

  categorical = ['PULocationID', 'DOLocationID']
  df[categorical] = df[categorical].astype(str)

  return df


def preprocess(df, dv):
  """Turn trip records into the feature representation produced by ``dv``.

  Builds the combined feature ``PU_DO`` ("<pickup id>_<drop-off id>") and
  passes it, together with ``trip_distance``, through the given vectorizer.

  Args:
    df: DataFrame with string columns ``PULocationID`` and ``DOLocationID``
      and a ``trip_distance`` column. It is not modified.
    dv: Object exposing ``transform(list_of_dicts)``; only ``transform`` is
      called, so it is expected to be fitted already.

  Returns:
    Whatever ``dv.transform`` returns for the per-row feature dictionaries.
  """
  categorical = ['PU_DO']
  numerical = ['trip_distance']

  # assign() returns a new frame, so the caller's DataFrame does not silently
  # gain a PU_DO column as a side effect of preprocessing.
  features = df.assign(PU_DO=df['PULocationID'] + '_' + df['DOLocationID'])
  records = features[categorical + numerical].to_dict(orient='records')
  return dv.transform(records)


def test_model(run_id, X_test, y_test, artifact_path="models_mlflow"):
  """Load a model logged under an MLflow run and score it on a test set.

  Args:
    run_id: Id of the MLflow run whose logged model is evaluated.
    X_test: Features passed to the loaded model's ``predict``.
    y_test: True target values to compare the predictions against.
    artifact_path: Artifact folder of the run that holds the model. Defaults
      to "models_mlflow"; pass another name for runs that logged their model
      elsewhere (e.g. "model").

  Returns:
    A dict ``{"rmse": value}`` with the root mean squared error.
  """
  model = mlflow.pyfunc.load_model(f"runs:/{run_id}/{artifact_path}")
  y_pred = model.predict(X_test)

  # Take the square root explicitly rather than passing squared=False, which
  # newer scikit-learn releases deprecate and then remove.
  rmse = mean_squared_error(y_test, y_pred) ** 0.5
  return {"rmse": rmse}

In [29]:
# Load and clean the March 2021 trips; they serve as the evaluation set below.
df = read_dataframe('data/green_tripdata_2021-03.parquet')

In [31]:
# run_id is rebound here to the run that is evaluated below.
# Download that run's 'preprocessor' artifact folder into the current
# directory; the next cell loads preprocessor/preprocessor.b from it.
run_id = "4a62cb02581b4b37a3db96cfef6c1f06"
client.download_artifacts(run_id=run_id, path='preprocessor', dst_path='.')

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/workspaces/MLOps-Zoomcamp/02-experiment-tracking/preprocessor'

In [32]:
import pickle

# Restore the vectorizer from the artifact downloaded above.
# NOTE: pickle.load can execute arbitrary code from the file, so only
# unpickle artifacts that come from runs you trust.
with open('preprocessor/preprocessor.b', 'rb') as f_in:
  dv = pickle.load(f_in)

In [33]:
# Feature representation of the evaluation trips, built with the run's vectorizer.
X_test = preprocess(df, dv)

In [34]:
# Ground truth for the evaluation: the 'duration' column (trip length in
# minutes, as computed by read_dataframe).
target = 'duration'
y_test = df[target].values

In [41]:
# Score the run's model on the evaluation set; %time reports how long the
# model load plus prediction took.
%time test_model(run_id=run_id, X_test=X_test, y_test=y_test)

CPU times: user 5.64 s, sys: 253 ms, total: 5.89 s
Wall time: 5.29 s




{'rmse': 6.3108828750265875}

In [42]:
# Attach the 'Production' label to version 2 of the registered model.
# As with 'Staging' earlier, new_stage is applied as a registry *alias*
# (set_registered_model_alias), not as a stage transition.
model_version = 2
new_stage = 'Production'

client.set_registered_model_alias(name=model_name,
                                  alias=new_stage,
                                  version=model_version,)

In [43]:
# Refresh the model version's description with the label it now carries and
# today's date. The call's return value is the cell output.
date = datetime.today().date()
description = f"The model version {model_version} was transitioned to {new_stage} on {date}"

client.update_model_version(
  name=model_name,
  version=model_version,
  description=description,
)

<ModelVersion: aliases=['Production'], creation_timestamp=1719824412489, current_stage='None', description='The model version 2 was transitioned to Production on 2024-07-01', last_updated_timestamp=1719827237763, name='nyc_taxi_regressor', run_id='4a62cb02581b4b37a3db96cfef6c1f06', run_link='', source='/workspaces/MLOps-Zoomcamp/02-experiment-tracking/mlruns/1/4a62cb02581b4b37a3db96cfef6c1f06/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>