In [48]:
import mlflow
import pandas as pd
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient
from sklearn.metrics import mean_squared_error

In [17]:
client = MlflowClient(tracking_uri="sqlite:///mlflow.db")

In [18]:
client.search_experiments()

[<Experiment: artifact_location='/Users/lap02105/workking_space/learn_mlops/week_2/mlruns/2', creation_time=1687760545631, experiment_id='2', last_update_time=1687760545631, lifecycle_stage='active', name='my-cool-experiment', tags={}>,
 <Experiment: artifact_location='/Users/lap02105/workking_space/learn_mlops/week_2/mlruns/1', creation_time=1687759873120, experiment_id='1', last_update_time=1687759873120, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1687759617356, experiment_id='0', last_update_time=1687759617356, lifecycle_stage='active', name='Default', tags={}>]

In [19]:
# client.create_experiment(name="my-cool-experiment")

In [30]:
runs = client.search_runs(
    experiment_ids="1",
    filter_string="metrics.rmse < 6.7 ",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [31]:
for item in runs:
    print(f"run id: {item.info.run_id}, rmse: {item.data.metrics['rmse']:.4f}")

run id: 20689bc665c94e32b55c2c2346e09e02, rmse: 6.6481
run id: 938c909b813d4638b081bf1fdb20ea86, rmse: 6.6481


In [34]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")

In [35]:
run_id = "20689bc665c94e32b55c2c2346e09e02"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-experiment")

2023/06/26 14:09:54 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/06/26 14:09:54 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Successfully registered model 'nyc-taxi-experiment'.
2023/06/26 14:09:55 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-experiment, version 1
Created version '1' of model 'nyc-taxi-experiment'.


<ModelVersion: aliases=[], creation_timestamp=1687763395009, current_stage='None', description=None, last_updated_timestamp=1687763395009, name='nyc-taxi-experiment', run_id='20689bc665c94e32b55c2c2346e09e02', run_link=None, source='/Users/lap02105/workking_space/learn_mlops/week_2/mlruns/1/20689bc665c94e32b55c2c2346e09e02/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [42]:
model_name = "ny-taxi-model"
latest_version = client.get_latest_versions(name=model_name)
for item in latest_version:
    print(f"version: {item.version}, stage: {item.current_stage}")

version: 1, stage: None
version: 3, stage: Archived
version: 4, stage: Staging


In [44]:
client.transition_model_version_stage(
    name=model_name,
    version=4,
    stage="Staging",
    archive_existing_versions=False
)

<ModelVersion: aliases=[], creation_timestamp=1687761370568, current_stage='Staging', description='', last_updated_timestamp=1687763697531, name='ny-taxi-model', run_id='20689bc665c94e32b55c2c2346e09e02', run_link='', source='/Users/lap02105/workking_space/learn_mlops/week_2/mlruns/1/20689bc665c94e32b55c2c2346e09e02/artifacts/models_mlfow', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [46]:
from datetime import datetime

date = datetime.today().date()
model_version = 4
new_stage = "Staging"
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)

<ModelVersion: aliases=[], creation_timestamp=1687761370568, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2023-06-26', last_updated_timestamp=1687764246298, name='ny-taxi-model', run_id='20689bc665c94e32b55c2c2346e09e02', run_link='', source='/Users/lap02105/workking_space/learn_mlops/week_2/mlruns/1/20689bc665c94e32b55c2c2346e09e02/artifacts/models_mlfow', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [49]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)
    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)

    df["duration"] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds()/60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']

    df[categorical] = df[categorical].astype(str)
    return df

def preprocess(df, dv):
    df["PU_DO"] = df["PULocationID"] + "_" + df["DOLocationID"]
    categorical = ["PU_DO"]
    numerical = ["trip_distance"]
    train_dicts = df[categorical + numerical].to_dict(orient="records")
    return dv.transform(train_dicts)

def test_model(stage, x_test, y_test, name="ny-taxi-model"):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(x_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [50]:
df = read_dataframe("../dataset/green_tripdata_2021-03.parquet")

In [54]:
client.download_artifacts(run_id="20689bc665c94e32b55c2c2346e09e02", path="preprocessor", dst_path=".")

  client.download_artifacts(run_id="20689bc665c94e32b55c2c2346e09e02", path="preprocessor", dst_path=".")


'/Users/lap02105/workking_space/learn_mlops/week_2/preprocessor'

In [55]:
import pickle
with open("preprocessor/preprocessor.b", "rb") as file_in:
    dv = pickle.load(file_in)

In [56]:
x_test = preprocess(df, dv)

In [57]:
target = "duration"
y_test = df[target].values

In [59]:
%time test_model(name=model_name, stage="Production", x_test=x_test, y_test=y_test)

 - mlflow (current: 2.4.1, required: mlflow==2.4)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: user 287 ms, sys: 98.3 ms, total: 386 ms
Wall time: 1.8 s


{'rmse': 6.803432913478844}