In [1]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [2]:
client.search_experiments()

[<Experiment: artifact_location='file:///c:/Users/crab/py2/mlops-zoomcamp/cohorts/2024/02-experiment-tracking/code/mlruns/4', creation_time=1716554906906, experiment_id='4', last_update_time=1716554906906, lifecycle_stage='active', name='a-experiment', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/crab/py2/mlops-zoomcamp/cohorts/2024/02-experiment-tracking/code/mlruns/3', creation_time=1716532631531, experiment_id='3', last_update_time=1716532631531, lifecycle_stage='active', name='nyc-taxi', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/crab/py2/mlops-zoomcamp/cohorts/2024/02-experiment-tracking/code/mlruns/0', creation_time=1716532340085, experiment_id='0', last_update_time=1716532340085, lifecycle_stage='active', name='Default', tags={}>]

In [3]:
client.create_experiment(name="a-experiment")

'4'

In [3]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='3',
    filter_string="metrics.rmse < 6.8",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]  
)

In [4]:
for run in runs:
    print(f'run id: {run.info.run_id}, rmse: {run.data.metrics}') 

run id: a659b685bcb64c08be9d0af6fc9020ce, rmse: {'rmse': 6.313315796028479}
run id: 0f6aa7c7fedc423584dabd6b3d656280, rmse: {'rmse': 6.313315796028479}
run id: d80f9b765a7a4a848036666e43afe575, rmse: {'rmse': 6.313315796028479}
run id: b9400029a4d243f598abda6018dce305, rmse: {'rmse': 6.313315796028479}
run id: 2c5d833648c8454ab3191fe2a920caeb, rmse: {'rmse': 6.313315796028479}


In [5]:
import mlflow

mlflow.set_tracking_uri=(MLFLOW_TRACKING_URI)

In [6]:
run_id = "e57bd274605443459c17ed734c9fdaa5"
model_uri = f"runs/{run_id}/model"

mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '8' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1716558997892, current_stage='None', description=None, last_updated_timestamp=1716558997892, name='nyc-taxi-regressor', run_id=None, run_link=None, source='runs/e57bd274605443459c17ed734c9fdaa5/model', status='READY', status_message=None, tags={}, user_id=None, version=8>

In [7]:
model_name = "nyc-taxi-regressor"

latest_version = client.get_latest_versions(name=model_name)

for version in latest_version:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: None
version: 2, stage: Staging


  latest_version = client.get_latest_versions(name=model_name)


In [29]:
client.transition_model_version_stage(
    name = model_name,
    version = 2,
    stage="Production",
    archive_existing_versions=False
)

  client.transition_model_version_stage(


<ModelVersion: aliases=['Prod'], creation_timestamp=1716554135085, current_stage='Production', description='The model version 2 was transisitioned to Production on 2024-05-24', last_updated_timestamp=1716559277698, name='nyc-taxi-regressor', run_id='e57bd274605443459c17ed734c9fdaa5', run_link='', source='file:///c:/Users/crab/py2/mlops-zoomcamp/cohorts/2024/02-experiment-tracking/code/mlruns/3/e57bd274605443459c17ed734c9fdaa5/artifacts/model', status='READY', status_message=None, tags={'model ': 'gradientboostregressor'}, user_id=None, version=2>

In [30]:
from datetime import datetime

date = datetime.today().date()

model_version = 2
new_stage = 'Production'
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f'The model version {model_version} was transisitioned to {new_stage} on {date}'
)

<ModelVersion: aliases=['Prod'], creation_timestamp=1716554135085, current_stage='Production', description='The model version 2 was transisitioned to Production on 2024-05-24', last_updated_timestamp=1716559281368, name='nyc-taxi-regressor', run_id='e57bd274605443459c17ed734c9fdaa5', run_link='', source='file:///c:/Users/crab/py2/mlops-zoomcamp/cohorts/2024/02-experiment-tracking/code/mlruns/3/e57bd274605443459c17ed734c9fdaa5/artifacts/model', status='READY', status_message=None, tags={'model ': 'gradientboostregressor'}, user_id=None, version=2>

In [11]:
def assign_alias_to_stage(model_name, stage, alias):
    """
    Assign an alias to the latest version of a registered model within a specified stage.

    :param model_name: The name of the registered model.
    :param stage: The stage of the model version for which the alias is to be assigned. Can be
                "Production", "Staging", "Archived", or "None".
    :param alias: The alias to assign to the model version.
    :return: None
    """
    latest_mv = client.get_latest_versions(model_name, stages=[stage]) [0]
    client.set_registered_model_alias(model_name, alias, latest_mv.version)

In [43]:
assign_alias_to_stage(model_name=model_name, stage="Production", alias="Prod")

  latest_mv = client.get_latest_versions(model_name, stages=[stage]) [0]


In [45]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df


def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [18]:
df = read_dataframe("data/green_tripdata_2021-03.parquet")

In [19]:
client.download_artifacts(run_id='e57bd274605443459c17ed734c9fdaa5', path='preprocessor', dst_path='.')

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'c:\\Users\\crab\\py2\\mlops-zoomcamp\\cohorts\\2024\\02-experiment-tracking\\code\\preprocessor'

In [20]:
import pickle

with open('preprocessor/preprocessor.b', 'rb') as f_in:
    dv = pickle.load(f_in)

In [21]:
X_test = preprocess(df,dv)

In [22]:
target = "duration"
y_test = df[target].values

In [50]:
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: None
version: 2, stage: Production


  latest_versions = client.get_latest_versions(name=model_name)


In [51]:
model_name1 = model_name+'@Prod'

print(model_name1)

%time test_model(name=model_name,  stage = "Production", X_test=X_test, y_test=y_test)



nyc-taxi-regressor@Prod


MlflowException: No versions of model with name 'nyc-taxi-regressor' and stage 'Production' found