# Model Registry

In [1]:
import mlflow
from mlflow.client import MlflowClient
from mlflow.entities import ViewType



In [2]:
TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=TRACKING_URI)
mlflow.set_tracking_uri(TRACKING_URI)

## Search runs

In [3]:
runs = client.search_runs(
  experiment_ids='1',
  filter_string="metrics.rmse < 6.305",
  run_view_type=ViewType.ACTIVE_ONLY,
  max_results=5,
  order_by=["metrics.rmse ASC"]
)

In [4]:
for run in runs:
  print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 9280ec20967743fdab9bd73823d4b27d, rmse: 6.3003
run id: 2e2d034044a94dc193b51f8efd7e3226, rmse: 6.3018
run id: f95ffeb932004e0785baeb770812cf3a, rmse: 6.3027


## Registering models

In [17]:
run_id = runs[0].info.run_id
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(
  model_uri=model_uri,
  name="nyc-taxi-regressor"
)

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '1' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1722079187924, current_stage='None', description=None, last_updated_timestamp=1722079187924, name='nyc-taxi-regressor', run_id='9280ec20967743fdab9bd73823d4b27d', run_link=None, source='/Users/bastienwinant/Desktop/Projects/mlops-zoomcamp/02-experiment-tracking/mlruns/1/9280ec20967743fdab9bd73823d4b27d/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [5]:
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

  latest_versions = client.get_latest_versions(name=model_name)


In [6]:
for version in latest_versions:
  print(f"version: {version.version}, stage: {version.tags}")

version: 1, stage: {}


In [7]:
client.set_model_version_tag(
  name = "nyc-taxi-regressor",
  version = '1',
  key="status",
  value="champion"
)

## MLFlow in Practice
### Scenario 1: Individual Kaggle competitor
- no need for remote tracking server, saving locally is enough
- no deployment => no model registry
### Scenario 2: Cross-functional team with 1 data scientist
- requirement for sharing
- no need for remote tracking server, locally is enough
- remote registry is good, but can be local
### Scenario 3: Multiple data scientists working on multiple teams
- remote tracking server is required
- remote registry is required

Components:
1. __backend store:__ where mlflow stores all the metadata about experiments (metrics, parameters,...) => local or DB
2. __artifacts store:__ local or remote storage (S3)
3. __tracking server:__ locally or on a remote host