# Explore loading a model and its information through mlflow.pyfunc

In [12]:
from typing import Optional

import mlflow
import mlflow.pyfunc
import pandas as pd


In [7]:
# Point MLflow at the local file-based tracking store, then read the URI back
# so the cell output confirms which store is active.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# configurable before sharing the notebook.
mlflow.set_tracking_uri("file:///home/louberehc/OCR/projets/7_scoring_model/mlruns")
tracking_uri = mlflow.get_tracking_uri()
tracking_uri

'file:///home/louberehc/OCR/projets/7_scoring_model/mlruns'

In [8]:
def make_model_uri(model_name: str, stage: str):
    """Build the registry URI ``models:/<model_name>/<stage>`` for a registered model."""
    return "models:/{}/{}".format(model_name, stage)


def load_model(model_uri):
    """Return the pyfunc model that MLflow loads from ``model_uri``.

    The URI is passed unchanged to ``mlflow.pyfunc.load_model``.
    """
    pyfunc_model = mlflow.pyfunc.load_model(model_uri=model_uri)
    return pyfunc_model


def load_data(DATA_PATH):
    """Read the pickled customer table and split it into features and target.

    The whole table is cast to float64, then the 'TARGET' column is removed
    from it and returned separately.

    Returns a ``(features, target)`` tuple: the frame without 'TARGET' and the
    'TARGET' column itself.
    """
    customers = pd.read_pickle(DATA_PATH)
    customers = customers.astype("float64")
    # pop() removes the column from `customers` and hands it back.
    labels = customers.pop('TARGET')
    return customers, labels

# Identify the registered model and the data file used by the cells below.
# Nothing is loaded here yet: this cell only builds the model URI and asks
# MLflow where the model's requirements file lives.
model_name = "lgbm_test"
stage = "Staging"
# NOTE(review): absolute, machine-specific path — consider making it configurable.
APP_DATA_PATH = "/home/louberehc/OCR/projets/7_scoring_model/pickle_files/reduced_data.pkl"

model_uri = make_model_uri(model_name, stage)
# Path to the requirements.txt stored with the model artifacts (see the log
# line and the `requirements` output below).
requirements = mlflow.pyfunc.get_model_dependencies(model_uri)


2023/09/12 11:04:36 INFO mlflow.pyfunc: To install the dependencies that were used to train the model, run the following command: '%pip install -r /home/louberehc/OCR/projets/7_scoring_model/mlruns/122196375156606001/de0e63da02a1439c8aae7fb7074e33a6/artifacts/lgbm_model/requirements.txt'.


In [5]:
requirements

'/home/louberehc/OCR/projets/7_scoring_model/mlruns/122196375156606001/de0e63da02a1439c8aae7fb7074e33a6/artifacts/lgbm_model/requirements.txt'

In [1]:
!cat '/home/louberehc/OCR/projets/7_scoring_model/mlruns/122196375156606001/de0e63da02a1439c8aae7fb7074e33a6/artifacts/lgbm_model/requirements.txt'

mlflow==2.5.0
cloudpickle==2.2.1
lightgbm==4.0.0
matplotlib==3.7.2
numpy==1.25.2
packaging==23.1
pandas==2.0.3
scikit-learn==1.3.0
scipy==1.11.2
typing-extensions==4.7.1

In [5]:
# Load the registered model and the customer data from the URI and path
# defined above.
model = load_model(model_uri)
features, target = load_data(APP_DATA_PATH)
# Keep the feature index around; presumably it holds the customer ids — TODO confirm.
valid_customer_ids = features.index



 - numpy (current: 1.24.4, required: numpy==1.25.2)
 - scipy (current: 1.11.1, required: scipy==1.11.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


# Find the model's best threshold in the MLflow tracking info

In [32]:
from pprint import pprint
from mlflow import MlflowClient

# List every model in the registry and pretty-print its metadata.
client = MlflowClient()
for rm in client.search_registered_models():
    pprint(dict(rm), indent=4)



{   'aliases': {},
    'creation_timestamp': 1694423489278,
    'description': '',
    'last_updated_timestamp': 1694424048885,
    'latest_versions': [   <ModelVersion: aliases=[], creation_timestamp=1694423489478, current_stage='Staging', description='A first model built on only 1600 individuals to develop a draft API.', last_updated_timestamp=1694424193969, name='lgbm_test', run_id='de0e63da02a1439c8aae7fb7074e33a6', run_link='', source='file:///home/louberehc/OCR/projets/7_scoring_model/mlruns/122196375156606001/de0e63da02a1439c8aae7fb7074e33a6/artifacts/lgbm_model', status='READY', status_message=None, tags={}, user_id=None, version=1>],
    'name': 'lgbm_test',
    'tags': {}}


In [31]:
# List every registered version of the model under study.
# The filter reuses `model_name` instead of repeating the literal name in an
# f-string that had no placeholder.
# `mv` stays bound to the last version after the loop; later cells read
# `mv.run_id` from it.
client = MlflowClient()
for mv in client.search_model_versions(f"name='{model_name}'"):
    pprint(dict(mv), indent=4)
    

{   'aliases': [],
    'creation_timestamp': 1694423489478,
    'current_stage': 'Staging',
    'description': 'A first model built on only 1600 individuals to develop a '
                   'draft API.',
    'last_updated_timestamp': 1694424193969,
    'name': 'lgbm_test',
    'run_id': 'de0e63da02a1439c8aae7fb7074e33a6',
    'run_link': '',
    'source': 'file:///home/louberehc/OCR/projets/7_scoring_model/mlruns/122196375156606001/de0e63da02a1439c8aae7fb7074e33a6/artifacts/lgbm_model',
    'status': 'READY',
    'status_message': None,
    'tags': {},
    'user_id': None,
    'version': 1}


In [22]:
# `mv` is the loop variable left over from the model-version listing above,
# i.e. the last version that was printed.
mv.run_id

'de0e63da02a1439c8aae7fb7074e33a6'

In [26]:
# Fetch the MLflow run attached to this model version and display the metrics
# stored in it.
run = mlflow.get_run(run_id=mv.run_id)
run.data.metrics


{'test_loss_of_income': 0.375,
 'test_AUC': 0.6676575014449674,
 'test_f2': 0.3333333333333333,
 'train_AUC': 0.8113609985542953,
 'train_loss_of_income': 0.21625,
 'train_threshold_loss_of_income': 0.15,
 'train_threshold_f2': 0.15,
 'train_threshold_AUC': 0.13,
 'train_f2': 0.6124999999999999}

In [27]:
# Value logged under 'train_threshold_loss_of_income'; presumably the decision
# threshold selected on the training set for the loss-of-income metric — TODO confirm.
run.data.metrics['train_threshold_loss_of_income']

0.15

In [35]:
def get_model_run_id_from_name_stage_version(
    name: str,
    stage: str,
    version: int,
    client=None,
) -> Optional[str]:
    """Return the run_id of a model version registered in the model registry.

    Searches the versions registered under ``name`` and returns the ``run_id``
    of the first one whose stage and version number both match.

    Parameters
    ----------
    name : str
        Registered model name, inserted into the ``name='...'`` search filter.
    stage : str
        Expected ``current_stage``; compared exactly (e.g. "Staging").
    version : int
        Expected version number. It is compared as text so that the lookup
        works whether the registry reports ``version`` as an int or as a
        string (the MLflow REST API describes it as a string).
    client : optional
        Object exposing ``search_model_versions(filter_string)``. A new
        ``MlflowClient()`` is created when omitted.

    Returns
    -------
    Optional[str]
        The matching run_id, or None when no registered version matches.

    TODO: validate name, stage and version and raise an explicit error when
    no model is found.
    """
    if client is None:
        client = MlflowClient()
    wanted_version = str(version)
    for mv in client.search_model_versions(f"name='{name}'"):
        if mv.current_stage == stage and str(mv.version) == wanted_version:
            return mv.run_id
    return None

In [36]:
# Sanity check: the helper should return the same run id as `mv.run_id` above.
get_model_run_id_from_name_stage_version('lgbm_test', 'Staging', 1)

'de0e63da02a1439c8aae7fb7074e33a6'