In [1]:
from pathlib import Path
from datetime import datetime
import yaml
import mlflow
import numpy as np
import pandas as pd
from prefect import flow, task, get_run_logger
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient
from optuna.samplers import TPESampler
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope
from sklearn.metrics import mean_squared_error
from mlflow.models.signature import infer_signature
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
MLFLOW_TRACKING_URI="sqlite:////home/kaustubh/mlops_zoomcamp/final_project/project_1/mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [13]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("training-pipeline")

<Experiment: artifact_location='/home/kaustubh/mlops_zoomcamp/final_project/project_1/mlruns/1', creation_time=1722408634905, experiment_id='1', last_update_time=1722408634905, lifecycle_stage='active', name='training-pipeline', tags={}>

In [14]:
logged_model = "runs:/48d9511ef73d4aa2b3e9987aebd5f289/model"

In [15]:
import mlflow.sklearn
model = mlflow.sklearn.load_model(logged_model)

In [21]:
df = pd.read_parquet("/home/kaustubh/mlops_zoomcamp/final_project/project_1/data/processed/train.parquet")

In [22]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight,Age
0,M,1.7,1.3625,0.4375,50.391236,20.000572,10.701936,13.182517,17.0
1,M,1.425,1.1125,0.35,22.41028,9.525432,5.173784,5.953395,9.0
2,I,0.9875,0.7375,0.2375,7.90951,3.061746,1.516698,2.26796,7.0
3,F,1.5625,1.225,0.3875,31.18445,14.033003,6.307764,9.355335,10.0
4,I,1.0875,0.8125,0.2625,11.892615,5.499803,2.849125,3.543687,12.0


In [20]:
y_train = df["Age"].values

In [18]:
y_train

array([17.,  9.,  7., ..., 10.,  9., 13.])

In [6]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight,Age
0,M,1.7,1.3625,0.4375,50.391236,20.000572,10.701936,13.182517,17.0
1,M,1.425,1.1125,0.35,22.41028,9.525432,5.173784,5.953395,9.0
2,I,0.9875,0.7375,0.2375,7.90951,3.061746,1.516698,2.26796,7.0
3,F,1.5625,1.225,0.3875,31.18445,14.033003,6.307764,9.355335,10.0
4,I,1.0875,0.8125,0.2625,11.892615,5.499803,2.849125,3.543687,12.0


In [7]:
X_train = df.drop(columns=['Age'])

In [8]:
X_train.head()

Unnamed: 0,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight
0,M,1.7,1.3625,0.4375,50.391236,20.000572,10.701936,13.182517
1,M,1.425,1.1125,0.35,22.41028,9.525432,5.173784,5.953395
2,I,0.9875,0.7375,0.2375,7.90951,3.061746,1.516698,2.26796
3,F,1.5625,1.225,0.3875,31.18445,14.033003,6.307764,9.355335
4,I,1.0875,0.8125,0.2625,11.892615,5.499803,2.849125,3.543687


In [9]:
with open('/home/kaustubh/mlops_zoomcamp/final_project/project_1/mnt/serve/preprocessor.b','rb') as f:
    le = pickle.load(f)

In [10]:
X_train['Sex'] = le.transform(X_train['Sex'])

In [16]:
model.predict(X_train)

array([13.20472738,  9.42248625,  6.85774597, ..., 10.29351285,
        8.84209433, 11.08322771])

In [14]:
def download_artifacts(run_id, artifact_path, dst_path):
    """
    Download the artifact(dict vectorizer) from the model registry
    Args:
        run_id (str): run_id of the model
        artifact_path (str): artifact path in the model registry
        dst_path (str): destination path in the local
    """
    mlflow.artifacts.download_artifacts(
        run_id=run_id, artifact_path=artifact_path, dst_path=dst_path
    )

In [12]:
run_id = '48d9511ef73d4aa2b3e9987aebd5f289'

In [13]:
artifact_path="preprocessor.b"

In [15]:
download_artifacts(
                run_id=run_id,
                artifact_path=artifact_path,
                dst_path="/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline"
            )

preprocessor_path = artifact_path

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 661.04it/s] 


In [23]:
model_name ="crab-age-predictor"
experiment_name = "training-pipeline"
mlflow_tracking_uri = "sqlite:////home/kaustubh/mlops_zoomcamp/final_project/project_1/mlflow.db"

In [24]:
def get_prod_run_id(tracking_uri, model_name, stage="Production"):
    """
    get the latest production run_id from model registry
    Args:
        tracking_uri (str): tracking uri of mlflow server
        model_name (str): experiment_name
        stage (str, optional): Staging or Production. Defaults to "Production".

    Returns:
        str: run_id of the latest production model
    """
    client = MlflowClient(tracking_uri=tracking_uri)
    model_metadata = client.get_latest_versions(name=model_name, stages=[stage])[0]
    run_id = model_metadata.run_id

    return run_id

In [25]:
def get_latest_run_id(mlflow_tracking_uri,model_name):
        """
        Get the run_id of the production model inside the model registry.
        Returns:
            str: run_id of the latest production model
        """
        run_id = get_prod_run_id(
            tracking_uri=mlflow_tracking_uri,
            model_name=model_name,
        )

        return run_id

In [26]:
run = get_latest_run_id(mlflow_tracking_uri,model_name)

  model_metadata = client.get_latest_versions(name=model_name, stages=[stage])[0]


In [27]:
print(run)

332e550aea824c109797dee0b58df543


In [32]:
run_id = "332e550aea824c109797dee0b58df543"

In [2]:
tracking_uri="sqlite:////home/kaustubh/mlops_zoomcamp/final_project/project_1/mlflow.db"
mlflow.set_tracking_uri(tracking_uri)

In [3]:
import mlflow
logged_model = 'runs:/332e550aea824c109797dee0b58df543/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [4]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: 332e550aea824c109797dee0b58df543

In [29]:
def get_model(run_id):
    """
    load the model from the model registry
    Args:
        run_id (str): run_id of the model
    Returns:
        PyFuncModel: loaded model from the model registry
    """

    logged_model = f"runs:/{run_id}/model"

    load_model = mlflow.pyfunc.load_model(logged_model)

    return load_model

In [30]:
def load_model(run_id):
        """
        Load the model from the model registry.
        Args:
            run_id (str): run_id of the model

        Returns:
            PyFuncModel: loaded model from the model registry
        """
        load_model = get_model(run_id)

        return load_model


In [7]:
import pickle

In [6]:
def download_artifacts(run_id, artifact_path, dst_path):
    """
    Download the artifact(dict vectorizer) from the model registry
    Args:
        run_id (str): run_id of the model
        artifact_path (str): artifact path in the model registry
        dst_path (str): destination path in the local
    """
    mlflow.artifacts.download_artifacts(
        run_id=run_id, artifact_path=artifact_path, dst_path=dst_path
    )

In [12]:
def load_dv(
        run_id, artifact_path="preprocessor.b", is_test_service=False
    ):
        # pylint: disable=invalid-name
        """
        Download the Preprocessor and load it from the local path.
        Args:
            run_id (str): run_id of the model
            artifact_path (str, optional):  artifact path in the model registry.
                                            Defaults to "artifact/preprocessor.b".
        Returns:
            Preprocessor: A Preprocessor
        """
        if not is_test_service:
            download_artifacts(
                run_id=run_id,
                artifact_path=artifact_path,
                dst_path="/home/kaustubh/mlops_zoomcamp/final_project/project_1/mnt/serve",
            )

            preprocessor_path = os.path.join(self.local_serve_folder, artifact_path)
        else:
            directory = Path(__file__).resolve().parent
            preprocessor_path = directory / artifact_path

        with open(preprocessor_path, "rb") as f_in:
            dv = pickle.load(f_in)

        return dv

In [16]:
import pandas as pd

# Your dictionary
data = {
    'Sex': 'M',
    'Length': 1.575,
    'Diameter': 1.225,
    'Height': 0.375,
    'Weight': 31.226974,
    'Shucked Weight': 12.303683,
    'Viscera Weight': 6.321938,
    'Shell Weight': 9.63883
}

# Convert the dictionary to a pandas DataFrame
df = pd.DataFrame([data])

# Display the DataFrame


(1, 8)

In [13]:
run_id = "332e550aea824c109797dee0b58df543"

In [14]:
dv = load_dv(run_id)

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 607.52it/s] 


NameError: name 'os' is not defined

In [3]:
client.search_experiments()

[<Experiment: artifact_location='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1', creation_time=1722399447940, experiment_id='1', last_update_time=1722399447940, lifecycle_stage='active', name='training-pipeline', tags={}>,
 <Experiment: artifact_location='/home/kaustubh/mlops_zoomcamp/final_project/project_1/mnt/mlruns/artifacts/0', creation_time=1722399437286, experiment_id='0', last_update_time=1722399437286, lifecycle_stage='active', name='Default', tags={}>]

In [19]:
def get_latest_version_model(model_name="car_price_prediction", stage="production"):
    """
    Get the latest version of the production model in the model registry
    Args:
        model_name (str, optional): model name used in mlflow.
                                    Defaults to "crag-age-predictor".
        stage (str, optional): stage inside mlflow model registry. Defaults to "production".

    Returns:
        str: the latest version in the mlflow model registry
    """
    latest_version = client.get_latest_versions(name=model_name, stages=[stage])

    return latest_version

In [20]:
prod_model = get_latest_version_model()

  latest_version = client.get_latest_versions(name=model_name, stages=[stage])


In [21]:
prod_model

[]

In [22]:
register_models = client.search_registered_models()

In [23]:
register_models

[<RegisteredModel: aliases={}, creation_timestamp=1721794459060, description=None, last_updated_timestamp=1721794459084, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1721794459084, current_stage='None', description=None, last_updated_timestamp=1721794459084, name='car_price_prediction', run_id='120895adb5d24be6b750a32fb133ec4e', run_link=None, source='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/120895adb5d24be6b750a32fb133ec4e/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>], name='car_price_prediction', tags={}>]

In [5]:
model_name="crab-age-predictor"

lastest_versions=client.get_latest_versions(name=model_name)

for version in lastest_versions:
    print(f"version:{version.version}, stage:{version.current_stage}")

version:2, stage:None


  lastest_versions=client.get_latest_versions(name=model_name)


In [19]:
client.transition_model_version_stage(
        name=model_name,
        version=2,
        stage="Production",
        archive_existing_versions=True,
    )

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1722403623607, current_stage='Production', description=None, last_updated_timestamp=1722407361728, name='crab-age-predictor', run_id='a19a76855a0c438cad62e61ad79028f1', run_link=None, source='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/a19a76855a0c438cad62e61ad79028f1/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [1]:
import os
os.environ.get("HOME")

'/home/kaustubh'

In [3]:
import json
with open("../data/raw/sample_data.json", "r", encoding="utf-8") as f_in:
        payloads = json.load(f_in)

for payload in payloads:
        print(payload)
print(payloads)

[{'Sex': 'M', 'Length': 1.575, 'Diameter': 1.225, 'Height': 0.375, 'Weight': 31.226974, 'Shucked Weight': 12.303683, 'Viscera Weight': 6.321938, 'Shell Weight': 9.63883}, {'Sex': 'I', 'Length': 1.2375, 'Diameter': 1.0, 'Height': 0.375, 'Weight': 21.885814, 'Shucked Weight': 7.654365, 'Viscera Weight': 3.798833, 'Shell Weight': 7.654365}, {'Sex': 'F', 'Length': 1.45, 'Diameter': 1.1625, 'Height': 0.4125, 'Weight': 28.250277, 'Shucked Weight': 11.127179, 'Viscera Weight': 7.016501, 'Shell Weight': 7.257472}]


In [5]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 6.5",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=20,
    order_by=["metrics.rmse ASC"] 
)

In [6]:
for run in runs:
    
    
    print(f"run-id : {run.info.run_id}, rmse : {run.data.metrics}" , end="\n\n")

run-id : 3c70caddcbb6457e88b7447e04fbbb1c, rmse : {'training_mean_squared_error': 2.4953468130878234, 'training_mean_absolute_error': 1.1053483682311869, 'training_r2_score': 0.7599283742668574, 'training_root_mean_squared_error': 1.5796666778430897, 'training_score': 0.7599283742668574, 'rmse': 1.9686371712050197}

run-id : be8c8de37a4c4833bd24be24e2670f62, rmse : {'training_mean_squared_error': 2.4953468130878234, 'training_mean_absolute_error': 1.1053483682311867, 'training_r2_score': 0.7599283742668574, 'training_root_mean_squared_error': 1.5796666778430897, 'training_score': 0.7599283742668574, 'rmse': 1.9686371712050197}

run-id : 4d2c2a72103c48a5a64fa6dcd2c9115c, rmse : {'training_mean_squared_error': 1.8218383920209247, 'training_mean_absolute_error': 0.9251408184020271, 'training_r2_score': 0.8247250833825781, 'training_root_mean_squared_error': 1.3497549377649725, 'training_score': 0.8247250833825781, 'rmse': 1.9871548286340732}

run-id : 9d3e2edb34e2477db484947712310383, rms

In [7]:
for run in runs:
    
    
    print(f"run-id : {run.info.run_id}, rmse : {run.data.params}" , end="\n\n")

run-id : 3c70caddcbb6457e88b7447e04fbbb1c, rmse : {'bootstrap': 'True', 'ccp_alpha': '0.0', 'criterion': 'squared_error', 'max_depth': '15', 'max_features': '1.0', 'max_leaf_nodes': 'None', 'max_samples': 'None', 'min_impurity_decrease': '0.0', 'min_samples_leaf': '4', 'min_samples_split': '2', 'min_weight_fraction_leaf': '0.0', 'monotonic_cst': 'None', 'n_estimators': '34', 'n_jobs': '-1', 'oob_score': 'False', 'random_state': '42', 'verbose': '0', 'warm_start': 'False'}

run-id : be8c8de37a4c4833bd24be24e2670f62, rmse : {'bootstrap': 'True', 'ccp_alpha': '0.0', 'criterion': 'squared_error', 'max_depth': '15', 'max_features': '1.0', 'max_leaf_nodes': 'None', 'max_samples': 'None', 'min_impurity_decrease': '0.0', 'min_samples_leaf': '4', 'min_samples_split': '2', 'min_weight_fraction_leaf': '0.0', 'monotonic_cst': 'None', 'n_estimators': '34', 'n_jobs': '-1', 'oob_score': 'False', 'random_state': '42', 'verbose': '0', 'warm_start': 'False'}

run-id : 4d2c2a72103c48a5a64fa6dcd2c9115c, r

In [8]:
params = run.data.params

In [9]:
params

{'bootstrap': 'True',
 'ccp_alpha': '0.0',
 'criterion': 'squared_error',
 'max_depth': '4',
 'max_features': '1.0',
 'max_leaf_nodes': 'None',
 'max_samples': 'None',
 'min_impurity_decrease': '0.0',
 'min_samples_leaf': '4',
 'min_samples_split': '2',
 'min_weight_fraction_leaf': '0.0',
 'monotonic_cst': 'None',
 'n_estimators': '16',
 'n_jobs': '-1',
 'oob_score': 'False',
 'random_state': '42',
 'verbose': '0',
 'warm_start': 'False'}

In [10]:
RF_PARAMS = ['max_depth', 'n_estimators', 'min_samples_split', 'min_samples_leaf', 'random_state']

In [11]:
for param in RF_PARAMS:
            params[param] = int(params[param])
            

In [12]:
params

{'bootstrap': 'True',
 'ccp_alpha': '0.0',
 'criterion': 'squared_error',
 'max_depth': 4,
 'max_features': '1.0',
 'max_leaf_nodes': 'None',
 'max_samples': 'None',
 'min_impurity_decrease': '0.0',
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': '0.0',
 'monotonic_cst': 'None',
 'n_estimators': 16,
 'n_jobs': '-1',
 'oob_score': 'False',
 'random_state': 42,
 'verbose': '0',
 'warm_start': 'False'}

In [16]:
RF_PARAMS = ['max_depth', 'n_estimators', 'min_samples_split', 'min_samples_leaf', 'random_state']

# Convert specified parameters to int
for param in RF_PARAMS:
    params[param] = int(params[param])

# Print only the specified parameters
filtered_params = {param: params[param] for param in RF_PARAMS}
print(filtered_params)


{'max_depth': 4, 'n_estimators': 16, 'min_samples_split': 2, 'min_samples_leaf': 4, 'random_state': 42}


In [6]:
experiment_name = "training-pipeline"

In [7]:
experiment = client.get_experiment_by_name(experiment_name)
best_model_meta_data = client.search_runs(
        experiment_ids=experiment.experiment_id,
        run_view_type=ViewType.ACTIVE_ONLY,
        order_by=["metrics.rmse ASC"],
        max_results=1,
    )[0]

In [8]:
best_model_meta_data

<Run: data=<RunData: metrics={'rmse': 1.9593159814429755}, params={'max_depth': '13',
 'min_samples_leaf': '4',
 'min_samples_split': '7',
 'n_estimators': '43',
 'random_state': '42'}, tags={'developer': 'kaustubh',
 'mlflow.runName': 'salty-zebra-692',
 'mlflow.source.git.commit': 'f909de3afdc3eacf8a5fdcb54b694b44ae15f236',
 'mlflow.source.name': 'training_pipeline.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'kaustubh',
 'model': 'RF'}>, info=<RunInfo: artifact_uri='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/15d487ad370c4e11bb81beef731b7e88/artifacts', end_time=1721739517249, experiment_id='1', lifecycle_stage='active', run_id='15d487ad370c4e11bb81beef731b7e88', run_name='salty-zebra-692', run_uuid='15d487ad370c4e11bb81beef731b7e88', start_time=1721739500832, status='FINISHED', user_id='kaustubh'>, inputs=<RunInputs: dataset_inputs=[]>>

In [7]:
register_models = client.search_registered_models()

In [8]:
register_models

[<RegisteredModel: aliases={}, creation_timestamp=1722399539416, description=None, last_updated_timestamp=1722403814274, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1722399539447, current_stage='Production', description=None, last_updated_timestamp=1722403814274, name='crab-age-predictor', run_id='aaf530d600bd428488dbd5f593d1f23f', run_link=None, source='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/aaf530d600bd428488dbd5f593d1f23f/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>,
  <ModelVersion: aliases=[], creation_timestamp=1722403623607, current_stage='None', description=None, last_updated_timestamp=1722403623607, name='crab-age-predictor', run_id='a19a76855a0c438cad62e61ad79028f1', run_link=None, source='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/a19a76855a0c438cad62e61ad79028f1/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>],

In [18]:
for model in register_models:
    print(f"Model Name: {model.latest_versions[0].version}")
    # for version in model.latest_versions:

    #     print(f"    Version: {version.version}, Stage: {version.current_stage}, Status: {version.status}")


Model Name: 1


In [11]:
def get_latest_version_model(model_name="crab-age-predictor", stage="production"):
    """
    Get the latest version of the production model in the model registry
    Args:
        model_name (str, optional): model name used in mlflow.
                                    Defaults to "crag-age-predictor".
        stage (str, optional): stage inside mlflow model registry. Defaults to "production".

    Returns:
        str: the latest version in the mlflow model registry
    """
    latest_version = client.get_latest_versions(name=model_name, stages=[stage])

    return latest_version

In [12]:
prod_model = get_latest_version_model()

  latest_version = client.get_latest_versions(name=model_name, stages=[stage])


In [13]:
def compare_models(prod_model, best_model_meta_data):
    """
    Comparison between the latest production model and the best model
    from the running experiment

    Returns:
        bool: A boolean value to register the best model or not
    """
    prod_model_run_id = prod_model[0].run_id
    prod_model_metrics_data = client.get_metric_history(
        prod_model_run_id, key="rmse"
    )
    prod_model_metrics = prod_model_metrics_data[0].value

    best_model_metrics = best_model_meta_data.data.metrics["rmse"]

    # True if current best model metrics is better than production metrics
    is_register = best_model_metrics > prod_model_metrics

    return is_register

In [14]:
def register_best_model(model_meta_data, model_name="crab-age-predictor"):
    """
    Register the best model from the training trials in the model registry
    Args:
        model_meta_data (): metadata about best model in the model registry
        model_name (str, optional): model name used in mlflow.
                                    Defaults to "crag-age-predictor".

    Returns:
        metadata about registered model
    """
    best_model_id = model_meta_data.info.run_id
    best_model_uri = f"runs:/{best_model_id}/model"

    reg_model_meta_data = mlflow.register_model(
        model_uri=best_model_uri, name=model_name
    )

    return reg_model_meta_data

In [15]:
def transition_model_stage(
    reg_model_meta_data, model_name="crag-age-predictor", stage="production"
):
    """
    Transitioned the best model from the training trials to the production stage
    Args:
        reg_model_meta_data (): metadata about registered model
        model_name (str, optional): model name used in mlflow.
                                    Defaults to "crag-age-predictor".
        stage (str, optional): stage inside mlflow model registry. Defaults to "production".
    """
    client.transition_model_version_stage(
        name=model_name,
        version=reg_model_meta_data.version,
        stage=stage,
        archive_existing_versions=True,
    )

    date = datetime.today().date()
    client.update_model_version(
        name=model_name,
        version=reg_model_meta_data.version,
        description=f"The model version {reg_model_meta_data.version} "
        f"was transition to {stage} on {date}",
    )

In [17]:
def search_best_model(experiment_name):
    """
    Search the best model from all the trials in the experiment and its metadata

    Args:
        experiement_name (str): An experiment name used in the mlflow

    Returns:
        model_meta_data : metadata about best model in the model registry
    """
    experiment = client.get_experiment_by_name(experiment_name)
    best_model_meta_data = client.search_runs(
        experiment_ids=experiment.experiment_id,
        run_view_type=ViewType.ACTIVE_ONLY,
        order_by=["metrics.rmse ASC"],
        max_results=1,
    )[0]
    return best_model_meta_data

In [18]:
best_model_meta_data = search_best_model(experiment_name)

register_models = client.search_registered_models()
# if len(register_models) == 0:
#         is_register = True
# else:
#         prod_model = get_latest_version_model()
#         is_register = compare_models(
#             prod_model=prod_model, best_model_meta_data=best_model_meta_data
#         )

# if is_register:
#         # logger.info("Registering the best model")
#         reg_model_meta_data = register_best_model(model_meta_data=best_model_meta_data)

#         # logger.info("Transition the best model to the production stage")
#         transition_model_stage(reg_model_meta_data)


In [23]:

reg_model_meta_data = register_best_model(model_meta_data=best_model_meta_data)


Successfully registered model 'crab-age-predictor'.


MlflowException: Run '15d487ad370c4e11bb81beef731b7e88' not found

In [19]:
best_model_meta_data

<Run: data=<RunData: metrics={'rmse': 1.9593159814429755}, params={'max_depth': '13',
 'min_samples_leaf': '4',
 'min_samples_split': '7',
 'n_estimators': '43',
 'random_state': '42'}, tags={'developer': 'kaustubh',
 'mlflow.runName': 'salty-zebra-692',
 'mlflow.source.git.commit': 'f909de3afdc3eacf8a5fdcb54b694b44ae15f236',
 'mlflow.source.name': 'training_pipeline.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'kaustubh',
 'model': 'RF'}>, info=<RunInfo: artifact_uri='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/15d487ad370c4e11bb81beef731b7e88/artifacts', end_time=1721739517249, experiment_id='1', lifecycle_stage='active', run_id='15d487ad370c4e11bb81beef731b7e88', run_name='salty-zebra-692', run_uuid='15d487ad370c4e11bb81beef731b7e88', start_time=1721739500832, status='FINISHED', user_id='kaustubh'>, inputs=<RunInputs: dataset_inputs=[]>>

In [20]:
register_models

[<RegisteredModel: aliases={}, creation_timestamp=1721714753912, description=None, last_updated_timestamp=1721714753932, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1721714753932, current_stage='None', description=None, last_updated_timestamp=1721714753932, name='crab-age-predictor', run_id='f65236433a9a457095ad396f4ec3ae9f', run_link=None, source='/home/kaustubh/mlops_zoomcamp/final_project/project_1/pipeline/mlruns/1/f65236433a9a457095ad396f4ec3ae9f/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>], name='crab-age-predictor', tags={}>]

In [21]:
prod_model = get_latest_version_model()

  latest_version = client.get_latest_versions(name=model_name, stages=[stage])


In [22]:
prod_model

[]

In [4]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 6.5",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=20,
    order_by=["metrics.rmse ASC"] 
)

In [5]:
for run in runs:
    
    
    print(f"run-id : {run.info.run_id}, rmse : {run.data.metrics}" , end="\n\n")

run-id : 15d487ad370c4e11bb81beef731b7e88, rmse : {'rmse': 1.9593159814429755}

run-id : f65236433a9a457095ad396f4ec3ae9f, rmse : {'rmse': 1.9593159814429755}

run-id : b8d85576e56e4d52af5750504e195317, rmse : {'rmse': 1.9593159814429755}

run-id : 4c9920e50d62474b933b89d68bff695b, rmse : {'rmse': 1.967766410886033}

run-id : d306bfe7d3474445b3231f1c1ba5972f, rmse : {'rmse': 1.967766410886033}

run-id : 1849064b41ff40a1b816a687c5d5d8cf, rmse : {'rmse': 1.967766410886033}

run-id : c9b3257fc1f049b78d3558b5cfc66017, rmse : {'rmse': 2.1628432318151782}

run-id : 24ca0440ac6f44158c22e648646ec553, rmse : {'rmse': 2.1628432318151782}

run-id : fe65872598dc41c5a52a995165f36c75, rmse : {'rmse': 2.1628432318151782}

run-id : ae40b93ad3f7464e9be4d25c88c74c3f, rmse : {'rmse': 2.401021703429892}

run-id : aa5d208272bd4985b68f0ee8a2d32e4c, rmse : {'rmse': 2.401021703429892}

run-id : 8ebce9d1173644b28fb75c3132424af6, rmse : {'rmse': 2.401021703429892}

