# Model Validation Notebook
Use this notebook to validate the latest Experiement run's model, compare metrics and the register the model if it is deemed good enough to promote to the next environment.


-----------------------------------
## Step 1
### Install required packages 


In [79]:
import time
import mlflow
from notebookutils import mssparkutils


StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 80, Finished, Available)

In [80]:
# Get the current notebook name as the key to look up other config
NOTEBOOK_NAME = mssparkutils.runtime.context['currentNotebookName']
#**********************************************

sql_query = "SELECT * FROM MLOpsConfig WHERE notebook ='{}'".format(NOTEBOOK_NAME)
df = spark.sql(sql_query)
df2 =  df.toPandas()
EXPERIMENT_NAME = df2.loc[:,"experiment"].values[0]
MODEL_NAME = df2.loc[:,"model"].values[0]


StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 81, Finished, Available)

-----------------------------------
## Step 2
### Extract the latest experiment run.
Extract the latest run from the given experiment.

In [81]:
# Pull the runs from the given experiment
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
df_runs = mlflow.search_runs([experiment_id])

# Pull the top run id based on the top metric
df_runs_filtered = df_runs.dropna(subset=['metrics.xgb_pre_score'])
latest_experiement_run_id = df_runs_filtered.sort_values(['end_time'], ascending=False).iloc[0].run_id

StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 82, Finished, Available)

-----------------------------------
## Step 3
### Find the latest registered model's run id


In [82]:
# Find the latest registered model's run id
model_found = False

# Search registered models for the 
for model in mlflow.search_registered_models():
    if model.name == MODEL_NAME:
        latest_version_run_id = model.latest_versions[0].run_id
        model_found = True
        break



StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 83, Finished, Available)

-----------------------------------
## Step 4
### Pull the metrics for the latest model and the latest experiment run


In [83]:
# Find the latest experiment's metric and the model version metric to be compared
if model_found == True:
    latest_experiement_metric = df_runs_filtered.loc[df_runs_filtered['run_id'] == latest_experiement_run_id].loc[:,"metrics.xgb_pre_score"].values
    latest_version_metric = df_runs_filtered.loc[df_runs_filtered['run_id'] == latest_version_run_id].loc[:,"metrics.xgb_pre_score"].values

StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 84, Finished, Available)

-----------------------------------
## Step 5
### Compare and decide whether or not to promote the model.


In [89]:
if model_found == True:
    print("Latest Experiment Metric: " + str(latest_experiement_metric))
    print("Latest Model Version Metric: " + str(latest_version_metric))

    # Compare and register
    if latest_experiement_metric > latest_version_metric:
        print("Register the new model")
        # Register the model
        registered_model_name = f"{MODEL_NAME}"

        model_uri = "runs:/{}/model".format(latest_experiement_run_id)
        mlflow.register_model(model_uri, registered_model_name)
    else:
        print("Skipping model Registration")

else:
    print("Register the new model")
    # Register the model
    registered_model_name = f"{MODEL_NAME}"



StatementMeta(, 6e4978d9-eecf-4f39-934d-b838057cefc1, 90, Finished, Available)

Latest Experiment Metric: [0.94736842]
Latest Model Version Metric: [0.94736842]
Skipping model regiustration
