# Model Validation

In [0]:
import pandas as pd
import mlflow
from mlflow import MlflowClient
from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository

from databricks.feature_engineering import FeatureEngineeringClient

import pyspark.sql.functions as F
from pyspark.sql.types import StructType

from sklearn.metrics import confusion_matrix
import plotly.express as px



# Resolve the name of the user running this notebook from the notebook context.
current_user = (
    dbutils.notebook.entry_point
    .getDbutils()
    .notebook()
    .getContext()
    .userName()
    .get()
)

# The MLflow experiment is placed under the current user's workspace folder.
xp_name = "dbdemos_mlops_churn_demo_experiment"
xp_path = f"/Users/{current_user}/dbdemos_mlops"
mlflow.set_experiment("/".join([xp_path, xp_name]))

# Name under which the churn model is registered and looked up in later cells.
model_name = "advanced_mlops_churn"

In [0]:
%%sql

-- Select the catalog and schema to work in.
USE CATALOG main;

USE SCHEMA dbdemos_mlops;

In [0]:
# Find the finished run named 'mlops_best_run' with the highest test F1 score.
# No experiment is passed explicitly, so the search relies on the experiment
# selected earlier with mlflow.set_experiment.
best_model = mlflow.search_runs(
    order_by=["metrics.test_f1_score DESC"],
    max_results=1,
    filter_string="status = 'FINISHED' and run_name='mlops_best_run'"
)

# Fail with an actionable message instead of an opaque IndexError from iloc[0]
# when the training notebook has not produced a matching run yet.
if best_model.empty:
    raise RuntimeError(
        "No FINISHED run named 'mlops_best_run' was found in the current experiment; "
        "run the training step before registering a model."
    )

# Register the model artifact logged by that run as a new version of `model_name`.
run_id = best_model.iloc[0]['run_id']
model_details = mlflow.register_model(f'runs:/{run_id}/model', model_name)

best_model

**register the model**

In [0]:
# Registry client reused by the cells that follow.
client = MlflowClient()

# Attach a description to the registered model itself (not to one version).
client.update_registered_model(name=model_details.name, description="This model predicts whether a customer will churn or not")

In [0]:
# Pull the F1 score and run name of the selected run from the search result.
best_score = best_model['metrics.test_f1_score'].values[0]
run_name = best_model['tags.mlflow.runName'].values[0]

# Format the score with the percent format spec: multiplying a rounded float by
# 100 (the previous approach) leaks binary-float noise such as 87.53999999999999.
version_desc = f"This model version has an F1 validation metric of {best_score:.2%}. Follow the link to its training run for more details."

# Describe the newly registered version...
client.update_model_version(
  name=model_details.name,
  version=model_details.version,
  description=version_desc
)

# ...and store the raw score (4 decimals) as a tag on that version.
client.set_model_version_tag(
  name=model_details.name,
  version=model_details.version,
  key="f1_score",
  value=f"{round(best_score,4)}"
)

### Set the latest model version as the Challenger model
A Challenger model is a candidate that replaces the Champion model if it passes its performance evaluation.

In [0]:
# Point the "Challenger" alias at the version registered above. The model name
# comes from the shared `model_name` setting instead of a second hard-coded copy,
# so renaming the model in the configuration cell cannot leave this cell behind.
client.set_registered_model_alias(
  name=model_name,
  alias="Challenger",
  version=model_details.version
)


### Evaluating the model using a Business metric - Revenue Impact
Rather than evaluating model performance using metrics that have no business meaning, in this section the model will be evaluated based on its impact on revenue generated, in dollars.

In [0]:
# Alias of the model version that is validated in the rest of the notebook.
model_alias = "Challenger"

# Download the requirements.txt artifact stored with the aliased model version.
requirements_path = ModelsArtifactRepository(
    f"models:/{model_name}@{model_alias}"
).download_artifacts(artifact_path="requirements.txt")

# Resolve the alias to a concrete model version and fetch its training run.
model_details = client.get_model_version_by_alias(model_name, model_alias)
model_version = int(model_details.version)
run_info = client.get_run(run_id=model_details.run_id)

print(f"Validating {model_alias} model for {model_name} on model version {model_version}")

In [0]:
# Client whose score_batch method is used by predict_churn for batch scoring.
fe = FeatureEngineeringClient()

# Name of the label column and URI of the aliased model under validation.
label_col = "churn"
model_uri = f"models:/{model_name}@{model_alias}"

In [0]:
# Rows of the label table that belong to the 'validate' split.
validation_df = spark.table("advanced_churn_label_table").filter("split='validate'")

def predict_churn(validation_df, model_alias):
    """Score `validation_df` with the model version behind `model_alias`.

    The prediction type passed to `fe.score_batch` is the data type of the
    label column (`label_col`) in `validation_df`'s schema.

    Returns the object produced by `fe.score_batch` unchanged.
    """
    prediction_type = validation_df.schema[label_col].dataType
    aliased_model_uri = f"models:/{model_name}@{model_alias}"
    return fe.score_batch(df=validation_df, model_uri=aliased_model_uri, result_type=prediction_type)

In [0]:
# Business assumptions, in dollars.
cost_of_customer_churn = 5_000 # $5k per customer churned
cost_of_retention = 1_000 # $1k discount given to customer

# Despite the `cost_of_` prefix, these are SIGNED per-customer revenue impacts
# (positive = revenue kept, negative = revenue lost); they are multiplied by the
# confusion-matrix counts and summed into a single revenue figure.
cost_of_true_negative = 0  # loyal customer left alone: nothing gained or lost
# A missed churner is a loss. It was previously counted as +cost_of_customer_churn,
# which scored a false negative higher than a true positive and therefore
# rewarded models for missing churners.
cost_of_false_negative = -cost_of_customer_churn
cost_of_true_positive = cost_of_customer_churn - cost_of_retention  # churner kept, minus the discount
cost_of_false_positive = -cost_of_retention  # discount given to a customer who would have stayed

def get_model_value_in_dollar(model_alias: str) -> float:
    """Estimate the revenue impact, in dollars, of the model behind `model_alias`.

    The model scores `validation_df`; the resulting confusion matrix counts are
    weighted by the per-outcome `cost_of_*` constants and summed.

    Args:
        model_alias: Registry alias of the model version to evaluate.

    Returns:
        The summed revenue impact as a float.

    Raises:
        ValueError: If labels and predictions do not form a 2x2 confusion matrix
            (for example when only one class is present in the data).
    """
    # predict_churn returns the Spark DataFrame produced by fe.score_batch, while
    # scikit-learn needs local data: keep only the two columns required and
    # collect them to pandas before building the confusion matrix.
    model_predictions = (
        predict_churn(validation_df, model_alias)
        .select(label_col, 'prediction')
        .toPandas()
    )
    matrix = confusion_matrix(model_predictions[label_col], model_predictions['prediction'])
    if matrix.shape != (2, 2):
        raise ValueError(
            f"Expected a binary (2x2) confusion matrix for alias '{model_alias}', got shape {matrix.shape}"
        )

    # NOTE(review): the unpacking order relies on scikit-learn sorting the labels
    # so that the negative class comes first - confirm for the actual label values.
    tn, fp, fn, tp = matrix.ravel()
    revenue_gain = (tn * cost_of_true_negative) + (tp * cost_of_true_positive) + (fp * cost_of_false_positive) + (fn * cost_of_false_negative)

    return float(revenue_gain)


# Only the registry lookup is guarded: a failure here is treated as "no Champion
# yet". Errors raised while scoring the models are real problems and must
# surface instead of silently leading to a promotion.
try:
    champion_model = client.get_model_version_by_alias(model_name, 'Champion')
except mlflow.exceptions.MlflowException as e:
    champion_model = None
    print('No Champion model found. re-run this cell after promoting a model to Champion to evaluate busines metric')
    print(f'Registry lookup error: {e}')

if champion_model is not None:
    champion_potential_revenue_gain = get_model_value_in_dollar("Champion")
    challenger_potential_revenue_gain = get_model_value_in_dollar("Challenger")

    data = {
        'Model Alias': ['Challenger', 'Champion'],
        'Potential Revenue Gain': [challenger_potential_revenue_gain, champion_potential_revenue_gain]
    }
else:
    # Nothing to compare against: plot zeros and use placeholder values that make
    # the Challenger win the comparison in the promotion cell.
    data = {
        'Model Alias': ['Challenger', 'Champion'],
        'Potential Revenue Gain': [0.0,] * 2
    }
    challenger_potential_revenue_gain, champion_potential_revenue_gain = 1.0, 0.0

px.bar(data, x='Model Alias', y='Potential Revenue Gain', color='Model Alias',
    labels={'Potential Revenue Gain': 'Revenue Impacted'},
    title='Business Metrics - Revenue Impacted')

In [0]:
# Promote the Challenger only when its estimated revenue impact is strictly
# higher than the Champion's; otherwise the Champion alias is left untouched.
if challenger_potential_revenue_gain > champion_potential_revenue_gain:
    print(f"Registering model {model_name} Version {model_version} as Champion!")
    client.set_registered_model_alias(name=model_name, alias="Champion", version=model_version)