## Model Drift Detection

In this notebook I implement model drift detection and log the result to MLflow.

Drift is hard to demonstrate with this dataset because it is inherently static; I am therefore using it only to learn how to use the FastAPI and MLflow libraries.

In [29]:
import pandas as pd
import numpy as np
import mlflow

from alibi_detect.cd import KSDrift
from sqlalchemy import create_engine

In [30]:
# SQLAlchemy engine for the SQLite file that the `model_inputs` table is read
# from further down in this notebook.
# NOTE(review): the URL holds a relative path, so the database file is presumably
# resolved against the kernel's working directory — confirm the notebook is
# launched from the directory containing input_warehouse.db.
engine = create_engine("sqlite:///input_warehouse.db")

In [31]:
# Reference ("baseline") feature set for the drift detector: the processed
# test split with the label column removed, so only model inputs remain.
baseline_frame = pd.read_csv(
    r"../data/processed_data/oversampled_test.csv", index_col=0
).drop(columns=["TARGET"])

# Feature names in column order; used later to label the per-feature p-values.
keys = baseline_frame.columns.tolist()

# Column names grouped by storage dtype. Neither list is used by any cell
# visible in this notebook.
cat_cols = [col for col, dtype in baseline_frame.dtypes.items() if dtype == "int64"]
cont_cols = [col for col, dtype in baseline_frame.dtypes.items() if dtype == "float64"]

# The detector below is handed a plain NumPy array (rows x features).
baseline = np.array(baseline_frame)

In [32]:
# Load every logged model input from the warehouse, discard the stored
# prediction column, and convert what remains to a NumPy array.
# NOTE(review): nothing here aligns these columns with the baseline's column
# order — the comparison downstream is positional, so this assumes the
# `model_inputs` table has exactly the baseline features in the same order.
# Confirm against the code that writes the table.
new_data = np.array(
    pd.read_sql("SELECT * FROM model_inputs", engine).drop(columns=["y_pred"])
)

In [33]:
# Build the drift detector with the baseline array as reference data and a
# significance level of 0.05.
# NOTE(review): per the alibi-detect docs, KSDrift tests each feature separately
# and applies a multiple-testing correction (Bonferroni by default) before
# deciding on drift — verify against the installed version.
drift_detector = KSDrift(
    baseline,
    p_val=0.05,
)

In [34]:
# Compare the newly logged inputs against the baseline. The result is a nested
# mapping; the next cell reads its 'data' -> 'is_drift' and 'data' -> 'p_val'
# entries.
preds = drift_detector.predict(new_data)

In [35]:
# Unpack the detector output: the overall drift flag and the p-values
# (indexed per feature in the next cell).
drift_data = preds["data"]
is_drift, p_val = drift_data["is_drift"], drift_data["p_val"]

In [36]:
# Label each p-value with its feature name; position idx in `p_val` is taken
# to correspond to the idx-th baseline column.
dict_arr = {feature: p_val[idx] for idx, feature in enumerate(keys)}

In [37]:
# Report the outcome: the full feature -> p-value mapping when drift is flagged,
# otherwise a short all-clear message.
drift_report = (
    f"Feature drift detected with p-value {dict_arr}"
    if is_drift
    else "No feature drift detected"
)
print(drift_report)

Feature drift detected with p-value {'NAME_CONTRACT_TYPE': 0.0, 'CODE_GENDER': 0.0, 'FLAG_OWN_CAR': 0.0, 'FLAG_OWN_REALTY': 0.0150554, 'CNT_CHILDREN': 2.5082915e-25, 'AMT_INCOME_TOTAL': 0.0, 'AMT_CREDIT': 2.5913689e-05, 'AMT_ANNUITY': 2.857101e-24, 'AMT_GOODS_PRICE': 3.5432568e-15, 'NAME_TYPE_SUITE': 0.0, 'NAME_INCOME_TYPE': 0.0, 'NAME_EDUCATION_TYPE': 0.0, 'NAME_FAMILY_STATUS': 0.0, 'NAME_HOUSING_TYPE': 0.0, 'REGION_POPULATION_RELATIVE': 0.0, 'DAYS_BIRTH': 0.0, 'DAYS_EMPLOYED': 6.123759e-23, 'DAYS_REGISTRATION': 0.64598376, 'DAYS_ID_PUBLISH': 5.81532e-05, 'OWN_CAR_AGE': 0.0, 'FLAG_MOBIL': 0.0, 'FLAG_EMP_PHONE': 0.9952315, 'FLAG_WORK_PHONE': 0.0024543032, 'FLAG_CONT_MOBILE': 1.4781265e-15, 'FLAG_PHONE': 7.861141e-05, 'FLAG_EMAIL': 1.0, 'OCCUPATION_TYPE': 0.0, 'CNT_FAM_MEMBERS': 5.3761394e-30, 'REGION_RATING_CLIENT': 0.8627275, 'REGION_RATING_CLIENT_W_CITY': 0.023070637, 'WEEKDAY_APPR_PROCESS_START': 0.0, 'HOUR_APPR_PROCESS_START': 0.0, 'REG_REGION_NOT_LIVE_REGION': 0.0, 'REG_REGION_NOT

In [None]:
# Record the drift check in MLflow under a single run: the overall drift flag
# as a metric, and the per-feature p-values as a JSON artifact.
with mlflow.start_run(run_name="home_default_classification"):
    mlflow.log_metric("KS_result", is_drift)
    # The p-values are stored as an artifact rather than via log_param: MLflow
    # params are length-limited strings, and a dict with one entry per feature
    # would be stringified and may exceed that limit. The values are cast to
    # built-in floats because they are presumably NumPy scalars coming out of
    # the detector, which the JSON encoder cannot serialize.
    mlflow.log_dict(
        {feature: float(p) for feature, p in dict_arr.items()},
        "drift_p_values.json",
    )