# ML Monitoring - Feature importance weighted drift detection and automated-retraining

## Overview

Every model over time is impacted by model performance decay due to data drift and concept drift. One of many solution is to perform drift detection and set up automated retraining of the model. Drift in every feature does't have the same impact on model performance. In this project we combine feature importance of a particular feature with its drift score obtained through statistical test to determine wether to retrain model or not. Thus model will be retrained only when there is drift on features with higher importance and also when feature with lower importance faces higher data drift.

## Dataset:

* Contains warehouse demand data from 2017-01-01 to 2020-11-15
* Working with preprocessed data
* No data leakage as missing values were filled without requirement of any transformation

## Assumptions

* Initial model deployment on march 2019
* True labels/ actual demand available over the weekend
* Model monitored on weekly basis

## Libraries

In [282]:
import pandas as pd
import numpy as np
from datetime import date,datetime,timedelta
from sklearn.model_selection import train_test_split
from rdt import HyperTransformer
import xgboost as xgb
import optuna
import traceback
import shap
import pickle
import logging
from importlib import reload
import logging
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
from evidently import ColumnMapping
from evidently.analyzers.stattests import StatTest
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataDriftTab, CatTargetDriftTab, NumTargetDriftTab, RegressionPerformanceTab
from evidently.options import DataDriftOptions
from evidently.model_profile import Profile
from evidently.model_profile.sections import DataDriftProfileSection, NumTargetDriftProfileSection
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
import os
import json
import mlflow
from functools import wraps
from mlflow.tracking import MlflowClient
%matplotlib inline

In [1]:
# set up logging
reload(logging)
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%I:%M:%S')

NameError: name 'reload' is not defined

In [3]:
# mlflow runner
def mlflow_runner(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # log into MLflow
        client = MlflowClient()
        experiment_name = kwargs['experiment_name']
        try:
            mlflow.set_experiment(experiment_name)
        except Exception:
            logging.error('Experiment does not exists')
            logging.error('Stack trace:{}'.format(traceback.format_exc()))
            experiment_id = mlflow.create_experiment(experiment_name)
            mlflow.set_experiment(experiment_name)
        finally:
            with mlflow.start_run() as run:
                rv = func(*args, **kwargs)
            return rv

    return wrapper

## Data

### Load data

In [4]:
df = pd.read_csv('cpp_demand_forecasting_clean_data_v2.csv', index_col=0)

In [5]:
logging.info(f'dataset shape : {df.shape}')

02:07:22 INFO:dataset shape : (13315, 33)


In [6]:
# set random state
rng = np.random.RandomState(0)

In [7]:
df.head(2)

Unnamed: 0_level_0,date,warehouse_ID,Latitude,Longitude,Product_Type,year,month,is_weekend,is_warehouse_closed,daily_dispatch_count,...,statewise_land_area_per_sqmile,statewise_population_per_sqmile,statewise_geographic_region,geographic_region_division,statewise_median_age,statewise_median_household_income,total_count,yearly_count,monthly_count,weekly_count
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0x2710,2017-01-01,WH_0x3e9,41.681471,-72.794746,Type_A,2017,1,Yes,No,5.5,...,4842,744.722016,new_england,north_east,41.2,78833,0,0,0,0
0x33e6,2017-01-01,WH_0x3ea,38.749077,-105.18306,Type_A,2017,1,Yes,No,6.1,...,103642,56.078318,mountain,west,37.3,41053,0,0,0,0


In [8]:
df.date = pd.to_datetime(df.date)

<b> Data available during initial model deployment is from 2017-01-01 to 2019-03-01

In [9]:
intial_df = df[df.date < datetime(2019,3,1)]

In [None]:
# Split the dataset
train_df, test_df = train_test_split(intial_df,
                                     test_size=0.3,
                                     shuffle=False,
                                     random_state=rng)
eval_df, serve_df = train_test_split(test_df,
                                     test_size=0.5,
                                     shuffle=False,
                                     random_state=rng)

In [11]:
logging.info(f'train_df shape : {train_df.shape}')
logging.info(f'eval_df shape : {eval_df.shape}')
logging.info(f'serve_df shape : {serve_df.shape}')

02:07:23 INFO:train_df shape : (2794, 33)
02:07:23 INFO:eval_df shape : (599, 33)
02:07:23 INFO:serve_df shape : (599, 33)


### Data preprocessing

In [12]:
def split_features_target(df):
    target =  'daily_dispatch_count'
    drop_feats = ['weekly_dispatch_count']
    y = df[target]
    X = df.drop(columns = drop_feats+[target])
    return X,y

In [13]:
X,y = split_features_target(train_df)
X_eval,y_eval = split_features_target(eval_df)
X_serve,_ = split_features_target(serve_df)

### Data transformation

In [14]:
ht = HyperTransformer()

In [15]:
ht.detect_initial_config(data=X)

Detecting a new config from the data ... SUCCESS
Setting the new config ... SUCCESS
Config:
{
    "sdtypes": {
        "date": "datetime",
        "warehouse_ID": "categorical",
        "Latitude": "numerical",
        "Longitude": "numerical",
        "Product_Type": "categorical",
        "year": "numerical",
        "month": "numerical",
        "is_weekend": "categorical",
        "is_warehouse_closed": "categorical",
        "week": "numerical",
        "state": "categorical",
        "county": "categorical",
        "state_cases": "numerical",
        "state_deaths": "numerical",
        "county_cases": "numerical",
        "county_deaths": "numerical",
        "day_of_week": "numerical",
        "days_since_warehouse_started": "numerical",
        "state_abbr": "categorical",
        "is_holiday": "numerical",
        "statewise_population": "numerical",
        "statewise_land_area_per_sqmile": "numerical",
        "statewise_population_per_sqmile": "numerical",
        "statew

In [16]:
ht.fit(X)

In [17]:
X_transformed = ht.transform(X)
X_eval_transformed = ht.transform(X_eval)

## Modelling

In [18]:
### Hypertuning

def objective(trial,data=X_transformed, target=y):
    
    param = {
        "n_estimators" : trial.suggest_int('n_estimators', 0, 500),
        'max_depth':trial.suggest_int('max_depth', 2, 20),
        'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'learning_rate':trial.suggest_loguniform('learning_rate',0.005,0.5),
        'colsample_bytree':trial.suggest_uniform('colsample_bytree',0.3,0.9),
        'subsample':trial.suggest_uniform('subsample',0.4,0.9),
        'random_state': trial.suggest_categorical('random_state',[rng]),
        'nthread' : trial.suggest_categorical('nthread',[-1])
    }
    model = xgb.XGBRegressor(**param)  
    
    model.fit(X_transformed,y,eval_set=[(X_eval_transformed,y_eval)],early_stopping_rounds=20,verbose=False)
    
    preds = model.predict(X_eval_transformed)
    
    r2 = r2_score(y_eval, preds)
    
    return r2
optuna.logging.set_verbosity(0)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

In [2]:
@mlflow_runner
def fit_model(X,y, X_test,y_test, params=None,mlflow_log = False, experiment_name=None):
    if params:
        reg = xgb.XGBRegressor(**params)
    else:
        reg = xgb.XGBRegressor()
    reg.fit(X,y,eval_set=[(X_test,y_test)])
    if mlflow_log:
        mlflow.log_dict(reg.get_params(),'xgboost_regressor_params')
        
    return reg

NameError: name 'mlflow_runner' is not defined

In [20]:
reg = fit_model(X_transformed,y, X_eval_transformed,y_eval, mlflow_log=True,experiment_name='warehouse_demand_forecasting')

[0]	validation_0-rmse:4.19794
[1]	validation_0-rmse:2.98326
[2]	validation_0-rmse:2.01086
[3]	validation_0-rmse:1.49083
[4]	validation_0-rmse:1.16661
[5]	validation_0-rmse:1.02677
[6]	validation_0-rmse:0.99539
[7]	validation_0-rmse:0.97808
[8]	validation_0-rmse:0.98222
[9]	validation_0-rmse:0.99538
[10]	validation_0-rmse:1.00977
[11]	validation_0-rmse:1.02064
[12]	validation_0-rmse:1.02124
[13]	validation_0-rmse:1.02639
[14]	validation_0-rmse:1.01432
[15]	validation_0-rmse:1.01673
[16]	validation_0-rmse:1.01896
[17]	validation_0-rmse:1.02170
[18]	validation_0-rmse:1.02563
[19]	validation_0-rmse:1.03005
[20]	validation_0-rmse:1.02896
[21]	validation_0-rmse:1.01607
[22]	validation_0-rmse:1.01421
[23]	validation_0-rmse:1.01449
[24]	validation_0-rmse:1.01212
[25]	validation_0-rmse:1.01278
[26]	validation_0-rmse:1.00609
[27]	validation_0-rmse:1.00811
[28]	validation_0-rmse:1.01005
[29]	validation_0-rmse:1.01010
[30]	validation_0-rmse:1.01220
[31]	validation_0-rmse:1.01277
[32]	validation_0-

#### Train score

#### Eval score

#### Serve score

### Feature attribution weighted drift detection

In [21]:
def get_feature_importance(model, X):
    # DF, based on which importance is checked

    # Explain model predictions using shap library:
    explainer = shap.TreeExplainer(model)
    shap_scores = explainer.shap_values(X)

    # mean of absolute shap values for every feature
    mean_abs_shap_values = pd.Series(
        np.abs(shap_scores).mean(axis=0),
        index=X.columns).sort_values(ascending=False)
    return mean_abs_shap_values

In [22]:
def get_relative_feature_importance_to_max(feature_importance):
    
    # maximum feature importance
    max_feature_importance = feature_importance.max()

    # relative_feature_importance_to_max is relative feature importance w.r.t max_feature_importance
    relative_feature_importance_to_max = feature_importance / max_feature_importance
    return relative_feature_importance_to_max.to_dict()

In [23]:
numerical_features = [
    'statewise_population_per_sqmile', 'state_cases', 'state_deaths'
]
numerical_features_value = [
    feature + '.value' for feature in numerical_features
]
categorical_features = [
    'warehouse_ID', 'Product_Type', 'is_weekend', 'is_warehouse_closed',
    'state', 'day_of_week', 'is_holiday', 'county'
]
categorical_features_value = [
    feature + '.value' for feature in categorical_features
]
column_mapping = ColumnMapping(id='ID',
                               datetime='date',
                               numerical_features=numerical_features_value,
                               categorical_features=categorical_features_value,
                               task='regression',
                               target='daily_dispatch_count',
                               prediction='y_pred')

In [24]:
def get_drift_profile(reference, production, profile, mlflow_log=False):

    drift_profile = Profile(sections=[profile()])
    drift_profile.calculate(reference,
                                 production,
                                 column_mapping=column_mapping)
    report = drift_profile.json()
    drift_profile_report = json.loads(report)
    
    if mlflow_log:
        if profile == DataDriftProfileSection:
            name = 'data_drift_profile'
        elif profile == NumTargetDriftProfileSection:
            name = 'num_target_drift_profile'
        else:
            pass
        mlflow.log_dict(drift_profile_report, f"{name}.json")
    
    
    return drift_profile_report

In [25]:
def get_data_drift(drift_profile):

    drifts = []
    for feature in column_mapping.numerical_features + column_mapping.categorical_features:
        drifts.append(
            (feature, drift_profile['data_drift']['data']['metrics'][feature]['drift_score'], 
             drift_profile['data_drift']['data']['metrics'][feature]['stattest_name'], 
             drift_profile['data_drift']['data']['metrics'][feature]['drift_detected']))
    return  pd.DataFrame(
        drifts,
        columns=['feature', 'drift_score', 'stattest_name', 'drift_detected'])

In [26]:
def get_weighted_data_drift_score(drift, feature_importance, relative_feature_importance_to_max_dict, threshold, stattest_type):
    
    #feature_importance
    drift['feature_importance'] = drift['feature'].map(feature_importance)
    
    # map relative feature importance
    drift['relative_feature_importance'] = drift['feature'].map(relative_feature_importance_to_max_dict)
    if stattest_type == 'p_value':
        # relative feature importance weighted drift score
        inverse_feature_importance_wrt_max_feature_importance = drift.relative_feature_importance.max() / drift.relative_feature_importance
        drift['feature_importance_weighted_drift_score'] = (drift.drift_score * inverse_feature_importance_wrt_max_feature_importance).replace(np.inf, 1)

        # drift detection based on the weighted drift score based on threshold
        drift['feature_importance_weighted_drift_detected'] = drift[
            'feature_importance_weighted_drift_score'] < threshold
    else:
        # relative feature importance weighted drift score
        drift['feature_importance_weighted_drift_score'] = drift[
            'relative_feature_importance'] * drift['drift_score']

        # drift detection based on the weighted drift score based on threshold
        drift['feature_importance_weighted_drift_detected'] = drift[
            'feature_importance_weighted_drift_score'] > threshold

    return drift

In [27]:
def is_weighted_data_drift_detected(data_drift_profile, production,  model):
    drift_detected = False
    
    feature_importance = get_feature_importance(model, production)
    # relative feature importance based on mean shap values
    relative_feature_importance_to_max_dict = get_relative_feature_importance_to_max(feature_importance)
    drift_scores = get_data_drift(data_drift_profile)
    stattest_type = drift_scores['stattest_name'].iloc[0].split()[1]
    if stattest_type == 'p_value':
        threshold = 0.05
    else:
        threshold = 0.1
    weighted_drift_scores = get_weighted_data_drift_score(drift_scores, feature_importance, relative_feature_importance_to_max_dict,
                                                     threshold, stattest_type)

    if weighted_drift_scores['feature_importance_weighted_drift_detected'].sum(
    ) > 0:
        drift_detected = True
    else:
        drift_detected = False
    return drift_detected, weighted_drift_scores

In [346]:
def overlay_distribution(data1, data2, feature,  opacity=0.5, mlflow_log = False):
    normalization_type = 'probability density'
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=data1, name="training",histnorm=normalization_type))
    fig.add_trace(go.Histogram(x=data2, name="production",histnorm=normalization_type))
    feature = feature.split('.')[0]
    # Overlay both histograms
    fig.update_layout(barmode='overlay', title=feature+" Distribution",
                      xaxis_title=feature,
                      yaxis_title=normalization_type,)
    # Reduce opacity to see both histograms
    fig.update_traces(opacity=opacity)
    if mlflow_log:
        mlflow.log_figure(fig, f"{feature}_Distribution.html")
        
    fig.show()

In [336]:
def display_drifted_features(reference, production, drift_df, mlflow_log = False):
    reference_rdt = ht.reverse_transform(reference)
    production_rdt = ht.reverse_transform(production)
    for index, row in drift_df.iterrows():
        if row['feature_importance_weighted_drift_detected']:

            logging.info(f"drift detected for {row['feature']}")
            logging.info(f"drift_score : {row['drift_score']}")
            logging.info(f"stattest_name : {row['stattest_name']}")
            logging.info(f"raw_feature_importance : {row['feature_importance']}") 
            logging.info(f"relative_feature_importance : {row['relative_feature_importance']}") 
            logging.info(f"feature_importance_weighted_drift_score : {row['feature_importance_weighted_drift_score']}")
            
            overlay_distribution(reference_rdt[row['feature'].split('.')[0]], production_rdt[row['feature'].split('.')[0]], row['feature'], opacity = .4,mlflow_log=True)
        if mlflow_log:
                mlflow.log_param(f"{row['feature']}_feature_importance_weighted_drift_detected", row['feature_importance_weighted_drift_detected'])
                mlflow.log_param(f"{row['feature']}_drift_score", row['drift_score'])
                mlflow.log_param(f"{row['feature']}_stattest_name", row['stattest_name'])
                mlflow.log_param(f"{row['feature']}_raw_feature_importance", row['feature_importance'])
                mlflow.log_param(f"{row['feature']}_relative_feature_importance", row['relative_feature_importance'])
                mlflow.log_param(f"{row['feature']}_feature_importance_weighted_drift_score", row['feature_importance_weighted_drift_score'])

    

In [337]:
def display_target_drift(y_reference, y_production, target_name, target_drift_detected, drift_score, stattest_type, mlflow_log = False):
    logging.info(f"Target drift detected for for {target_name}")
    logging.info(f"drift_score : {drift_score}")
    logging.info(f"stattest_name : {stattest_type}")
    if mlflow_log:
        mlflow.log_param(f"{target_name}_drift_detected", target_drift_detected)
        mlflow.log_param(f"{target_name}_target_drift_score", drift_score)
        mlflow.log_param(f"{target_name}_target_stattest_name", stattest_type)
    overlay_distribution(y_reference, y_production, target_name, opacity = .4, mlflow_log=True)
            

#### Test for target drift

In [338]:
def determine_test_type(reference):

    num_data = reference.shape[0]

    if num_data <= 1000:
        return 'p_value'
    else:
        return 'distance'

In [339]:
def test_target_drift(X_reference, y_reference, X_production, y_production, mlflow_log = False):
    target_drift_detected = False
    # get target drift report
    if column_mapping.task == 'regression':
        profile = NumTargetDriftProfileSection
        drift_name = 'num_target_drift'
    else:
        pass
    target_drift_profile = get_drift_profile(X_reference.join(y_reference), X_production.join(y_production), profile, mlflow_log)
    
    drift_score = target_drift_profile[drift_name]['data']['metrics']['target_drift']
    target_name = target_drift_profile[drift_name]['data']['utility_columns']['target']
    stattest_type = determine_test_type(X_reference)
    if stattest_type == 'p_value':
        threshold = 0.05
        if drift_score <= threshold:

            target_drift_detected = True

    else:
        threshold = 0.1
        if drift_score >= threshold:

            target_drift_detected = True
   
    if mlflow_log:
        display_target_drift(y_reference, y_production, target_name, target_drift_detected, drift_score, stattest_type, mlflow_log = True)
    return target_drift_detected
      

In [340]:
def test_for_drift(X_reference, y_reference, X_production, y_production, model, mlflow_log = False):

    
    target_drift_detected = test_target_drift(X_reference, y_reference, X_production, y_production, mlflow_log)

    logging.info(f"target drift detect: {target_drift_detected}")
    # get data drift report
    data_drift_profile = get_drift_profile(X_reference, X_production,
                                           DataDriftProfileSection, mlflow_log)
    # data drift detected?
    drift_detected, drift_df = is_weighted_data_drift_detected(data_drift_profile,
                                                     X_production,  model)
    
    if drift_detected:
        logging.info('data set drift detected')
        display_drifted_features(X_reference, X_production, drift_df, mlflow_log)
    else:
        logging.info('drift not detected')
    drift_detected = drift_detected or target_drift_detected
    return drift_detected

In [341]:
@mlflow_runner
def test_mlflow(experiment_name):
    # Log parameters
    mlflow.log_param("begin", datetime(2020,1,2))
    mlflow.log_param("end", datetime(2022,1,2))
    mlflow.log_dict(X_transformed.sample(500).to_dict(), 'input_features.json')
    mlflow.log_dict(X_eval_transformed.sample(500).to_dict(), 'target.json')
    return test_for_drift(X_transformed.sample(500), y.sample(500), X_eval_transformed.sample(500), y_eval.sample(500) ,reg, mlflow_log = True)


In [342]:
#test_mlflow(experiment_name = 'Data Drift Evaluation with Evidently')

In [361]:
def rewrite_name(df_type, error_type):
    return f"{df_type}{'_'}{error_type}"

def get_scores(y_actual, y_predicted,df_type='train'):
    #y_predicted = np.expm1(y_predicted)
    #y_actual = np.expm1(y_actual)
    rmse = mean_squared_error(y_actual, y_predicted, squared=False)
    mse = mean_squared_error(y_actual, y_predicted, squared=True)
    mae = mean_absolute_error(y_actual, y_predicted)
    mape = mean_absolute_percentage_error(y_actual, y_predicted)
    return {rewrite_name(df_type, 'rmse'):rmse, rewrite_name(df_type, 'mse'):mse, rewrite_name(df_type, 'mae'):mae, rewrite_name(df_type, 'mape'):mape}

def dataset_results(X,y, model, df_type):

    y_pred = model.predict(X, y)
    
    return get_scores(y,y_pred,df_type)
    

In [395]:
@mlflow_runner
def simulate_Weekly_experiment(df, model, preds, experiment_name, week_num):
    # Get weekly data
        row = df.iloc[0]
        dt = row['date']
        start = dt - timedelta(days=row['date'].weekday())
        end = start + timedelta(days=6)
        sub_df = df.set_index('date')[start:end]
        sub_df = sub_df.reset_index().set_index('ID')
        df = df[(df.date.dt.date > end.date())]
        print(f'Experiment week {week_num}')
        print('---------------------------------')
        print(f"Experiment start date : {start.to_pydatetime()}")
        print(f"Experiment end_date : {end.to_pydatetime()}")
        mlflow.log_param('experiment_week',week_num)
        mlflow.log_param('week_begin_experiment_start_date',start)
        mlflow.log_param('week_end_experiment_end_date',end)
        
        # Transform sub_df to X, y
        X_prod,y_prod = split_features_target(sub_df)
        ht.fit(X_prod)
        X_prod_transformed = ht.transform(X_prod)
        mlflow.log_dict(X_prod_transformed.to_dict(), 'input_features.json')
        mlflow.log_dict(y_prod.to_dict(), 'target.json')
        # weekly_prediction
        weekly_predictions = model.predict(X_prod_transformed)

        mlflow.log_dict(pd.Series(weekly_predictions,name=y_prod.name, index = y_prod.index).to_dict(),'weekly_predictions.json')
        # Predict for X,y
        scores_dict = get_scores(y_prod,weekly_predictions,f"weekly")
        mlflow.log_metrics(scores_dict)
        preds.extend(weekly_predictions.tolist())
        
        if test_for_drift(X_transformed, y, X_prod_transformed, y_prod, reg, mlflow_log=True):
            # retrain model
            # test for drift an
            logging.info('Retraining model')
            model.fit(X_prod_transformed, y_prod)
        else:
            logging.info('No Drift detection')
        return df, preds

In [396]:
def simulate_model(df, model, experiment_name):
    
    preds = []
    df = df.reset_index()
    current_df = df
    week_num = 1
    while current_df.shape[0] > 0:

        current_df, preds = simulate_Weekly_experiment(current_df, model, preds, experiment_name = experiment_name, week_num=week_num)
        week_num +=1
    return df,preds   

In [397]:
df, preds = simulate_model(serve_df, reg, experiment_name='weekly_ml_monitoring')

2022/06/27 07:49:03 INFO mlflow.tracking.fluent: Experiment with name 'weekly_ml_monitoring' does not exist. Creating a new experiment.


Experiment week 1
---------------------------------
Experiment start date : 2018-12-24 00:00:00
Experiment end_date : 2018-12-30 00:00:00


07:49:03 INFO:Target drift detected for for daily_dispatch_count
07:49:03 INFO:drift_score : 0.8506966925281619
07:49:03 INFO:stattest_name : distance


07:49:04 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:04 INFO:data set drift detected
07:49:04 INFO:drift detected for Product_Type.value
07:49:04 INFO:drift_score : 0.8325546111576977
07:49:04 INFO:stattest_name : Jensen-Shannon distance
07:49:04 INFO:raw_feature_importance : 1.775588870048523
07:49:04 INFO:relative_feature_importance : 1.0
07:49:04 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:04 INFO:Retraining model


Experiment week 2
---------------------------------
Experiment start date : 2018-12-31 00:00:00
Experiment end_date : 2019-01-06 00:00:00


07:49:05 INFO:Target drift detected for for daily_dispatch_count
07:49:05 INFO:drift_score : 0.7603571839965731
07:49:05 INFO:stattest_name : distance


07:49:05 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:05 INFO:data set drift detected
07:49:05 INFO:drift detected for Product_Type.value
07:49:05 INFO:drift_score : 0.8325546111576977
07:49:05 INFO:stattest_name : Jensen-Shannon distance
07:49:05 INFO:raw_feature_importance : 2.527564287185669
07:49:05 INFO:relative_feature_importance : 1.0
07:49:05 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:05 INFO:Retraining model


Experiment week 3
---------------------------------
Experiment start date : 2019-01-07 00:00:00
Experiment end_date : 2019-01-13 00:00:00


07:49:06 INFO:Target drift detected for for daily_dispatch_count
07:49:06 INFO:drift_score : 0.7807266090424902
07:49:06 INFO:stattest_name : distance


07:49:06 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:06 INFO:data set drift detected
07:49:06 INFO:drift detected for Product_Type.value
07:49:06 INFO:drift_score : 0.8325546111576977
07:49:06 INFO:stattest_name : Jensen-Shannon distance
07:49:06 INFO:raw_feature_importance : 1.554994821548462
07:49:06 INFO:relative_feature_importance : 1.0
07:49:06 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:06 INFO:Retraining model


Experiment week 4
---------------------------------
Experiment start date : 2019-01-14 00:00:00
Experiment end_date : 2019-01-20 00:00:00


07:49:07 INFO:Target drift detected for for daily_dispatch_count
07:49:07 INFO:drift_score : 0.7568025391727874
07:49:07 INFO:stattest_name : distance


07:49:07 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:07 INFO:data set drift detected
07:49:08 INFO:drift detected for Product_Type.value
07:49:08 INFO:drift_score : 0.8325546111576977
07:49:08 INFO:stattest_name : Jensen-Shannon distance
07:49:08 INFO:raw_feature_importance : 1.6276414394378662
07:49:08 INFO:relative_feature_importance : 1.0
07:49:08 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:08 INFO:Retraining model


Experiment week 5
---------------------------------
Experiment start date : 2019-01-21 00:00:00
Experiment end_date : 2019-01-27 00:00:00


07:49:08 INFO:Target drift detected for for daily_dispatch_count
07:49:08 INFO:drift_score : 0.739195939029973
07:49:08 INFO:stattest_name : distance


07:49:08 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:09 INFO:data set drift detected
07:49:09 INFO:drift detected for Product_Type.value
07:49:09 INFO:drift_score : 0.8325546111576977
07:49:09 INFO:stattest_name : Jensen-Shannon distance
07:49:09 INFO:raw_feature_importance : 1.5279386043548584
07:49:09 INFO:relative_feature_importance : 1.0
07:49:09 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:09 INFO:Retraining model


Experiment week 6
---------------------------------
Experiment start date : 2019-01-28 00:00:00
Experiment end_date : 2019-02-03 00:00:00


07:49:10 INFO:Target drift detected for for daily_dispatch_count
07:49:10 INFO:drift_score : 0.6941825430893434
07:49:10 INFO:stattest_name : distance


07:49:10 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:10 INFO:data set drift detected
07:49:10 INFO:drift detected for Product_Type.value
07:49:10 INFO:drift_score : 0.8325546111576977
07:49:10 INFO:stattest_name : Jensen-Shannon distance
07:49:10 INFO:raw_feature_importance : 1.5493420362472534
07:49:10 INFO:relative_feature_importance : 1.0
07:49:10 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:10 INFO:Retraining model


Experiment week 7
---------------------------------
Experiment start date : 2019-02-04 00:00:00
Experiment end_date : 2019-02-10 00:00:00


07:49:11 INFO:Target drift detected for for daily_dispatch_count
07:49:11 INFO:drift_score : 0.6648371414155752
07:49:11 INFO:stattest_name : distance


07:49:11 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:11 INFO:data set drift detected
07:49:11 INFO:drift detected for Product_Type.value
07:49:11 INFO:drift_score : 0.8325546111576977
07:49:11 INFO:stattest_name : Jensen-Shannon distance
07:49:11 INFO:raw_feature_importance : 1.768558144569397
07:49:11 INFO:relative_feature_importance : 1.0
07:49:11 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:11 INFO:Retraining model


Experiment week 8
---------------------------------
Experiment start date : 2019-02-11 00:00:00
Experiment end_date : 2019-02-17 00:00:00


07:49:12 INFO:Target drift detected for for daily_dispatch_count
07:49:12 INFO:drift_score : 0.602266990111311
07:49:12 INFO:stattest_name : distance


07:49:12 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:12 INFO:data set drift detected
07:49:12 INFO:drift detected for Product_Type.value
07:49:12 INFO:drift_score : 0.8325546111576977
07:49:12 INFO:stattest_name : Jensen-Shannon distance
07:49:12 INFO:raw_feature_importance : 1.8100552558898926
07:49:12 INFO:relative_feature_importance : 1.0
07:49:12 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:13 INFO:Retraining model


Experiment week 9
---------------------------------
Experiment start date : 2019-02-18 00:00:00
Experiment end_date : 2019-02-24 00:00:00


07:49:13 INFO:Target drift detected for for daily_dispatch_count
07:49:13 INFO:drift_score : 0.6454393335295251
07:49:13 INFO:stattest_name : distance


07:49:13 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:13 INFO:data set drift detected
07:49:14 INFO:drift detected for Product_Type.value
07:49:14 INFO:drift_score : 0.8325546111576977
07:49:14 INFO:stattest_name : Jensen-Shannon distance
07:49:14 INFO:raw_feature_importance : 1.8101354837417603
07:49:14 INFO:relative_feature_importance : 1.0
07:49:14 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:14 INFO:Retraining model


Experiment week 10
---------------------------------
Experiment start date : 2019-02-25 00:00:00
Experiment end_date : 2019-03-03 00:00:00


07:49:14 INFO:Target drift detected for for daily_dispatch_count
07:49:14 INFO:drift_score : 0.6019716597946714
07:49:14 INFO:stattest_name : distance


07:49:14 INFO:target drift detect: True

ntree_limit is deprecated, use `iteration_range` or model slicing instead.

07:49:15 INFO:data set drift detected
07:49:15 INFO:drift detected for Product_Type.value
07:49:15 INFO:drift_score : 0.8325546111576977
07:49:15 INFO:stattest_name : Jensen-Shannon distance
07:49:15 INFO:raw_feature_importance : 1.7997283935546875
07:49:15 INFO:relative_feature_importance : 1.0
07:49:15 INFO:feature_importance_weighted_drift_score : 0.8325546111576977


07:49:15 INFO:Retraining model


#### Drift detection graphs

In [398]:
expirement_results = mlflow.search_runs(experiment_names=['weekly_ml_monitoring'])

In [399]:
expirement_results.set_index('params.week_end_experiment_end_date',inplace=True)
expirement_results.sort_index(ascending=True,inplace=True)

In [400]:
expirement_results.head(2)

Unnamed: 0_level_0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.weekly_rmse,metrics.weekly_mse,metrics.weekly_mape,metrics.weekly_mae,...,params.warehouse_ID.value_feature_importance_weighted_drift_score,params.county.value_stattest_name,params.is_weekend.value_drift_score,params.warehouse_ID.value_feature_importance_weighted_drift_detected,params.state_deaths.value_raw_feature_importance,params.state.value_relative_feature_importance,params.experiment_week,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.source.name
params.week_end_experiment_end_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-30 00:00:00,6cfe04125b5d42d9a5057a290caf866e,2,FINISHED,file:///C:/Users/shree/projects/warehouse_dema...,2022-06-27 02:19:03.717000+00:00,2022-06-27 02:19:04.749000+00:00,4.317174,18.63799,1.039283,4.24139,...,0.0123057641499587,Jensen-Shannon distance,0.8325546111576977,False,0.0,0.0,1,LOCAL,shree,d:\anaconda\envs\anaconda_env\lib\site-package...
2019-01-06 00:00:00,969b70a3c7454dfd8b63500807fe8d7b,2,FINISHED,file:///C:/Users/shree/projects/warehouse_dema...,2022-06-27 02:19:04.773000+00:00,2022-06-27 02:19:05.890000+00:00,4.292296,18.423802,0.988771,4.156671,...,0.0094419490993625,Jensen-Shannon distance,0.8325546111576978,False,0.0,0.0,2,LOCAL,shree,d:\anaconda\envs\anaconda_env\lib\site-package...


In [401]:
expirement_results.artifact_uri.iloc[0]

'file:///C:/Users/shree/projects/warehouse_demand_forecasting/mlruns/2/6cfe04125b5d42d9a5057a290caf866e/artifacts'

In [402]:
all_cols = expirement_results.columns
drift_detection_cols = all_cols[all_cols.str.endswith('drift_detected')]
drift_score_cols = all_cols[all_cols.str.endswith('drift_score')]

In [403]:
for col in drift_detection_cols:
    expirement_results[col] = expirement_results[col].replace([None],False).replace('False',False).astype(bool).astype(int)

In [404]:
for col in drift_score_cols:
    expirement_results[col] = expirement_results[col].replace([None],0).astype(float)

In [405]:
drift_detected_cols = []
for col in drift_detection_cols:
    if expirement_results[col].sum() > 0:
        drift_detected_cols.append(col)


In [413]:
drift_detected_cols

['params.daily_dispatch_count_drift_detected',
 'params.Product_Type.value_feature_importance_weighted_drift_detected']

In [406]:
drift_detected_features = pd.Series(drift_detected_cols).map(lambda x:x.split('.')[1]).str.replace('_drift_detected','').to_list()

In [407]:
drift_detected_experiment_cols = all_cols[[any([(feature in col) for feature in drift_detected_features]) for col in all_cols.to_list()]]

In [408]:
drift_detected_experiment_results = expirement_results[drift_detected_experiment_cols]

In [409]:
fig = px.line(drift_detected_experiment_results,  y='params.daily_dispatch_count_target_drift_score', markers=True)
fig.show()

In [412]:
fig = px.line(drift_detected_experiment_results,  y='params.Product_Type.value_feature_importance_weighted_drift_detected', markers=True)
fig.show()

In [411]:
fig = px.line(drift_detected_experiment_results,  y='params.daily_dispatch_count_drift_detected', markers=True)
fig.show()

In [368]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Table(header=dict(values=['A Scores', 'B Scores']),
                 cells=dict(values=[from IPython.display import HTML
HTML(filename='file:///C:/Users/shree/projects/warehouse_demand_forecasting/mlruns/2/9195f8a89dda40c8852df8e9e4176323/artifacts/daily_dispatch_count_Distribution.html')], [95]]))
                     ])
fig.show()

In [None]:
fig = px.line(drift_detected_experiment_results,  y='params.Product_Type.value_drift_score', markers=True)
fig.show()

In [283]:
drift_detected_experiment_results

Unnamed: 0_level_0,params.warehouse_ID.value_drift_score,params.Product_Type.value_stattest_name,params.Product_Type.value_drift_score,params.daily_dispatch_count_target_drift_score,params.daily_dispatch_count_drift_detected,params.warehouse_ID.value_stattest_name,params.warehouse_ID.value_relative_feature_importance,params.Product_Type.value_feature_importance_weighted_drift_detected,params.Product_Type.value_feature_importance_weighted_drift_score,params.warehouse_ID.value_raw_feature_importance,params.Product_Type.value_relative_feature_importance,params.daily_dispatch_count_target_stattest_name,params.Product_Type.value_raw_feature_importance,params.warehouse_ID.value_feature_importance_weighted_drift_score,params.warehouse_ID.value_feature_importance_weighted_drift_detected
params.week_end_experiment_end_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-10-21 00:00:00,0.0,,0.0,0.227558,1,,,0,0.0,,,distance,,0.0,0
2018-10-28 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.28241,1,Jensen-Shannon distance,0.1581028699874878,1,0.832555,0.3560701012611389,1.0,distance,2.2521419525146484,0.131629,1
2018-11-04 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.155107,1,Jensen-Shannon distance,0.0092434706166386,1,0.832555,0.0208473093807697,1.0,distance,2.2553551197052,0.007696,0
2018-11-11 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.143172,1,Jensen-Shannon distance,0.0754168629646301,1,0.832555,0.1936871260404586,1.0,distance,2.568220376968384,0.062789,0
2018-11-18 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.162773,1,Jensen-Shannon distance,0.1364287137985229,1,0.832555,0.3557900786399841,1.0,distance,2.6078827381134038,0.113584,1
2018-11-25 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.151224,1,Jensen-Shannon distance,0.1000199541449546,1,0.832555,0.2583551406860351,1.0,distance,2.583035945892334,0.083272,0
2018-12-02 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.225175,1,Jensen-Shannon distance,0.1055342033505439,1,0.832555,0.2629141509532928,1.0,distance,2.491269588470459,0.087863,0
2018-12-09 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.602318,1,Jensen-Shannon distance,0.0870217382907867,1,0.832555,0.1689471006393432,1.0,distance,1.9414355754852293,0.07245,0
2018-12-16 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.649918,1,Jensen-Shannon distance,0.131396010518074,1,0.832555,0.2590985596179962,1.0,distance,1.9718906879425049,0.109394,1
2018-12-23 00:00:00,0.832555,Jensen-Shannon distance,0.832555,0.583242,1,Jensen-Shannon distance,0.1320816725492477,1,0.832555,0.2446619421243667,1.0,distance,1.8523534536361692,0.109965,1


In [388]:
from IPython.display import IFrame
from IPython.display import display
IFrame(width=200,height=200,src='C:/Users/shree/projects/warehouse_demand_forecasting/mlruns/2/9195f8a89dda40c8852df8e9e4176323/artifacts/daily_dispatch_count_Distribution.html')

In [386]:
from IPython.display import HTML
display('C:/Users/shree/projects/warehouse_demand_forecasting/mlruns/2/9195f8a89dda40c8852df8e9e4176323/artifacts/daily_dispatch_count_Distribution.html')

'C:/Users/shree/projects/warehouse_demand_forecasting/mlruns/2/9195f8a89dda40c8852df8e9e4176323/artifacts/daily_dispatch_count_Distribution.html'

In [173]:
mean_squared_error(y_eval, preds, squared=False)

1.2158141495789463