<h1 style="color:green">Model monitoring with Evidently</h1>

In [2]:
import pandas as pd
import numpy as np

import time

import seaborn as sb
import matplotlib.pyplot as plt



# Metrics Accuracy
from sklearn.metrics import \
accuracy_score, confusion_matrix, recall_score, roc_curve,roc_auc_score

# joblib for storing models
import joblib


# Evidently
from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.dashboard.tabs import ClassificationPerformanceTab,ProbClassificationPerformanceTab



<h3 style="color: green;">Reading in the data</h3>

In [22]:
# Feature tables: the "base" file is loaded into X_train, the "test" file into X_test.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../Data/base_classifier_data.csv",
                     "../Data/test_classifier_data.csv")
)

# Reading in the target files, in the same base/test order
y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../Data/base_classifier_target.csv",
                     "../Data/test_classifier_target.csv")
)

In [10]:
# One shape per line: train first, then test.
print(X_train.shape, X_test.shape, sep="\n")

(587, 156)
(240, 156)


<h3 style="color: green;">Load the model using joblib</h3>

In [20]:
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code; only load model files from a trusted location.
xgb_loaded = joblib.load(
    filename='../Models/Classifier_models/xgboost_classifier23032023'
)

In [23]:
# Column names for the two probability columns returned by predict_proba.
# NOTE(review): assumes column 0 is class 0 and column 1 is class 1 - confirm
# against the loaded model's class order.
probas_name = ['target_0', 'target_1']

# One probability frame per split, train first and test second.
train_probas, test_probas = (
    pd.DataFrame(xgb_loaded.predict_proba(split), columns=probas_name)
    for split in (X_train, X_test)
)

In [24]:
# Columns this notebook adds on top of the model inputs. They are stripped
# before predicting so that re-running this cell neither feeds earlier outputs
# back into the model nor appends duplicate columns.
derived_cols = ['prediction', *probas_name, 'target']

# Model inputs only, with the index reset in preparation for concatenation
train_inputs = X_train.drop(columns=derived_cols, errors='ignore').reset_index(drop=True)
test_inputs = X_test.drop(columns=derived_cols, errors='ignore').reset_index(drop=True)

# Storing the class predictions, then concatenating the probability
# predictions to train and test (column order: inputs, prediction, probabilities)
X_train = pd.concat(
    [train_inputs.assign(prediction=xgb_loaded.predict(train_inputs)), train_probas],
    axis=1,
)
X_test = pd.concat(
    [test_inputs.assign(prediction=xgb_loaded.predict(test_inputs)), test_probas],
    axis=1,
)
# NOTE: on a re-run any existing 'target' column is dropped as well, so the
# cell that adds the target has to be run again afterwards.



In [25]:
# Attach the labels read from the target files as a 'target' column.
# The raw array is assigned (not the DataFrame), so rows are matched by position.
X_train['target'] = y_train.to_numpy()
X_test['target'] = y_test.to_numpy()

In [26]:
# Preview the first five rows, including the appended prediction/probability/target columns.
X_train.head(n=5)

Unnamed: 0,Total_Nbr_of_Items,Total_Price,Date_diff,Week_day_numeric,Nbr_items_per_wk,Nbr_items_wk_perc,Expenditure_per_wk,Total_Exp_wk_perc,hour,Part_of_day_num,...,Afro_supermarket,Amazon,Yours_Supermarket,Sainsbury,Asda,LinkedIn,prediction,target_0,target_1,target
0,-0.508363,-0.231013,-0.19183,1.071121,1.475415,-0.681293,0.420746,-0.583656,1.262206,1.556529,...,-0.19262,-0.131647,-0.266773,-0.327944,-0.370941,-0.092688,0,0.995862,0.004138,0
1,1.459563,0.244153,0.177364,1.071121,-0.873191,1.960226,-0.530668,0.366994,0.217192,0.264192,...,-0.19262,-0.131647,-0.266773,-0.327944,-0.370941,-0.092688,1,0.15188,0.84812,1
2,-0.687266,-1.092006,-0.19183,-0.944097,-0.792204,-0.55837,2.732454,-0.988954,0.478446,0.264192,...,-0.19262,-0.131647,-0.266773,-0.327944,-0.370941,-0.092688,1,0.204227,0.795773,1
3,1.280661,0.370798,-0.561025,1.071121,0.422592,0.640134,-0.255744,0.115471,-0.305315,0.264192,...,-0.19262,-0.131647,-0.266773,3.049302,-0.370941,-0.092688,0,0.985461,0.014539,0
4,-0.866168,-1.055682,-0.561025,0.063512,1.394429,-0.863871,-0.501408,-0.908772,-1.089076,-1.028146,...,-0.19262,-0.131647,3.748504,-0.327944,-0.370941,-0.092688,0,0.99426,0.00574,0


In [47]:
# Show the column names from position 121 onwards, which includes the
# prediction, probability and target columns appended at the end of the frame.
x = X_train.columns
x[121:]

Index(['Cosmetics_and_selfcare_wk_perc', 'Cosmetics_and_selfcare_exp_receipt',
       'Cosmetics_and_selfcare_exp_wk', 'Cosmetics_and_selfcare_wk_exp_perc',
       'Clothes_and_shoes_receipt', 'Clothes_and_shoes_wk',
       'Clothes_and_shoes_wk_perc', 'Clothes_and_shoes_exp_receipt',
       'Clothes_and_shoes_exp_wk', 'Clothes_and_shoes_wk_exp_perc',
       'House_and_kitchen_receipt', 'House_and_kitchen_wk',
       'House_and_kitchen_wk_perc', 'House_and_kitchen_exp_receipt',
       'House_and_kitchen_exp_wk', 'House_and_kitchen_wk_exp_perc', 'eBay',
       'Abbey_Sports', 'Currys_PC_World', 'Aldi', 'Europe', 'Udemy',
       'Sports_direct.com', 'Wilko', 'Primark', 'B_and_Q', 'Tesco', 'Maryland',
       'BP', 'Afro_supermarket', 'Amazon', 'Yours_Supermarket', 'Sainsbury',
       'Asda', 'LinkedIn', 'prediction', 'target_0', 'target_1', 'target'],
      dtype='object')

In [42]:
# (rows, columns) of the training frame after the output columns were added.
X_train.shape

(587, 160)

In [35]:
# Frequency of each distinct value in one example feature column.
X_train.loc[:, 'Cooked_meats_receipt'].value_counts()

-0.473868     447
 1.201798     122
 2.877464      15
 6.228797       1
 11.255795      1
 4.553130       1
Name: Cooked_meats_receipt, dtype: int64

(587, 160)

<h5 style="color: green;">Column mapping</h5>

In [48]:
# Numerical feature names, one product category per line.
# The element order is significant: it determines the column order when this
# list is used to select columns.
num_feat = [
    # receipt-level and weekly totals
    'Total_Nbr_of_Items', 'Total_Price', 'Date_diff', 'Week_day_numeric',
    'Nbr_items_per_wk', 'Nbr_items_wk_perc', 'Expenditure_per_wk', 'Total_Exp_wk_perc', 'hour',
    # Bread
    'Bread_receipt', 'Bread_wk', 'Bread_wk_perc', 'Bread_exp_receipt', 'Bread_exp_wk', 'Bread_wk_exp_perc',
    # Cooked meats
    # NOTE(review): no 'Cooked_meats_wk_exp_perc' entry - confirm whether that column exists in the data.
    'Cooked_meats_receipt', 'Cooked_meats_wk', 'Cooked_meats_wk_perc', 'Cooked_meats_exp_receipt', 'Cooked_meats_exp_wk',
    # Raw meats
    'Raw_meats_receipt', 'Raw_meats_wk', 'Raw_meats_wk_perc', 'Raw_meats_exp_receipt', 'Raw_meats_exp_wk', 'Raw_meats_wk_exp_perc',
    # Eating out
    'Eating_out_receipt', 'Eating_out_wk', 'Eating_out_wk_perc', 'Eating_out_exp_receipt', 'Eating_out_wk_exp_perc', 'Eating_out_exp_wk',
    # Snacks
    'Snacks_receipt', 'Snacks_wk', 'Snacks_wk_perc', 'Snacks_exp_receipt', 'Snacks_exp_wk', 'Snacks_wk_exp_perc',
    # Drinks
    'Drinks_receipt', 'Drinks_wk', 'Drinks_wk_perc', 'Drinks_exp_receipt', 'Drinks_exp_wk', 'Drinks_wk_exp_perc',
    # Vegetables
    'Vegetables_receipt', 'Vegetables_wk', 'Vegetables_wk_perc', 'Vegetables_exp_receipt', 'Vegetables_exp_wk', 'Vegetables_wk_exp_perc',
    # Fruit
    'Fruit_receipt', 'Fruit_wk', 'Fruit_wk_perc', 'Fruit_exp_receipt', 'Fruit_exp_wk', 'Fruit_wk_exp_perc',
    # Cooking base
    'Cooking_base_receipt', 'Cooking_base_wk', 'Cooking_base_wk_perc', 'Cooking_base_exp_receipt', 'Cooking_base_exp_wk', 'Cooking_base_wk_exp_perc',
    # Dairy produce
    'Dairy_produce_receipt', 'Dairy_produce_wk', 'Dairy_produce_wk_perc', 'Dairy_produce_exp_receipt', 'Dairy_produce_exp_wk', 'Dairy_produce_wk_exp_perc',
    # Seasoning
    'Seasoning_receipt', 'Seasoning_wk', 'Seasoning_exp_receipt', 'Seasoning_exp_wk', 'Seasoning_wk_exp_perc', 'Seasoning_wk_perc',
    # Breakfast
    'Breakfast_receipt', 'Breakfast_wk', 'Breakfast_wk_perc', 'Breakfast_exp_receipt', 'Breakfast_exp_wk', 'Breakfast_wk_exp_perc',
    # Transport
    # NOTE(review): no 'Transport_receipt' entry - confirm whether that column exists in the data.
    'Transport_wk', 'Transport_wk_perc', 'Transport_exp_receipt', 'Transport_exp_wk', 'Transport_wk_exp_perc',
    # DIY
    'DIY_receipt', 'DIY_wk', 'DIY_wk_perc', 'DIY_exp_receipt', 'DIY_exp_wk', 'DIY_wk_exp_perc',
    # Electronics
    'Electronics_receipt', 'Electronics_wk', 'Electronics_wk_perc', 'Electronics_exp_receipt', 'Electronics_exp_wk', 'Electronics_wk_exp_perc',
    # Education
    'Education_receipt', 'Education_wk', 'Education_wk_perc', 'Education_exp_receipt', 'Education_exp_wk', 'Education_wk_exp_perc',
    # Tech and services
    'Tech_and_services_receipt', 'Tech_and_services_wk', 'Tech_and_services_wk_perc',
    'Tech_and_services_exp_receipt', 'Tech_and_services_exp_wk', 'Tech_and_services_wk_exp_perc',
    # Cosmetics and selfcare
    'Cosmetics_and_selfcare_receipt', 'Cosmetics_and_selfcare_wk_perc', 'Cosmetics_and_selfcare_exp_receipt',
    'Cosmetics_and_selfcare_exp_wk', 'Cosmetics_and_selfcare_wk_exp_perc', 'Cosmetics_and_selfcare_wk',
    # Clothes and shoes
    'Clothes_and_shoes_receipt', 'Clothes_and_shoes_wk', 'Clothes_and_shoes_wk_perc',
    'Clothes_and_shoes_exp_receipt', 'Clothes_and_shoes_exp_wk', 'Clothes_and_shoes_wk_exp_perc',
    # House and kitchen
    'House_and_kitchen_receipt', 'House_and_kitchen_wk', 'House_and_kitchen_wk_perc',
    'House_and_kitchen_exp_receipt', 'House_and_kitchen_exp_wk', 'House_and_kitchen_wk_exp_perc',
]

In [49]:
# Names treated as categorical, grouped by kind. Order is preserved because it
# fixes the column order of any frame selected with `features`.
categorical_feat = [
    # time of day
    'Part_of_day_num',
    # payment type
    'Ptype_Card', 'Ptype_Cash', 'Ptype_Plan',
    # payment source
    'SRC_Cash_or_plan', 'SRC_Contactless', 'SRC_DB', 'SRC_DD', 'SRC_Pin', 'SRC_Transfer',
    # store indicators
    'eBay', 'Abbey_Sports', 'Currys_PC_World', 'Aldi', 'Europe', 'Udemy',
    'Sports_direct.com', 'Wilko', 'Primark', 'B_and_Q', 'Tesco', 'Maryland',
    'BP', 'Afro_supermarket', 'Amazon', 'Yours_Supermarket', 'Sainsbury',
    'Asda', 'LinkedIn',
    # NOTE(review): the next four are the model outputs and the label rather than
    # model inputs; they sit in this list so that `features` selects them too.
    'prediction', 'target_0', 'target_1', 'target',
]

# Every column name handed to the dashboard: numerical first, then categorical.
features = [*num_feat, *categorical_feat]

In [50]:
# Total number of listed column names across both lists.
sum(map(len, (num_feat, categorical_feat)))

160

In [51]:
# The label and the model outputs must be present in the data passed to the
# dashboard, but they are not input features, so they are kept out of the
# feature lists declared in the mapping.
non_feature_cols = {'target', 'prediction', *probas_name}

colmnMap = ColumnMapping()
colmnMap.target = 'target'
colmnMap.prediction = 'prediction'  # hard class predictions
colmnMap.numerical_features = [col for col in num_feat if col not in non_feature_cols]
colmnMap.categorical_features = [col for col in categorical_feat if col not in non_feature_cols]

<h5 style="color: green;">Model performance dashboard</h5>

In [53]:
# Classification performance dashboard: the training frame is passed first and
# the test frame second, both restricted to the columns listed in `features`.
performance_tabs = [ClassificationPerformanceTab(verbose_level=1)]
xgb_model_performance = Dashboard(tabs=performance_tabs)

train_view = X_train[features]
test_view = X_test[features]
xgb_model_performance.calculate(train_view, test_view, column_mapping=colmnMap)

# Write the dashboard to a standalone HTML file.
xgb_model_performance.save(
    "../Reports/XGBoost classifier training vs test performance dashboard.html"
)

<h3 style="color: green;">Probabilistic classification performance</h3>

In [60]:
# Formatting the target as required for probabilistic classification
# performance: integer class i becomes the name of the i-th probability column.
# Values that are not integer class positions (for example labels that were
# already converted) pass through unchanged, so the cell can be re-run safely.
train_label_by_class = dict(enumerate(train_probas.columns))
test_label_by_class = dict(enumerate(test_probas.columns))

X_train['target'] = X_train['target'].map(
    lambda value: train_label_by_class.get(value, value)
)
X_test['target'] = X_test['target'].map(
    lambda value: test_label_by_class.get(value, value)
)

In [61]:
# Names of the probability columns; these are the labels the target was mapped to.
test_probas.columns

Index(['target_0', 'target_1'], dtype='object')

In [56]:
# Make sure the probability columns are part of the numerical list, adding each
# name at most once so that re-running the cell does not grow the list.
num_feat.extend(name for name in probas_name if name not in num_feat)

# Column selection for the dashboard. dict.fromkeys drops repeated names while
# keeping first-seen order; without it a name present in both lists would be
# selected twice and produce duplicate columns.
features = list(dict.fromkeys(num_feat + categorical_feat))

In [57]:
# For the probabilistic report the prediction is the list of per-class
# probability columns. Those columns, the hard 'prediction' column and the
# label are excluded from the feature lists: they have to be present in the
# data but are not model inputs.
proba_cols = train_probas.columns.tolist()
non_feature_cols = {'target', 'prediction', *proba_cols}

colmnMap = ColumnMapping()
colmnMap.target = 'target'
colmnMap.prediction = proba_cols
colmnMap.numerical_features = [col for col in num_feat if col not in non_feature_cols]
colmnMap.categorical_features = [col for col in categorical_feat if col not in non_feature_cols]

In [1]:
# Probabilistic classification dashboard: the training frame is passed first
# and the test frame second, both restricted to the columns in `features`.
prob_tabs = [ProbClassificationPerformanceTab(verbose_level=1)]
xgb_classi_prob_performance = Dashboard(tabs=prob_tabs)

prob_train_view = X_train[features]
prob_test_view = X_test[features]
xgb_classi_prob_performance.calculate(prob_train_view, prob_test_view, column_mapping=colmnMap)

# Write the dashboard to a standalone HTML file.
xgb_classi_prob_performance.save(
    "../Reports/XGBoost classifier probabilistic performance dashboard.html"
)