# ML Monitoring - Model classification

## Overview

In this notebook we focus on model analysis. Specifically feature sliced model analysis and model performance over time and good vs bad performance

## Dataset:

* Contains campaign, past products purchased and user activity information from data from 2021-01-01 to 2021-12-31
* Select users for maket campaigns based on wether user will purchase a product in next 3 months
## Assumptions

* Initial model deployment on march 2019
* True labels/ actual demand available over the weekend
* Model monitored on weekly basis

## Libraries

In [1]:
import pandas as pd
import numpy as np
from datetime import date,datetime,timedelta
import time
from sklearn.model_selection import train_test_split
from rdt import HyperTransformer
import xgboost as xgb
import optuna
import traceback
import shap
import pickle
import logging
from eli5 import show_weights
from eli5.sklearn import PermutationImportance
from importlib import reload
import logging
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix
from evidently import ColumnMapping
from evidently.analyzers.stattests import StatTest
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataDriftTab, CatTargetDriftTab, NumTargetDriftTab, RegressionPerformanceTab
from evidently.options import DataDriftOptions
from evidently.model_profile import Profile
from evidently.model_profile.sections import DataDriftProfileSection, NumTargetDriftProfileSection
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
import os
import json
import mlflow
from functools import wraps
from mlflow.tracking import MlflowClient
import plotly.io as pio
# remove static rendering to allow interactive mode
pio.renderers.default = "svg"
%matplotlib inline

In [2]:
# set up logging
reload(logging)
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%I:%M:%S')

## Data

### Load data

In [3]:
df = pd.read_csv('train_wn75k28.csv', index_col=0)

In [4]:
logging.info(f'dataset shape : {df.shape}')

03:16:27 INFO:dataset shape : (39161, 18)


In [5]:
# set random state
rng = np.random.RandomState(0)

In [6]:
df

Unnamed: 0_level_0,created_at,campaign_var_1,campaign_var_2,products_purchased,signup_date,user_activity_var_1,user_activity_var_2,user_activity_var_3,user_activity_var_4,user_activity_var_5,user_activity_var_6,user_activity_var_7,user_activity_var_8,user_activity_var_9,user_activity_var_10,user_activity_var_11,user_activity_var_12,buy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,2021-01-01,1,2,2.0,2020-09-24,0,0,0,0,0,0,1,1,0,0,0,0,0
2,2021-01-01,2,1,2.0,2020-09-19,1,0,1,0,0,0,1,0,0,0,0,0,0
3,2021-01-01,9,3,3.0,2021-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0
4,2021-01-01,6,7,2.0,2017-10-04,0,0,0,0,0,0,0,0,0,0,0,0,0
5,2021-01-01,4,6,,2020-06-08,0,0,0,0,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39157,2021-12-31,11,11,2.0,2017-10-19,1,0,0,0,1,1,1,0,0,0,0,0,0
39158,2021-12-31,3,9,3.0,,0,0,0,0,0,0,0,0,0,0,0,0,0
39159,2021-12-31,8,7,2.0,,1,0,0,0,1,0,1,0,0,0,0,0,0
39160,2021-12-31,7,12,2.0,,0,0,0,0,0,1,0,0,0,0,1,0,0


In [7]:
df.head(2)

Unnamed: 0_level_0,created_at,campaign_var_1,campaign_var_2,products_purchased,signup_date,user_activity_var_1,user_activity_var_2,user_activity_var_3,user_activity_var_4,user_activity_var_5,user_activity_var_6,user_activity_var_7,user_activity_var_8,user_activity_var_9,user_activity_var_10,user_activity_var_11,user_activity_var_12,buy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,2021-01-01,1,2,2.0,2020-09-24,0,0,0,0,0,0,1,1,0,0,0,0,0
2,2021-01-01,2,1,2.0,2020-09-19,1,0,1,0,0,0,1,0,0,0,0,0,0


In [8]:
df = df.where(pd.notnull(df), np.nan)

In [9]:
# Split the dataset
train_df, test_df = train_test_split(df,
                                     test_size=0.2,
                                     shuffle=False,
                                     random_state=rng)
eval_df, serve_df = train_test_split(test_df,
                                     test_size=0.5,
                                     shuffle=False,
                                     random_state=rng)

In [10]:
logging.info(f'train_df shape : {train_df.shape}')
logging.info(f'eval_df shape : {eval_df.shape}')
logging.info(f'serve_df shape : {serve_df.shape}')

03:16:29 INFO:train_df shape : (31328, 18)
03:16:29 INFO:eval_df shape : (3916, 18)
03:16:29 INFO:serve_df shape : (3917, 18)


### Data preprocessing

In [11]:
def encode(df):
    for col in df.columns:
        num_unique = df[col].nunique()
        #print(f'{col} : {num_unique}')
        if num_unique  == 2:
            df[col] = df[col].astype(bool)
        else:
            if col.startswith('user_activity_var'):
                df[col] = df[col].astype('category')
    return df

In [12]:
def preprocessing(df):
    df.created_at = pd.to_datetime(df.created_at)
    df.signup_date = pd.to_datetime(df.signup_date)
    df['is_sign_up_info_available'] = df.signup_date.isnull().astype(bool)
    #df['created_year'] = df.created_at.dt.year
    df['created_month'] = df.created_at.dt.month
    df['created_week'] = df.created_at.dt.week
    df['created_day_of_week'] = df.created_at.dt.day_of_week
    df['lead_sign_up_diff'] = (df.created_at - df.signup_date).dt.days
    df['products_purchased_info_available'] = df.products_purchased.isnull().astype(bool)
    df.fillna(0,inplace=True)
    df = encode(df)
    
    #print(df.isna().sum())
    return df

In [13]:
train_df = preprocessing(train_df)
eval_df = preprocessing(eval_df)
serve_df = preprocessing(serve_df)


Series.dt.weekofyear and Series.dt.week have been deprecated.  Please use Series.dt.isocalendar().week instead.



In [14]:
def split_features_target(df):
    target =  'buy'
    drop_feats = ['signup_date']
    y = df[target]
    X = df.drop(columns = drop_feats+[target])
    return X,y

In [15]:
X,y = split_features_target(train_df)
X_eval,y_eval = split_features_target(eval_df)
X_serve,y_serve = split_features_target(serve_df)

### Data transformation

In [16]:
ht = HyperTransformer()

In [17]:
ht.detect_initial_config(data=X)

Detecting a new config from the data ... SUCCESS
Setting the new config ... SUCCESS
Config:
{
    "sdtypes": {
        "created_at": "datetime",
        "campaign_var_1": "numerical",
        "campaign_var_2": "numerical",
        "products_purchased": "numerical",
        "user_activity_var_1": "categorical",
        "user_activity_var_2": "boolean",
        "user_activity_var_3": "boolean",
        "user_activity_var_4": "boolean",
        "user_activity_var_5": "categorical",
        "user_activity_var_6": "categorical",
        "user_activity_var_7": "categorical",
        "user_activity_var_8": "categorical",
        "user_activity_var_9": "boolean",
        "user_activity_var_10": "boolean",
        "user_activity_var_11": "categorical",
        "user_activity_var_12": "boolean",
        "is_sign_up_info_available": "boolean",
        "created_month": "numerical",
        "created_week": "numerical",
        "created_day_of_week": "numerical",
        "lead_sign_up_diff": "numeri

In [18]:
ht.fit(X)
X_transformed = ht.transform(X)
X_eval_transformed = ht.transform(X_eval)
X_serve_transformed = ht.transform(X_serve)


The data contains 1 new categories that were not seen in the original data (examples: {3}). Assigning them random values. If you want to model new categories, please fit the transformer again with the new data.



## Modelling

In [19]:

def fit_model(X,y, X_test,y_test, params=None):
    if params:
        clf = xgb.XGBClassifier(**params)
    else:
        clf = xgb.XGBClassifier()
    clf.fit(X,y,eval_set=[(X_test,y_test)],early_stopping_rounds=100,verbose=False)
    
    return clf

In [20]:
clf = fit_model(X_transformed,y, X_eval_transformed,y_eval)


`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.



## Model analysis

#### Metric selection

In [21]:
train_df.buy.value_counts() * 100 / train_df.buy.value_counts().sum()

False    94.678882
True      5.321118
Name: buy, dtype: float64

* Only 5.32 % users went on to buy  and rest didn't buy which means we have somewhat of rare class situation. Based on this statement, recall score could be a good choice. 
* If we are looking to reduce market campaigns cost then we will look to reduce both false positives(incorrectly selecting user for market campaign) and false negatives(falsely not selecting correct user for market campaign). In this case, F1 score would be a better choice.
* But given we have rare class situation if we select F1 score then number of selected users for market campaighn could be smaller than required size.

<b> Based on above reasons we will select recall score as the metric</b>

In [22]:
def rewrite_name(df_type, error_type):
    return f"{df_type}{'_'}{error_type}"

def get_scores(y_actual, y_predicted,df_type='train'):
    TP = 0
    FP = 0
    TN = 0
    FN = 0,
    #score = f1_score(y_actual, y_predicted)
    #precision = precision_score(y_actual, y_predicted)
    recall = recall_score(y_actual, y_predicted)
    #TP, FP, TN, FN = perf_measure(y_actual, np.where(clf.predict_proba(X) > threshold, 1, 0)[:,1])
    TN, FP, FN, TP = confusion_matrix(y_actual,y_predicted, labels=[0,1]).ravel()
    scores = { rewrite_name(df_type, 'recall'):recall, rewrite_name(df_type, 'TP'):TP, rewrite_name(df_type, 'FP'):FP, rewrite_name(df_type, 'TN'):TN, rewrite_name(df_type, 'FN'):FN}
    return scores

def dataset_results(X,y, model, df_type):

    y_pred = model.predict(X)
    
    return get_scores(y,y_pred,df_type)
    

### Feature sliced performance analysis

In [23]:
def get_feature_sliced_model_performance_per_dataset(features, model, X,y,  df_type):
    if features:
        return X.groupby(features).apply(lambda x:dataset_results(x.reset_index().set_index('id'),y.loc[x.index], model, df_type))
    else:
        return pd.DataFrame(dataset_results(X.reset_index().set_index('id'),y, model, df_type),index=[0])

In [24]:
def feature_sliced_performance_analysis_per_dataset(features, model, X,y, time_scale_str):
    
    train_results = get_feature_sliced_model_performance_per_dataset(features, model, X,y, time_scale_str)
    results = None
    if features:
        combined_results = train_results.to_frame(name = 'train').reset_index()
        results = pd.json_normalize(combined_results['train'])
        results[features] = combined_results[features]
     
        results = results[ features + [ col for col in results.columns if col not in features ] ]
        results[features] = ht.reverse_transform_subset(results[features])
    else:
        results = train_results
    return results

In [25]:
def feature_sliced_performance_analysis_train_test_serve(features, model, X,y, X_test,y_test, X_serve, y_serve ):
    
    train_results = get_feature_sliced_model_performance_per_dataset(features, model, X,y, 'train')
    test_results = get_feature_sliced_model_performance_per_dataset(features,model, X_test,y_test, 'eval')
    serve_results = get_feature_sliced_model_performance_per_dataset(features,model, X_serve,y_serve, 'serve')
    results = None
    if features:
        combined_results = train_results.to_frame(name = 'train').join(test_results.to_frame(name='eval')).join(serve_results.to_frame(name='serve')).reset_index()
        results = pd.json_normalize(combined_results['train']).join(pd.json_normalize(combined_results['eval'])).join(pd.json_normalize(combined_results['serve']))
        results[features] = combined_results[features]
     
        results = results[ features + [ col for col in results.columns if col not in features ] ]
        results[features] = ht.reverse_transform_subset(results[features])
    else:
        results = train_results.join(test_results).join(serve_results)
    results['overfit_train_eval'] = results['train_recall'] - results['eval_recall']
    results['overfit_train_serve'] = results['train_recall'] - results['serve_recall']
    results['overfit_eval_serve'] = results['eval_recall'] - results['serve_recall']
    return results

#### Overall results

In [26]:
model_results = feature_sliced_performance_analysis_train_test_serve([], clf, X_transformed,y, X_eval_transformed, y_eval, X_serve_transformed, y_serve)
model_results

Unnamed: 0,train_recall,train_TP,train_FP,train_TN,train_FN,eval_recall,eval_TP,eval_FP,eval_TN,eval_FN,serve_recall,serve_TP,serve_FP,serve_TN,serve_FN,overfit_train_eval,overfit_train_serve,overfit_eval_serve
0,0.59988,1000,28,29633,667,0.559748,89,7,3750,70,0.55814,96,16,3729,76,0.040132,0.04174,0.001609


In [59]:
perm = PermutationImportance(clf, random_state=1).fit(X_eval_transformed, y_eval)
show_weights(perm, feature_names=X_eval.columns.tolist())

Weight,Feature
0.0132  ± 0.0010,user_activity_var_9
0.0092  ± 0.0009,user_activity_var_2
0.0082  ± 0.0018,user_activity_var_11
0.0056  ± 0.0005,user_activity_var_4
0.0039  ± 0.0012,is_sign_up_info_available
0.0022  ± 0.0026,user_activity_var_5
0.0011  ± 0.0009,user_activity_var_8
0.0007  ± 0.0023,products_purchased
0.0006  ± 0.0011,campaign_var_1
0.0005  ± 0.0006,lead_sign_up_diff


In [63]:
model_results = feature_sliced_performance_analysis_train_test_serve(['user_activity_var_9.value'], clf, X_transformed,y, X_eval_transformed, y_eval, X_serve_transformed, y_serve)
#model_results.insert(1,'state.value', model_results['warehouse_ID.value'].map(df[['warehouse_ID','state']].drop_duplicates().set_index('warehouse_ID').to_dict()['state']))

In [64]:
model_results

Unnamed: 0,user_activity_var_9.value,train_recall,train_TP,train_FP,train_TN,train_FN,eval_recall,eval_TP,eval_FP,eval_TN,eval_FN,serve_recall,serve_TP,serve_FP,serve_TN,serve_FN,overfit_train_eval,overfit_train_serve,overfit_eval_serve
0,False,0.480934,618,28,29633,667,0.421488,51,7,3750,70,0.472222,68,16,3729,76,0.059446,0.008712,-0.050735
1,True,1.0,382,0,0,0,1.0,38,0,0,0,1.0,28,0,0,0,0.0,0.0,0.0


### Residual analysis

to continue