# Model Monitoring

In [None]:
!pip install evidently

In [None]:
import pandas as pd
from xgboost import XGBClassifier

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
from evidently.metric_preset import TargetDriftPreset
from evidently.metric_preset import DataDriftPreset

## Load Data

In [31]:
# --- Notebook configuration -------------------------------------------------
# Path of the CSV that is loaded into `df_clean`.
clean_data = 'data_clean.csv'
# Subtracted from the largest AppYear value to get the reference/current cut-off.
last_years = 7

# Columns handed to Evidently as numerical features.
numerical = [
    'Term',
    'NoEmp',
    'SecuredSBA',
    'GrDisburs',
    'GrApprov',
    'ApprovSBA',
]
# Columns handed to Evidently as categorical features.
categorical = [
    'State',
    'BankState',
    'Sector',
    'AppYear',
    'AppMonth',
    'DifState',
    'Secured',
    'NewExist',
    'Urban',
    'Rural',
    'RevLine',
    'LowDoc',
]

# Label column, and the column that will hold the model's predictions.
target = 'Default'
prediction = 'Prediction'

In [32]:
# Read the cleaned dataset; low_memory=False makes pandas parse the file in
# one pass instead of in chunks.
df_clean = pd.read_csv(clean_data, low_memory=False)
# Seeded preview so the rows displayed are the same on every run.
df_clean.sample(3, random_state=44)

Unnamed: 0,State,BankState,DifState,Sector,AppYear,AppMonth,Term,NoEmp,Secured,NewExist,Urban,Rural,RevLine,LowDoc,GrDisburs,GrApprov,ApprovSBA,SecuredSBA,Default
652692,21,24,0,16,39,8,87,6,0,1,1,0,0,0,250000,250000,187500,75,0
363278,12,45,1,20,25,10,66,24,0,1,0,0,0,0,60000,60000,54000,90,0
583857,43,47,0,7,29,7,84,4,0,0,0,0,0,0,100000,100000,90000,90,0


In [33]:
# Cut-off on AppYear: rows with AppYear >= threshold form the "current"
# window, rows below it form the "reference" window.
# Series.max() is vectorised and skips NaN, unlike the built-in max().
threshold = df_clean['AppYear'].max() - last_years

# .copy() gives each split its own frame, so adding columns to it later does
# not write into a slice of df_clean (no SettingWithCopyWarning).
reference = df_clean[df_clean['AppYear'] < threshold].copy()
current = df_clean[df_clean['AppYear'] >= threshold].copy()

print(f'Reference: {len(reference)}')
print(f'Current: {len(current)}')

Reference: 744271
Current: 150339


In [34]:
# Model inputs are the numerical columns followed by the categorical ones;
# the label is the `target` column. The reference window is used for fitting,
# the current window for evaluation.
X_train, y_train = reference[numerical + categorical], reference[target]
X_test, y_test = current[numerical + categorical], current[target]

## Train Model

In [35]:
# Hyperparameters for the classifier; random_state is fixed so that
# repeated runs use the same seed.
params = dict(
    learning_rate=0.1,
    max_depth=10,
    n_estimators=300,
    random_state=44,
)

# Fit on the reference window.
xgb_model = XGBClassifier(**params)
xgb_model.fit(X_train, y_train)

In [36]:
# Score the fitted classifier on the current window (X_test / y_test).
val_acc = xgb_model.score(X=X_test, y=y_test)
print('Validation Accuracy: ', round(val_acc, ndigits=4))

Validation Accuracy:  0.9195


In [None]:
# Feature Importance (disabled; re-enabling requires importing matplotlib.pyplot as plt and seaborn as sns)
# features = pd.DataFrame(xgb_model.feature_importances_, index=X_train.columns, columns=['Importance'])
# features = features.sort_values(by='Importance', ascending=False)

# plt.figure(figsize=(9, 6))
# sns.set(style='whitegrid')

# sns.barplot(data=features, x='Importance', y=features.index, palette='cool', hue=features.index)

# plt.title('Feature Importance')
# plt.xlabel('Importance')
# plt.ylabel('Variables')
# plt.show()

## Performance

In [37]:
# Attach the model's predictions as the `prediction` column.
# DataFrame.assign returns a new frame instead of writing into a filtered
# slice, which avoids SettingWithCopyWarning and makes the cell safe to re-run.
reference = reference.assign(**{prediction: xgb_model.predict(X_train)})
current = current.assign(**{prediction: xgb_model.predict(X_test)})

In [38]:
# Tell Evidently which columns hold the label, the model output, and the
# numerical / categorical features.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical,
    categorical_features=categorical,
)

In [39]:
# Classification metrics on the reference frame alone: it is passed as
# `current_data`, with no frame supplied for comparison.
class_performance = Report(metrics=[ClassificationPreset()])
class_performance.run(
    reference_data=None,
    current_data=reference,
    column_mapping=column_mapping,
)

class_performance.show(mode='inline')

Output hidden; open in https://colab.research.google.com to view.

In [40]:
# Same classification preset, now on the current frame with the reference
# frame supplied for comparison.
class_performance = Report(metrics=[ClassificationPreset()])
class_performance.run(
    reference_data=reference,
    current_data=current,
    column_mapping=column_mapping,
)

class_performance.show(mode='inline')

Output hidden; open in https://colab.research.google.com to view.

## Target Drift

In [41]:
# Target drift preset: current frame compared against the reference frame.
target_drift = Report(metrics=[TargetDriftPreset()])
target_drift.run(
    reference_data=reference,
    current_data=current,
    column_mapping=column_mapping,
)

target_drift.show(mode='inline')

Output hidden; open in https://colab.research.google.com to view.

## Data Drift

In [42]:
# Data drift preset: current frame compared against the reference frame.
# The report is bound to its own name rather than reusing `class_performance`,
# so the classification report is not overwritten by a drift report.
data_drift = Report(metrics=[DataDriftPreset()])
data_drift.run(
    reference_data=reference,
    current_data=current,
    column_mapping=column_mapping,
)

data_drift.show(mode='inline')

Output hidden; open in https://colab.research.google.com to view.