In [22]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import pandas as pd
import numpy as np

In [3]:
train = pd.read_csv("inputs/Predicted_train.csv")
test = pd.read_csv("inputs/Predicted_test.csv")
val = pd.read_csv("inputs/Predicted_val.csv")

In [32]:
# Function to check model drift
def check_model_drift(ref_metric_dict,cur_metric_dict,type='classification',tol=0.1):
    if type == 'classification':
        precision_change = abs((cur_metric_dict['Precision']-ref_metric_dict['Precision'])/ref_metric_dict['Precision'])
        recall_change = abs((cur_metric_dict['Recall']-ref_metric_dict['Recall'])/ref_metric_dict['Recall'])
        roc_auc_change = abs((cur_metric_dict['Roc-Auc']-ref_metric_dict['Roc-Auc'])/ref_metric_dict['Roc-Auc'])

        counter = 0
        for i in [precision_change,recall_change,roc_auc_change]:
            if i > tol:
                counter += 1

        if counter > 0:
            print("ALERT! There is a model drift.")
            print("Change in Precision: "+ str(np.round(100*precision_change,2))+"%")
            print("Change in Recall: "+ str(np.round(100*recall_change,2))+"%")
            print("Change in Roc-Auc: "+ str(np.round(100*roc_auc_change,2))+"%")
            return 1
        else:
            print("There is no model drift.")
            return 0

    elif type == 'regression':
        r2_change = abs((cur_metric_dict['R2_score']-ref_metric_dict['R2_score'])/ref_metric_dict['R2_score'])
        rmse_change = abs((cur_metric_dict['RMSE']-ref_metric_dict['RMSE'])/ref_metric_dict['RMSE'])
        mae_change = abs((cur_metric_dict['MAE']-ref_metric_dict['MAE'])/ref_metric_dict['MAE'])
        
        counter = 0
        for i in [rmse_change,mae_change]:
            if i > tol:
                counter += 1

        if counter > 0:
            print("ALERT! There is a model drift.")
            RMSE_CHANGE = np.round(100*rmse_change,2)
            MAE_CHANGE = np.round(100*mae_change,2)
            print("Change in R2 Score: "+ str(np.round(rmse_change,2)))
            print("Change in RMSE: "+ str(np.round(100*rmse_change,2))+"%")
            print("Change in MAE: "+ str(np.round(100*mae_change,2))+"%")
            return 1, RMSE_CHANGE, MAE_CHANGE
        else:
            print("There is no model drift.")
            RMSE_CHANGE = 'NONE'
            MAE_CHANGE = 'NONE'
            return 0, RMSE_CHANGE, MAE_CHANGE
        
        

In [1]:
def finalize_model(new_perform_dict, old_perform_dict):
    count = 0
    for metric in new_perform_dict.keys():
        if new_perform_dict[metric] < old_perform_dict[metric]:
            count += 1
    
    if count > 0:
        return 'New Model'
    else:
        return 'Old Model'

In [33]:
def model_monitoring(test):
    actual = test['PRICE_IN_LAKHS']
    predicted = test['PREDICTED_PRICE_IN_LAKHS']

    r2score = r2_score(actual,predicted)
    rmse = np.sqrt(mean_squared_error(actual,predicted))
    mae = np.sqrt(mean_absolute_error(actual,predicted))
#     print("RMSE: ", rmse)
#     print("MAE: ", mae)

    scoring_ref_metrics = {}
    scoring_ref_metrics['R2_score'] = r2score
    scoring_ref_metrics['RMSE'] = rmse
    scoring_ref_metrics['MAE'] = mae #+ 0.2*mae
#     print(scoring_ref_metrics)
    
    
    # Loading the reference performance dict (from training)
    with open('model/MODEL_XGB_PERFM_METRICS.pkl', 'rb') as F:
        model_ref_metric = pickle.load(F)
        
#     print(model_ref_metric)
    
    # Check for model drift
    model_drift, RMSE_CHANGE, MAE_CHANGE = check_model_drift(model_ref_metric,scoring_ref_metrics,type='regression',tol=0.1)
    
    # Log values
    log = {}
    #log['Time Period'] = str(batch_df['ADMISSION_DATE'].min()) + ' to ' + str(batch_df['ADMISSION_DATE'].max())
    #log['Total Records'] = batch_df.shape[0]
    log['Scoring Metrics'] = scoring_ref_metrics
    log['Training Metrics'] = model_ref_metric
    log['Model Drift IND'] = model_drift
    log['RMSE Change'] = RMSE_CHANGE
    log['MAE Change'] = MAE_CHANGE
    
    return log

In [34]:
log = model_monitoring(test)
log

ALERT! There is a model drift.
Change in R2 Score: 0.0
Change in RMSE: 0.0%
Change in MAE: 65.14%


{'Scoring Metrics': {'R2_score': 0.9226522226658003,
  'RMSE': 10.84660054827755,
  'MAE': 2.8685646151777857},
 'Training Metrics': {'R2_score': 0.9226522226658003,
  'RMSE': 10.84660054827755,
  'MAE': 8.228662951450076},
 'Model Drift IND': 1,
 'RMSE Change': 0.0,
 'MAE Change': 65.14}

In [None]:
# logging the training performance metrics and the trained model
precision = metrics.precision_score(y_train_encode,y_pred_class_encode)
recall = metrics.recall_score(y_train_encode,y_pred_class_encode)
roc_auc = metrics.roc_auc_score(y_train_encode,y_pred)

training_performance_metrics = dict()
training_performance_metrics['Precision'] = np.round(precision,2)
training_performance_metrics['Recall'] = np.round(recall,2)
training_performance_metrics['Roc-Auc'] = np.round(roc_auc,2)

print(training_performance_metrics)

with open('Training_Perfrom_Metrics.pkl','wb') as F:
    pickle.dump(training_performance_metrics,F)

with open('RF_Loan_Model.pkl','wb') as F:
    pickle.dump(rf,F)
