In [22]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import pandas as pd
import numpy as np

In [3]:
train = pd.read_csv("inputs/Predicted_train.csv")
test = pd.read_csv("inputs/Predicted_test.csv")
val = pd.read_csv("inputs/Predicted_val.csv")

In [32]:
# Function to check model drift
def check_model_drift(ref_metric_dict,cur_metric_dict,type='classification',tol=0.1):
    if type == 'classification':
        precision_change = abs((cur_metric_dict['Precision']-ref_metric_dict['Precision'])/ref_metric_dict['Precision'])
        recall_change = abs((cur_metric_dict['Recall']-ref_metric_dict['Recall'])/ref_metric_dict['Recall'])
        roc_auc_change = abs((cur_metric_dict['Roc-Auc']-ref_metric_dict['Roc-Auc'])/ref_metric_dict['Roc-Auc'])

        counter = 0
        for i in [precision_change,recall_change,roc_auc_change]:
            if i > tol:
                counter += 1

        if counter > 0:
            print("ALERT! There is a model drift.")
            print("Change in Precision: "+ str(np.round(100*precision_change,2))+"%")
            print("Change in Recall: "+ str(np.round(100*recall_change,2))+"%")
            print("Change in Roc-Auc: "+ str(np.round(100*roc_auc_change,2))+"%")
            return 1
        else:
            print("There is no model drift.")
            return 0

    elif type == 'regression':
        r2_change = abs((cur_metric_dict['R2_score']-ref_metric_dict['R2_score'])/ref_metric_dict['R2_score'])
        rmse_change = abs((cur_metric_dict['RMSE']-ref_metric_dict['RMSE'])/ref_metric_dict['RMSE'])
        mae_change = abs((cur_metric_dict['MAE']-ref_metric_dict['MAE'])/ref_metric_dict['MAE'])
        
        counter = 0
        for i in [rmse_change,mae_change, r2_change]:
            if i > tol:
                counter += 1

        if counter > 0:
            print("ALERT! There is a model drift.")
            R2_CHANGE = np.round(r2_change, 2)
            RMSE_CHANGE = np.round(100*rmse_change,2)
            MAE_CHANGE = np.round(100*mae_change,2)
            print("Change in R2 Score: "+ str(np.round(r2_change,2)))
            print("Change in RMSE: "+ str(np.round(100*rmse_change,2))+"%")
            print("Change in MAE: "+ str(np.round(100*mae_change,2))+"%")
            return 1, RMSE_CHANGE, MAE_CHANGE, R2_CHANGE
        else:
            print("There is no model drift.")
            RMSE_CHANGE = 'NONE'
            MAE_CHANGE = 'NONE'
            return 0, RMSE_CHANGE, MAE_CHANGE
        
        

In [1]:
def finalize_model(new_perform_dict, old_perform_dict):
    count = 0
    for metric in new_perform_dict.keys():
        if new_perform_dict[metric] < old_perform_dict[metric]:
            count += 1
    
    if count > 0:
        return 'New Model'
    else:
        return 'Old Model'

In [33]:
def model_monitoring(test):
    actual = test['PRICE_IN_LAKHS']
    predicted = test['PREDICTED_PRICE_IN_LAKHS']

    r2score = r2_score(actual,predicted)
    rmse = np.sqrt(mean_squared_error(actual,predicted))
    mae = np.sqrt(mean_absolute_error(actual,predicted))
#     print("RMSE: ", rmse)
#     print("MAE: ", mae)

    scoring_ref_metrics = {}
    scoring_ref_metrics['R2_score'] = r2score
    scoring_ref_metrics['RMSE'] = rmse
    scoring_ref_metrics['MAE'] = mae #+ 0.2*mae
#     print(scoring_ref_metrics)
    
    
    # Loading the reference performance dict (from training)
    with open('model/MODEL_XGB_PERFM_METRICS.pkl', 'rb') as F:
        model_ref_metric = pickle.load(F)
        
#     print(model_ref_metric)
    
    # Check for model drift
    model_drift, RMSE_CHANGE, MAE_CHANGE = check_model_drift(model_ref_metric,scoring_ref_metrics,type='regression',tol=0.1)
    
    # Log values
    log = {}
    #log['Time Period'] = str(batch_df['ADMISSION_DATE'].min()) + ' to ' + str(batch_df['ADMISSION_DATE'].max())
    #log['Total Records'] = batch_df.shape[0]
    log['Scoring Metrics'] = scoring_ref_metrics
    log['Training Metrics'] = model_ref_metric
    log['Model Drift IND'] = model_drift
    log['RMSE Change'] = RMSE_CHANGE
    log['MAE Change'] = MAE_CHANGE
    
    return log

In [34]:
log = model_monitoring(test)
log

ALERT! There is a model drift.
Change in R2 Score: 0.0
Change in RMSE: 0.0%
Change in MAE: 65.14%


{'Scoring Metrics': {'R2_score': 0.9226522226658003,
  'RMSE': 10.84660054827755,
  'MAE': 2.8685646151777857},
 'Training Metrics': {'R2_score': 0.9226522226658003,
  'RMSE': 10.84660054827755,
  'MAE': 8.228662951450076},
 'Model Drift IND': 1,
 'RMSE Change': 0.0,
 'MAE Change': 65.14}

In [None]:
# logging the training performance metrics and the trained model
precision = metrics.precision_score(y_train_encode,y_pred_class_encode)
recall = metrics.recall_score(y_train_encode,y_pred_class_encode)
roc_auc = metrics.roc_auc_score(y_train_encode,y_pred)

training_performance_metrics = dict()
training_performance_metrics['Precision'] = np.round(precision,2)
training_performance_metrics['Recall'] = np.round(recall,2)
training_performance_metrics['Roc-Auc'] = np.round(roc_auc,2)

print(training_performance_metrics)

with open('Training_Perfrom_Metrics.pkl','wb') as F:
    pickle.dump(training_performance_metrics,F)

with open('RF_Loan_Model.pkl','wb') as F:
    pickle.dump(rf,F)


In [None]:
class Model_drift:
    def __init__(self, model_type):
        self.model_type = model_type


    def finalize_model(self, new_perform_dict, old_perform_dict):
        count = 0
        for metric in new_perform_dict.keys():
            if new_perform_dict[metric] < old_perform_dict[metric]:
                count += 1
        
        if count > 0:
            return 'New Model'
        else:
            return 'Old Model'


    # Function to check model drift
    def check_model_drift(ref_metric_dict,cur_metric_dict,model_type='classification',tol=0.1):
        if type == 'classification':
            precision_change = abs((cur_metric_dict['Precision']-ref_metric_dict['Precision'])/ref_metric_dict['Precision'])
            recall_change = abs((cur_metric_dict['Recall']-ref_metric_dict['Recall'])/ref_metric_dict['Recall'])
            roc_auc_change = abs((cur_metric_dict['Roc-Auc']-ref_metric_dict['Roc-Auc'])/ref_metric_dict['Roc-Auc'])
    
            counter = 0
            for i in [precision_change,recall_change,roc_auc_change]:
                if i > tol:
                    counter += 1
    
            if counter > 0:
                print("ALERT! There is a model drift.")
                print("Change in Precision: "+ str(np.round(100*precision_change,2))+"%")
                print("Change in Recall: "+ str(np.round(100*recall_change,2))+"%")
                print("Change in Roc-Auc: "+ str(np.round(100*roc_auc_change,2))+"%")
                return 1
            else:
                print("There is no model drift.")
                return 0
    
        elif type == 'regression':
            r2_change = abs((cur_metric_dict['R2_score']-ref_metric_dict['R2_score'])/ref_metric_dict['R2_score'])
            rmse_change = abs((cur_metric_dict['RMSE']-ref_metric_dict['RMSE'])/ref_metric_dict['RMSE'])
            mae_change = abs((cur_metric_dict['MAE']-ref_metric_dict['MAE'])/ref_metric_dict['MAE'])
            
            counter = 0
            for i in [rmse_change,mae_change, r2_change]:
                if i > tol:
                    counter += 1
    
            if counter > 0:
                print("ALERT! There is a model drift.")
                R2_CHANGE = np.round(r2_change, 2)
                RMSE_CHANGE = np.round(100*rmse_change,2)
                MAE_CHANGE = np.round(100*mae_change,2)
                print("Change in R2 Score: "+ str(np.round(r2_change,2)))
                print("Change in RMSE: "+ str(np.round(100*rmse_change,2))+"%")
                print("Change in MAE: "+ str(np.round(100*mae_change,2))+"%")
                return 1, RMSE_CHANGE, MAE_CHANGE, R2_CHANGE
            else:
                print("There is no model drift.")
                RMSE_CHANGE = 'NONE'
                MAE_CHANGE = 'NONE'
                R2_CHANGE = 'NONE'
                return 0, RMSE_CHANGE, MAE_CHANGE, R2_CHANGE



        def retrain_model(self):
            # Loading the scoring data
            data = pd.DataFrame(pd.read_sql(retraining_batch_query(cut_off_date),conn))
            data.columns = [col.upper() for col in data.columns.tolist()]
            print(data.shape)
            #display(data.head())
    
            # Splitting the data into Train and Test set
            import pytz    
            from datetime import datetime, timedelta
            tz_NY = pytz.timezone('Asia/Kolkata')
    
            max_date = data.ADMISSION_DATE.max()
            min_date = max_date - timedelta(days=7)
    
            data_train = data[(data['ADMISSION_DATE'] <= min_date)]
            data_test = data[(data['ADMISSION_DATE'] >= min_date) & (data['ADMISSION_DATE'] <= max_date)]
    
    
            # Applying the preprocessing steps
            df_train_processed = LOS_Preprocessing.preprocess_data(data_train)
            print(df_train_processed.shape)
    
            df_test_processed = LOS_Preprocessing.preprocess_data(data_test)
            print(df_test_processed.shape)
    
            # Performing feature selection
            df_final = df_train_processed.copy()
            print(df_final.shape)
        #     display(df_final.head())
            print("Feature Selection Started..")
            model_feats = feature_selection(df_final)
            print(model_feats)
            model_feats.remove('LOS')
    
            # Model Building
            import xgboost as xgb
    
            xgb_ = xgb.XGBRegressor()
            xgb_.fit(df_final[model_feats],df_final['LOS'])
    
            df_test_final = check_n_create_model_features(df_test_processed,model_feats)
            if 'LOS' in df_test_final.columns.tolist():
                df_test_final = df_test_final.drop('LOS',axis=1)
            preds = np.ceil(xgb_.predict(df_test_final))
            rmse = np.sqrt(metrics.mean_squared_error(df_test_processed['LOS'],preds))
            mae = np.sqrt(metrics.mean_absolute_error(df_test_processed['LOS'],preds))
            print("\n Test Performance (new model)")
            print("RMSE: ", rmse)
            print("MAE: ", mae)      
    
            # Saving the trained model
            booster = xgb_.get_booster()
            booster.save_model('./Retraining Artifacts/MODEL_XGB.model')
    
            model_xgb_metrics_new = {}
            model_xgb_metrics_new['RMSE'] = rmse
            model_xgb_metrics_new['MAE'] = mae
    
            import pickle
    
            with open('./Retraining Artifacts/MODEL_XGB_PERFM_METRICS.pkl','wb') as F:
                pickle.dump(model_xgb_metrics_new,F)
    
    
            # Getting the predictions from the old model
            model = xgboost.XGBRegressor()
            model.load_model('MODEL_XGB.model')
        #     df_test_processed['PREDICTED_LOS'] = np.ceil(model.predict(df_test_processed[model_feats]))
    
            with open('MODEL_FEATS.pkl','rb') as F:
                model_feats_old = pickle.load(F)
    
            df_test_final = check_n_create_model_features(df_test_processed,model_feats_old)
            if 'LOS' in df_test_final.columns.tolist():
                df_test_final = df_test_final.drop('LOS',axis=1)
            preds = np.ceil(model.predict(df_test_final))
            rmse = np.sqrt(metrics.mean_squared_error(df_test_processed['LOS'],preds))
            mae = np.sqrt(metrics.mean_absolute_error(df_test_processed['LOS'],preds))
            print("\n Test Performance (old model)")
            print("RMSE: ", rmse)
            print("MAE: ", mae)   
    
            model_xgb_metrics_old = {}
            model_xgb_metrics_old['RMSE'] = rmse
            model_xgb_metrics_old['MAE'] = mae
        
        return model_xgb_metrics_new, model_xgb_metrics_old





    def deploy_model(self, selector='Old Model'):
        if selector != 'Old Model':
            # STEP-1:
            # Loading the old model
            with open('MODEL_FEATS.pkl','rb') as F:
                old_feats = pickle.load(F)
            with open('MODEL_XGB.model','rb') as F:
                old_model = pickle.load(F)
            with open('MODEL_XGB_PERFM_METRICS.pkl','rb') as F:
                old_perfm_dict = pickle.load(F)
                
            # Saving the copy to Archive folder
            with open('./Archive/MODEL_FEATS.pkl','wb') as F:
                pickle.dump(old_feats,F)
            with open('./Archive/MODEL_XGB.model','wb') as F:
                pickle.dump(old_model,F)
            with open('./Archive/MODEL_XGB_PERFM_METRICS.pkl','wb') as F:
                pickle.dump(old_perfm_dict,F)
                
            # STEP-2:
            # Loadin the new model
            with open('./Retraining Artifacts/MODEL_FEATS.pkl','rb') as F:
                new_feats = pickle.load(F)
            with open('./Retraining Artifacts/MODEL_XGB.model','rb') as F:
                new_model = pickle.load(F)
            with open('./Retraining Artifacts/MODEL_XGB_PERFM_METRICS.pkl','rb') as F:
                new_perfm_dict = pickle.load(F)
                
            # Replacing the old model artifacts with the new model
            with open('MODEL_FEATS.pkl','wb') as F:
                pickle.dump(new_feats,F)
            with open('MODEL_XGB.model','wb') as F:
                pickle.dump(new_model,F)
            with open('MODEL_XGB_PERFM_METRICS.pkl','wb') as F:
                pickle.dump(new_perfm_dict,F)
                
        return 'Deployment Successful'
            
        
        

    