### Print all Performance metrics:

#### Initialize the data frames to store and compare the performance metrics of the models.

The initialization cells (In [1] for classifiers, In [2] for regressors) create empty data frames that store each model's performance scores, so that the different models can be compared after the predictions.

### Hyperparameter tuning using grid search:
#### Function to obtain the best model by performing hyperparameter tuning with GridSearchCV.

I have defined a function `get_best_hyperparameters` that performs hyperparameter tuning with GridSearchCV. It takes a classifier or regressor model, a parameter grid, the cross-validation setting and the training data as input, and returns the best estimator found, which can then be used to fit and predict. This step can be skipped if one just wants to build a basic model without performing any hyperparameter tuning.

In [3]:
# For both Classifier and Regressor
from sklearn.model_selection import GridSearchCV 
def get_best_hyperparameters(model, params, cv_value, X_train, y_train, scoring=None):
    """Tune ``model`` with an exhaustive grid search and return the best estimator.

    Works for classifiers and regressors alike.

    Args:
        model: Unfitted scikit-learn estimator to tune.
        params: Parameter grid, forwarded to ``GridSearchCV`` as ``param_grid``.
        cv_value: Cross-validation setting, forwarded to ``GridSearchCV`` as ``cv``.
        X_train: Training features the search is fitted on.
        y_train: Training targets the search is fitted on.
        scoring: Optional metric, forwarded to ``GridSearchCV`` as ``scoring``.
            ``None`` (the default) keeps the previous behaviour of using the
            estimator's own ``score`` method.

    Returns:
        The ``best_estimator_`` of the fitted search.
    """
    search = GridSearchCV(
        estimator=model,
        param_grid=params,
        scoring=scoring,
        n_jobs=-1,  # use every available core
        verbose=1,
        cv=cv_value,
    )
    search.fit(X_train, y_train)
    # best_score_ is the mean cross-validated score of the winning candidate
    # under the active scorer. It is only an accuracy when the scorer is
    # accuracy (e.g. it is R2 for a regressor's default scorer), so it is
    # labelled generically.
    print("Best CV Score    :", search.best_score_)
    print("Best Parameters  : ", search.best_params_)
    print("Best Estimators : ", search.best_estimator_)
    return search.best_estimator_

### Model Fit and Predict:
#### Function to fit and predict the model:

The functions `get_classifier_predictions` and `get_regressor_predictions` take a model together with the training and test data, fit the model on the training data, and return the predictions for the train and test sets. The classifier version also returns the predicted probabilities for the train and test sets.

In [4]:
#For Classifier
def get_classifier_predictions(classifier, X_train, y_train, X_test):
    """Fit ``classifier`` on the training data and predict both splits.

    Args:
        classifier: Object exposing ``fit``, ``predict`` and ``predict_proba``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.

    Returns:
        A 4-tuple ``(train labels, test labels, train probabilities,
        test probabilities)``.
    """
    splits = (X_train, X_test)
    classifier.fit(X_train, y_train)
    # Labels first, probabilities second; train before test within each pair.
    labels_train, labels_test = (classifier.predict(features) for features in splits)
    proba_train, proba_test = (classifier.predict_proba(features) for features in splits)
    return labels_train, labels_test, proba_train, proba_test

In [5]:
#For Regressor
def get_regressor_predictions(regressor, X_train, y_train, X_test):
    """Fit ``regressor`` on the training data and predict both splits.

    Args:
        regressor: Object exposing ``fit`` and ``predict``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.

    Returns:
        A 2-tuple ``(train predictions, test predictions)``.
    """
    regressor.fit(X_train, y_train)
    # Train split is predicted before the test split.
    train_pred, test_pred = (regressor.predict(features) for features in (X_train, X_test))
    return train_pred, test_pred

### Performance Metrics:
#### Function to calculate and print the performance metrics of train and test dataset

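The functions `print_classifier_scores` and `print_regressor_scores` compute and print the performance metrics of a classification or regression model on the train and test sets. Each returns two dictionaries: one with the train and test value of the score used for comparison (accuracy for classifiers, adjusted R2 for regressors), and one with the full set of test-set metrics.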

In [6]:
# For Classifier
from sklearn.metrics import accuracy_score ,confusion_matrix ,precision_score , recall_score , f1_score, plot_confusion_matrix ,roc_auc_score
import matplotlib.pyplot as plt                                     # Importing pyplot interface to use matplotlib
%matplotlib inline
def _report_classifier_split(split_name, classifier, X, y_true, y_pred, y_pred_prob):
    """Print the metrics and plot the confusion matrix for one data split.

    Args:
        split_name: ``'Train'`` or ``'Test'``; used in the printed header and
            the plot title.
        classifier: Fitted classifier, handed to ``plot_confusion_matrix``.
        X: Features of the split.
        y_true: True labels of the split.
        y_pred: Predicted labels of the split.
        y_pred_prob: Predicted probabilities of the split; must support
            ``[:, 1]`` indexing.

    Returns:
        Dict mapping metric name to its value for this split.
    """
    scores = {
        'Recall Score': recall_score(y_true, y_pred),
        'Precision Score': precision_score(y_true, y_pred),
        'F1 Score': f1_score(y_true, y_pred),
        'Accuracy Score': accuracy_score(y_true, y_pred),
        # Column 1 of the probability matrix is used as the ranking score.
        'ROC AUC': roc_auc_score(y_true, y_pred_prob[:, 1]),
    }

    print('{}-Set Confusion Matrix:\n'.format(split_name), confusion_matrix(y_true, y_pred))
    print("Recall Score    : ", scores['Recall Score'])
    print("Precision Score : ", scores['Precision Score'])
    print("F1 Score        : ", scores['F1 Score'])
    print("Accuracy Score  : ", scores['Accuracy Score'])
    print("ROC AUC         :  {}".format(scores['ROC AUC']))
    print("Predict Probability  :", y_pred_prob)

    # display_labels is deliberately not passed: scikit-learn orders the matrix
    # by sorted label, so the former hard-coded ["1", "0"] labelled the axes
    # the wrong way round for 0/1 targets. The default uses the real labels.
    # NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; the
    # replacement is ConfusionMatrixDisplay.from_estimator (needs the import
    # line of this cell to change as well).
    display = plot_confusion_matrix(classifier, X, y_true)
    display.ax_.set_title('{}-Set Confusion Matrix'.format(split_name))
    # grid(None) only toggles the grid (and the `b` keyword was renamed in
    # Matplotlib 3.5); False switches it off unconditionally.
    display.ax_.grid(False)
    return scores


def print_classifier_scores(classifier, X_train, X_test, y_train, y_test, y_pred_train, y_pred_test, y_pred_prob_train, y_pred_prob_test, algorithm):
    """Print train/test classification metrics and return them for comparison.

    Recall, precision and F1 are computed with scikit-learn's default
    arguments, and ROC AUC from column 1 of the probability matrices, so this
    presumably targets binary classification -- confirm before using it on
    multi-class problems.

    Args:
        classifier: Fitted classifier (used only to plot confusion matrices).
        X_train, X_test: Feature sets of the two splits.
        y_train, y_test: True labels of the two splits.
        y_pred_train, y_pred_test: Predicted labels of the two splits.
        y_pred_prob_train, y_pred_prob_test: Predicted probabilities of the
            two splits.
        algorithm: Name stored under the ``'Model'`` key of both results.

    Returns:
        A pair of dicts: ``(train vs. test accuracy, test-set metrics)``.
    """
    train_scores = _report_classifier_split(
        'Train', classifier, X_train, y_train, y_pred_train, y_pred_prob_train)
    test_scores = _report_classifier_split(
        'Test', classifier, X_test, y_test, y_pred_test, y_pred_prob_test)

    # Plain dicts (one row each) meant to be added to the comparison frames.
    accuracy_row = {
        'Model': algorithm,
        'Train Accuracy Score': train_scores['Accuracy Score'],
        'Test Accuracy Score': test_scores['Accuracy Score'],
    }
    performance_row = {
        'Model': algorithm,
        'Accuracy Score': test_scores['Accuracy Score'],
        'F1 Score': test_scores['F1 Score'],
        'Precision Score': test_scores['Precision Score'],
        'Recall Score': test_scores['Recall Score'],
        'ROC AUC': test_scores['ROC AUC'],
    }
    return accuracy_row, performance_row

In [7]:
# For regressor 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
def _adjusted_r2(r2, n_samples, n_features):
    """Return the adjusted R2, or NaN when ``n_samples <= n_features + 1``."""
    degrees_of_freedom = n_samples - n_features - 1
    if degrees_of_freedom <= 0:
        # The adjustment is undefined here; the raw formula would divide by
        # zero or return a meaningless value.
        return float('nan')
    return 1 - (1 - r2) * (n_samples - 1) / degrees_of_freedom


def print_regressor_scores(regressor, X_train, X_test, y_train, y_test, y_pred_train, y_pred_test, algorithm):
    """Print train/test regression metrics and return them for comparison.

    Args:
        regressor: Fitted regressor (accepted for signature symmetry with the
            classifier variant; not used here).
        X_train, X_test: Feature sets of the two splits; ``shape[1]`` is taken
            as the number of predictors for the adjusted R2.
        y_train, y_test: True targets of the two splits.
        y_pred_train, y_pred_test: Predicted targets of the two splits.
        algorithm: Name stored under the ``'Model'`` key of both results.

    Returns:
        A pair of dicts: ``(train vs. test adjusted R2, test-set metrics)``.
    """
    # Scores for the training dataset.
    MAE_train = mean_absolute_error(y_train, y_pred_train)
    RMSE_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
    r2_score_train = r2_score(y_train, y_pred_train)
    # Reuse the R2 already computed instead of re-deriving it by hand.
    adj_r_sq_train = _adjusted_r2(r2_score_train, len(y_train), X_train.shape[1])

    print('MAE for training set is {}'.format(MAE_train))
    print('RMSE for training set is {}'.format(RMSE_train))
    print('R squared score for training set is {}'.format(r2_score_train))
    print('Adjusted R squared score for training set is {}'.format(adj_r_sq_train))

    # Scores for the test dataset.
    MAE_test = mean_absolute_error(y_test, y_pred_test)
    RMSE_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    r2_score_test = r2_score(y_test, y_pred_test)
    adj_r_sq_test = _adjusted_r2(r2_score_test, len(y_test), X_test.shape[1])

    print('MAE for test set is {}'.format(MAE_test))
    print('RMSE for test set is {}'.format(RMSE_test))
    print('R squared score for test set is {}'.format(r2_score_test))
    print('Adjusted R squared score for testing set is {}'.format(adj_r_sq_test))

    # Plain dicts (one row each) meant to be added to the comparison frames.
    adjusted_r2_row = {
        'Model': algorithm,
        'Train Adjusted R2 Score': adj_r_sq_train,
        'Test Adjusted R2 Score': adj_r_sq_test,
    }
    performance_row = {
        'Model': algorithm,
        'MAE': MAE_test,
        'RMSE': RMSE_test,
        'R2 Score': r2_score_test,
        'Adjusted R2 Score': adj_r_sq_test,
    }
    return adjusted_r2_row, performance_row

In [1]:
 # For Classifier
import pandas as pd
import numpy as np
# Empty frame that collects the scores of the classifier models.
# `df_model` and `df_model_performance` are two names for the same DataFrame.
df_model_performance = df_model = pd.DataFrame(
    columns=['Model', 'Accuracy Score', 'F1 Score', 'Precision Score', 'Recall Score', 'ROC AUC'],
)
# Empty frame that collects train and test accuracy of the classifier models,
# for comparison at the end of model building. It can be extended to compare
# other scores such as the F1 score.
# `df_model_test_train_acc` and `df_model_accuracy` are the same DataFrame.
df_model_accuracy = df_model_test_train_acc = pd.DataFrame(
    columns=['Model', 'Train Accuracy Score', 'Test Accuracy Score'],
)

In [2]:
# For Regressor
import pandas as pd
import numpy as np
# Empty frame that collects the scores of the regressor models.
# `df_model` and `df_model_performance` are two names for the same DataFrame.
df_model_performance = df_model = pd.DataFrame(
    columns=['Model', 'MAE', 'RMSE', 'R2 Score', 'Adjusted R2 Score'],
)
# Empty frame that collects train and test "adjusted R2 scores" of the
# regressor models, for comparison at the end of model building. It can be
# extended to compare other scores such as MSE or RMSE.
# `df_model_test_train_r2` and `df_model_r2` are the same DataFrame.
df_model_r2 = df_model_test_train_r2 = pd.DataFrame(
    columns=['Model', 'Train Adjusted R2 Score', 'Test Adjusted R2 Score'],
)