# Prediction of Credit Approval

## Part 3: Explaining Selected Models Using SHAP

### Load the Libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import warnings
import shap
warnings.filterwarnings("ignore")

from sklearn.pipeline import make_pipeline
from ipynb.fs.defs.func import histogram, measures, evaluation_plot
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold

### Load Preprocessed dataset

In [8]:
# Restore the four pickled objects (features and labels for the train and
# test splits). The relative paths resolve against the current working
# directory; presumably the files come from an earlier preprocessing step.
# NOTE(review): pickle.load can execute arbitrary code, so only load files
# from a trusted source.
with open('x_train.pickle', 'rb') as pickle_file:
    x_train = pickle.load(pickle_file)

with open('x_test.pickle', 'rb') as pickle_file:
    x_test = pickle.load(pickle_file)

with open('y_train.pickle', 'rb') as pickle_file:
    y_train = pickle.load(pickle_file)

with open('y_test.pickle', 'rb') as pickle_file:
    y_test = pickle.load(pickle_file)

### Exporting SHAP Values for Interpretation

In [3]:
def result(x_test, shap_val_AB, modelname):
    """Export a matrix of SHAP values to ``Credit{modelname}.csv``.

    The table is built and written exactly once. It is written to the current
    working directory with the default integer row index and the column
    labels of ``x_test``.

    Parameters
    ----------
    x_test : pandas.DataFrame
        Frame whose row count and column labels define the expected shape
        and the header of the exported table.
    shap_val_AB : array-like
        SHAP values with one row per row of ``x_test`` and one column per
        column of ``x_test``.
    modelname : str
        Suffix inserted into the file name, e.g. ``'_RF'`` produces
        ``Credit_RF.csv``.

    Returns
    -------
    pandas.DataFrame
        The table that was written to disk.

    Raises
    ------
    ValueError
        If ``shap_val_AB`` does not have shape
        ``(len(x_test), len(x_test.columns))``.
    """
    expected_shape = (len(x_test), len(x_test.columns))

    # np.array (not asarray) so the exported frame never aliases the caller's
    # SHAP array.
    shap_matrix = np.array(shap_val_AB, dtype=float)

    # An empty list converts to shape (0,); normalise it when the expected
    # table is empty as well.
    if shap_matrix.size == 0 and 0 in expected_shape:
        shap_matrix = shap_matrix.reshape(expected_shape)

    # Fail loudly instead of silently truncating or mislabelling columns.
    if shap_matrix.shape != expected_shape:
        raise ValueError(
            f"SHAP values have shape {shap_matrix.shape}, "
            f"expected {expected_shape} to match x_test"
        )

    file_name = f'Credit{modelname}.csv'
    df = pd.DataFrame(shap_matrix, columns=x_test.columns)
    df.to_csv(file_name)
    return df

### Loading Models

In [5]:
# Restore the pickled model object from the working directory and bind it
# to RF.
# NOTE(review): pickle.load can execute arbitrary code; only load model
# files from a trusted source.
with open('RandomForest.pkl', 'rb') as model_file:
    RF = pickle.load(model_file)

## SHAP

In [7]:
# Build a KernelExplainer around the model's predict function, using the
# whole of x_test as background data, then explain every row of x_test.
# NOTE(review): passing the full test set as background is what triggers the
# shap warning about slow run times; shap.sample / shap.kmeans would shrink
# it, but that would also change the SHAP values analysed below.
ex_RF = shap.KernelExplainer(
    RF.predict,
    x_test,
)
shap_val_RF = ex_RF.shap_values(x_test)

Using 138 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/138 [00:00<?, ?it/s]

In [8]:
# Write the SHAP values computed for RF to Credit_RF.csv (the suffix is
# inserted into the file name by result()).
result_RF = result(x_test, shap_val_RF, modelname='_RF')

# Features to Select
## Class 1 (Approved)

In [10]:
# Read the SHAP table back from an Excel workbook.
# NOTE(review): this is an absolute, machine-specific path, so the cell will
# not run elsewhere. The export step above writes a CSV, whereas this reads
# an .xlsx that also carries an ApprovalStatus column -- presumably the CSV
# was converted and labelled by hand; TODO confirm and document that step.
shap_workbook_path = r'D:\Learn\master\Projects\Error Correction\Credit_RF.xlsx'
values = pd.read_excel(shap_workbook_path, sheet_name='Credit_RF')
values

Unnamed: 0,Gender,Age,Debt,YearsEmployed,PriorDefault,Employed,CreditScore,DriverLicense,ZipCode,Income,...,Ethnicity_h,Ethnicity_j,Ethnicity_n,Ethnicity_o,Ethnicity_v,Ethnicity_z,Citizen_g,Citizen_p,Citizen_s,ApprovalStatus
0,0.000000,0.070870,0.008600,-0.146247,0.160121,0.073409,0.009642,-0.018379,0.013131,-0.073411,...,-0.013958,-0.007136,0,0.000000,0.000000,-0.010619,0.007621,0.000000,0.013714,1
1,0.009186,-0.023685,-0.024702,-0.024146,-0.421158,0.024074,-0.004320,-0.004488,0.005620,-0.013405,...,-0.003718,0.000000,0,0.001855,-0.004944,0.000960,0.001878,-0.002543,0.004090,0
2,0.002136,-0.012663,-0.028080,-0.042058,0.103062,-0.073214,-0.055253,-0.004798,-0.100564,-0.045732,...,-0.028151,-0.002419,0,-0.001759,0.008391,-0.001678,-0.048775,0.000000,-0.059866,0
3,-0.001590,-0.001894,0.002141,0.005941,-0.306912,-0.021895,-0.025736,0.000000,-0.003643,-0.012975,...,-0.006844,-0.003760,0,0.000000,0.002362,0.001631,-0.001531,-0.003059,0.005533,0
4,0.004708,0.079832,-0.054583,-0.053682,0.263834,-0.040777,-0.035290,0.072902,0.120427,-0.053102,...,-0.024365,-0.015368,0,0.000000,0.000000,0.000000,0.008368,-0.002492,0.018129,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,-0.007495,-0.022966,0.006199,-0.090475,0.142765,-0.115052,-0.073628,0.004741,-0.089945,-0.079398,...,-0.062484,0.000000,0,0.000000,-0.014735,-0.007059,0.002828,-0.006018,0.009996,0
134,-0.002714,-0.010185,-0.051003,-0.045675,0.095352,-0.080694,-0.057263,-0.002716,-0.064983,-0.048188,...,-0.016560,0.003969,0,0.000000,-0.008283,0.000000,-0.048556,-0.004535,-0.062607,1
135,-0.004209,-0.010683,0.000000,-0.022367,-0.326610,-0.004212,-0.017997,-0.007577,-0.037268,-0.015496,...,-0.005392,-0.002923,0,0.000000,-0.005467,0.000000,0.000000,-0.004768,0.008332,0
136,0.003532,-0.009156,0.002470,-0.021866,-0.299392,-0.018993,-0.020764,0.000000,-0.010777,-0.015370,...,0.021361,0.004267,0,-0.002177,0.003877,0.002131,0.003149,-0.005062,0.002466,0


In [12]:
# Keep only the rows whose ApprovalStatus equals 1, then drop that column so
# only the per-feature SHAP values remain.
is_class1 = values['ApprovalStatus'] == 1
Class1 = values.loc[is_class1].drop(columns='ApprovalStatus')
Class1

Unnamed: 0,Gender,Age,Debt,YearsEmployed,PriorDefault,Employed,CreditScore,DriverLicense,ZipCode,Income,...,Ethnicity_ff,Ethnicity_h,Ethnicity_j,Ethnicity_n,Ethnicity_o,Ethnicity_v,Ethnicity_z,Citizen_g,Citizen_p,Citizen_s
0,0.0,0.07087,0.0086,-0.146247,0.160121,0.073409,0.009642,-0.018379,0.013131,-0.073411,...,-0.28649,-0.013958,-0.007136,0,0.0,0.0,-0.010619,0.007621,0.0,0.013714
5,0.0,-0.017005,0.023625,0.038748,0.110108,-0.145104,-0.082092,0.0,0.018162,-0.08819,...,0.004294,-0.015214,0.0,0,0.0,-0.004555,0.0,-0.017599,-0.006331,-0.037187
6,0.003753,0.00403,0.006449,0.019608,0.391623,0.035589,0.005573,0.0,0.027019,0.0,...,0.006039,0.020731,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
11,-0.003416,-0.003346,0.022935,0.039881,0.370379,-0.008535,-0.008134,-0.002347,0.002053,0.088209,...,0.003351,-0.003439,0.001648,0,-0.004047,0.001344,0.002292,0.001485,-0.004757,0.006613
18,0.0,-0.003333,-0.009631,-0.014453,0.381694,0.045295,0.071547,0.002693,0.009795,0.078272,...,0.002055,-0.004814,0.0,0,0.0,0.001941,0.0,0.004192,-0.002173,0.007177
19,0.0,0.0,0.005155,0.013373,0.407795,0.025412,0.037723,0.0,0.009963,0.035147,...,0.003442,0.009342,0.0,0,0.0,0.0,0.0,0.000197,0.0,0.003439
21,0.002191,-0.003226,0.016317,0.044233,0.373245,0.060784,0.084609,-0.004185,-0.014823,-0.004316,...,0.008406,-0.002824,0.0,0,0.001016,-0.00363,0.0,0.0,-0.001324,0.006296
22,0.0,0.001634,0.005992,0.02244,0.390998,0.039803,0.051616,0.0,0.025786,0.010482,...,0.005541,-0.005597,0.0,0,0.0,0.0,0.0,0.00264,0.0,0.004034
24,-0.00326,0.020101,0.020333,0.011131,0.374055,0.057781,0.067353,0.003538,-0.022679,-0.003853,...,0.003874,0.0,0.001754,0,0.001346,-0.004014,0.0,0.001504,0.0,0.002716
29,0.0,-0.010528,0.0,-0.011007,-0.433268,-0.007209,-0.013631,0.0,-0.008021,0.091518,...,0.002638,-0.004339,0.0,0,0.0,0.0,-0.001232,-0.003879,0.005777,0.003933


In [14]:
def count_shap_values(shap_values, feature_names):
    """Count positive, negative and zero SHAP values for each feature.

    Parameters
    ----------
    shap_values : array-like
        SHAP values with one row per sample and one column per feature.
        A 1-D input is treated as a single sample.
    feature_names : sequence
        Column labels for the result, one per column of ``shap_values``.

    Returns
    -------
    pandas.DataFrame
        Integer-typed frame with rows ``'positive'``, ``'negative'`` and
        ``'zero'`` and one column per feature. NaN entries satisfy none of
        the three comparisons and are therefore not counted in any row.

    Raises
    ------
    ValueError
        If ``shap_values`` is not (at most) two-dimensional or its number of
        columns differs from ``len(feature_names)``.
    """
    # Accept DataFrames, nested lists or arrays; promote a single sample
    # (1-D vector) to a one-row matrix so counts stay per feature.
    shap_matrix = np.atleast_2d(np.asarray(shap_values))

    if shap_matrix.ndim != 2:
        raise ValueError(
            f"shap_values must be 2-D (samples x features), got {shap_matrix.ndim}-D"
        )
    if shap_matrix.shape[1] != len(feature_names):
        raise ValueError(
            f"shap_values has {shap_matrix.shape[1]} columns but "
            f"{len(feature_names)} feature names were given"
        )

    # Build the table in one step so the counts keep an integer dtype instead
    # of the object dtype produced by filling an initially empty frame.
    sign_counts = np.vstack([
        (shap_matrix > 0).sum(axis=0),
        (shap_matrix < 0).sum(axis=0),
        (shap_matrix == 0).sum(axis=0),
    ])

    return pd.DataFrame(
        sign_counts,
        index=['positive', 'negative', 'zero'],
        columns=feature_names,
    )

In [16]:
# Tally the sign of the SHAP values in Class1 for every feature.
# NOTE(review): the labels come from x_test.columns, so this assumes Class1
# has exactly those columns in the same order -- confirm.
shap_counts = count_shap_values(Class1, x_test.columns)

# Order the feature columns by their 'positive' count (largest first), then
# transpose so each feature becomes a row.
shap_counts_sorted_positive = (
    shap_counts
    .sort_values(by='positive', axis=1, ascending=False)
    .T
)
shap_counts_sorted_positive

Unnamed: 0,positive,negative,zero
PriorDefault,51,2,0
Ethnicity_ff,48,2,3
EducationLevel_ff,46,3,4
Married_y,44,9,0
Citizen_s,44,5,4
BankCustomer_g,43,10,0
Married_u,42,10,1
BankCustomer_p,42,9,2
EducationLevel_aa,41,4,8
YearsEmployed,37,14,2


# Features to Eliminate

In [21]:
# Same counts as in shap_counts, now ordered by the 'negative' count
# (largest first) and transposed so each feature becomes a row.
shap_counts_sorted_negative = (
    shap_counts
    .sort_values(by='negative', axis=1, ascending=False)
    .T
)
shap_counts_sorted_negative

Unnamed: 0,positive,negative,zero
Ethnicity_h,15,32,6
EducationLevel_q,10,29,14
EducationLevel_cc,7,28,18
ZipCode,27,25,1
Citizen_p,2,21,30
EducationLevel_w,10,20,23
Age,28,20,5
EducationLevel_x,15,19,19
Debt,33,18,2
Employed,36,17,0


# Features to Select
## Class 0 (Rejected)

In [24]:
# Keep only the rows whose ApprovalStatus equals 0, then drop that column so
# only the per-feature SHAP values remain.
is_class0 = values['ApprovalStatus'] == 0
Class0 = values.loc[is_class0].drop(columns='ApprovalStatus')
Class0

Unnamed: 0,Gender,Age,Debt,YearsEmployed,PriorDefault,Employed,CreditScore,DriverLicense,ZipCode,Income,...,Ethnicity_ff,Ethnicity_h,Ethnicity_j,Ethnicity_n,Ethnicity_o,Ethnicity_v,Ethnicity_z,Citizen_g,Citizen_p,Citizen_s
1,0.009186,-0.023685,-0.024702,-0.024146,-0.421158,0.024074,-0.004320,-0.004488,0.005620,-0.013405,...,0.000000,-0.003718,0.000000,0,0.001855,-0.004944,0.000960,0.001878,-0.002543,0.004090
2,0.002136,-0.012663,-0.028080,-0.042058,0.103062,-0.073214,-0.055253,-0.004798,-0.100564,-0.045732,...,0.008460,-0.028151,-0.002419,0,-0.001759,0.008391,-0.001678,-0.048775,0.000000,-0.059866
3,-0.001590,-0.001894,0.002141,0.005941,-0.306912,-0.021895,-0.025736,0.000000,-0.003643,-0.012975,...,0.002870,-0.006844,-0.003760,0,0.000000,0.002362,0.001631,-0.001531,-0.003059,0.005533
4,0.004708,0.079832,-0.054583,-0.053682,0.263834,-0.040777,-0.035290,0.072902,0.120427,-0.053102,...,0.011495,-0.024365,-0.015368,0,0.000000,0.000000,0.000000,0.008368,-0.002492,0.018129
7,0.000000,0.000000,0.281910,0.053103,-0.309315,0.000000,0.000000,0.000000,0.170680,0.004758,...,0.008339,0.000000,0.000000,0,0.000000,0.000000,0.000000,0.027330,0.248953,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,-0.003785,0.000000,-0.010128,-0.014471,-0.305913,-0.021589,-0.022471,0.000000,-0.021617,-0.022574,...,0.005955,0.022327,0.003244,0,0.000000,0.004596,0.000000,0.000000,-0.004435,0.000000
133,-0.007495,-0.022966,0.006199,-0.090475,0.142765,-0.115052,-0.073628,0.004741,-0.089945,-0.079398,...,0.008511,-0.062484,0.000000,0,0.000000,-0.014735,-0.007059,0.002828,-0.006018,0.009996
135,-0.004209,-0.010683,0.000000,-0.022367,-0.326610,-0.004212,-0.017997,-0.007577,-0.037268,-0.015496,...,0.000000,-0.005392,-0.002923,0,0.000000,-0.005467,0.000000,0.000000,-0.004768,0.008332
136,0.003532,-0.009156,0.002470,-0.021866,-0.299392,-0.018993,-0.020764,0.000000,-0.010777,-0.015370,...,0.004422,0.021361,0.004267,0,-0.002177,0.003877,0.002131,0.003149,-0.005062,0.002466


In [26]:
# Tally the sign of the SHAP values in Class0 for every feature. This
# rebinds shap_counts, replacing the Class1 counts computed earlier.
# NOTE(review): the labels come from x_test.columns, so this assumes Class0
# has exactly those columns in the same order -- confirm.
shap_counts = count_shap_values(Class0, x_test.columns)

# Order the feature columns by their 'negative' count (largest first), then
# transpose so each feature becomes a row.
shap_counts_sorted_negative = (
    shap_counts
    .sort_values(by='negative', axis=1, ascending=False)
    .T
)
shap_counts_sorted_negative

Unnamed: 0,positive,negative,zero
Income,12,72,1
CreditScore,14,70,1
EducationLevel_q,8,69,8
Ethnicity_h,17,67,1
Employed,19,65,1
PriorDefault,23,62,0
EducationLevel_w,9,61,15
EducationLevel_x,6,59,20
Citizen_p,6,57,22
ZipCode,28,56,1


# Features to Eliminate

In [28]:
# Same counts as in shap_counts, now ordered by the 'positive' count
# (largest first) and transposed so each feature becomes a row.
shap_counts_sorted_positive = (
    shap_counts
    .sort_values(by='positive', axis=1, ascending=False)
    .T
)
shap_counts_sorted_positive

Unnamed: 0,positive,negative,zero
Citizen_s,60,6,19
Ethnicity_ff,60,7,18
BankCustomer_g,55,30,0
Married_y,53,30,2
Married_u,51,30,4
EducationLevel_aa,51,13,21
EducationLevel_ff,51,6,28
BankCustomer_p,49,31,5
EducationLevel_c,42,21,22
Citizen_g,39,13,33
