In [1]:
import pickle
import pandas as pd
import config
import matplotlib.pyplot as plt
%matplotlib inline

#Importing the module
from eli5 import show_weights
from eli5.sklearn import PermutationImportance
from eli5 import show_prediction



In [2]:
def load_model(modelname):

    # load the model from disk
    loaded_model = pickle.load(open(f"{config.MODEL_PATH}{modelname}.bin", 'rb'))

    return loaded_model

    pass

In [3]:
def load_data():

    # read the training data
    X_train =  pickle.load(open( f'{config.INPUT_PATH}X_train.pkl', "rb" )) 
    y_train = pickle.load(open( f'{config.INPUT_PATH}y_train.pkl', "rb" ))

    # read test data
    X_test = pickle.load(open( f'{config.INPUT_PATH}X_test.pkl', "rb" )) 
    y_test = pickle.load(open( f'{config.INPUT_PATH}y_test.pkl', "rb" )) 

    return X_train, y_train, X_test, y_test


In [4]:
def get_features_name():
    # load datasets

    return list(pd.read_pickle(f"{config.INPUT_PATH}X_train.pkl").columns)

### Load Datasets

In [7]:
# load train and validation datasets
X_train, y_train, X_test, y_test = load_data() 

### ID3

In [15]:
model = load_model("decision_tree_gini")

In [16]:
# load train and validation datasets
X_train, y_train, X_test, y_test = load_data() 

In [17]:
#Permutation Importance
perm = PermutationImportance(model, scoring = 'roc_auc' ,random_state=101).fit(X_test, y_test)
show_weights(perm, feature_names = list(X_test.columns))

Weight,Feature
0.1086  ± 0.0014,lon_x
0.0325  ± 0.0008,condicion_std
0.0251  ± 0.0014,PRODUCTO_12_saldo__rolling_sum_t6
0.0147  ± 0.0009,PRODUCTO_1_saldo__diff_p3
0.0125  ± 0.0007,PRODUCTO_0_condicion_
0.0069  ± 0.0005,condicion_mean
0.0033  ± 0.0004,PRODUCTO_0_saldo__count
0.0028  ± 0.0008,saldo_count
0.0022  ± 0.0002,PRODUCTO_11_condicion_
0.0014  ± 0.0000,PRODUCTO_1_condicion_


In [19]:
show_weights(model)

Weight,Feature
0.5039,x3
0.1767,x40
0.1411,x97
0.0790,x75
0.0357,x15
0.0239,x1
0.0162,x89
0.0116,x20
0.0068,x39
0.0039,x58


In [22]:
#Taking an example of test datafrom eli5 import show_prediction
show_prediction(model, X_test.iloc[56], show_feature_values=True)

Contribution?,Feature,Value
0.842,<BIAS>,1.0
0.032,condicion_std,0.0
0.021,PRODUCTO_12_saldo__rolling_sum_t6,-9999.0
0.012,PRODUCTO_0_condicion_,-9999.0
-0.038,PRODUCTO_1_saldo__diff_p3,-9999.0
-0.054,lon_x,-9999.0


### XGBOOST

In [5]:
model = load_model("xgb")

In [8]:
#Permutation Importance
perm = PermutationImportance(model, scoring = 'roc_auc' ,random_state=101).fit(X_test, y_test)
show_weights(perm, feature_names = list(X_test.columns))

Weight,Feature
0.0723  ± 0.0019,lon_x
0.0588  ± 0.0009,condicion_mean
0.0370  ± 0.0006,key_value_timespam
0.0351  ± 0.0010,saldo_count
0.0167  ± 0.0004,saldo_sum
0.0162  ± 0.0011,PRODUCTO_1_saldo__diff_p3
0.0084  ± 0.0009,PRODUCTO_0_saldo__count
0.0076  ± 0.0005,PRODUCTO_6_saldo__pct_p1
0.0074  ± 0.0005,saldo_mean
0.0069  ± 0.0008,PRODUCTO_12_saldo__rolling_sum_t6


In [9]:
show_weights(model, importance_type = 'gain')

Weight,Feature
0.1790,PRODUCTO_12_tipo_credito_
0.1431,PRODUCTO_12_saldo__rolling_sum_t6
0.0670,PRODUCTO_29_tipo_credito_
0.0573,PRODUCTO_0_condicion_
0.0572,lon_x
0.0541,condicion_mean
0.0412,PRODUCTO_1_condicion_
0.0333,key_value_timespam
0.0240,PRODUCTO_0_saldo__count
0.0232,PRODUCTO_6_tipo_credito_


In [14]:
#Taking an example of test datafrom eli5 import show_prediction
show_prediction(model, X_test.iloc[34], show_feature_values=True)

Contribution?,Feature,Value
0.496,lon_x,-9999.0
0.492,PRODUCTO_4_condicion_,13.0
0.387,PRODUCTO_6_saldo__pct_p6,1.017
0.311,condicion_mean,0.575
0.235,dto,1307.0
0.234,PRODUCTO_6_saldo__pct_p1,-0.0
0.198,PRODUCTO_15_saldo__rolling_mean_w3,-0.007
0.187,PRODUCTO_0_saldo__count,12.0
0.164,saldo_count,73.0
0.154,PRODUCTO_4_saldo__pct_p6,0.032


### LightGBM

In [24]:
model = load_model("lgbm")

In [25]:
#Permutation Importance
perm = PermutationImportance(model, scoring = 'roc_auc' ,random_state=101).fit(X_test, y_test)
show_weights(perm, feature_names = list(X_test.columns))

Weight,Feature
0.0714  ± 0.0019,lon_x
0.0272  ± 0.0006,key_value_timespam
0.0248  ± 0.0010,saldo_count
0.0198  ± 0.0006,condicion_mean
0.0191  ± 0.0005,saldo_sum
0.0179  ± 0.0012,PRODUCTO_1_saldo__diff_p3
0.0103  ± 0.0010,PRODUCTO_0_saldo__count
0.0085  ± 0.0005,saldo_mean
0.0069  ± 0.0006,PRODUCTO_6_tipo_credito_
0.0066  ± 0.0003,condicion_max


In [26]:
show_weights(model, importance_type = 'gain')

Weight,Feature
0.1168,lon_x
0.0871,condicion_mean
0.0859,PRODUCTO_12_saldo__rolling_sum_t6
0.0805,PRODUCTO_12_tipo_credito_
0.0485,PRODUCTO_1_saldo__diff_p3
0.0436,condicion_max
0.0402,saldo_sum
0.0385,PRODUCTO_0_condicion_
0.0343,saldo_count
0.0325,PRODUCTO_0_saldo__count


In [27]:
#Taking an example of test datafrom eli5 import show_prediction
show_prediction(model, X_test.iloc[34], show_feature_values=True)

Contribution?,Feature,Value
2.321,<BIAS>,1.0
0.335,PRODUCTO_1_saldo__diff_p3,0.0
0.113,PRODUCTO_12_saldo__rolling_sum_t6,-9999.0
0.094,PRODUCTO_0_condicion_,0.0
0.093,PRODUCTO_1_condicion_,0.0
0.083,saldo_sum,-2.909
0.076,PRODUCTO_12_tipo_credito_,-9999.0
0.071,PRODUCTO_5_saldo_,-9999.0
0.071,key_value_timespam,99.0
0.061,sexo,0.0
