In [None]:
# Import required packages
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import time
import numpy as np
import lime
from lime import lime_tabular
from lime import submodular_pick

### Random forest

In [None]:
# Load the preprocessed training and validation splits from CSV.
train_df = pd.read_csv('../data/preprocessed_data/train_data.csv')
valid_df = pd.read_csv('../data/preprocessed_data/valid_data.csv')

# Feature matrices: every column except `cust_no` (presumably a customer
# identifier) and the target `label`. Missing feature values become 0.
x_train = train_df.drop(columns=['cust_no', 'label']).fillna(0)
x_valid = valid_df.drop(columns=['cust_no', 'label']).fillna(0)

# Targets: the `label` column of each split.
y_train = train_df['label']
y_valid = valid_df['label']

In [None]:
# Train the random forest, report how long fitting took, then evaluate it
# on the validation split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is not affected by system clock changes.
start_time = time.perf_counter()
# n_jobs=-1 grows the trees on all available CPU cores. With random_state
# fixed, the trees that are grown do not depend on the number of jobs.
rf = RandomForestClassifier(n_estimators=500, random_state=0, n_jobs=-1)
rf.fit(x_train, y_train)
end_time = time.perf_counter()
print('걸린 시간 :', end_time - start_time)  # elapsed fitting time, in seconds

# Validation metrics: overall accuracy, confusion matrix, per-class report.
y_pred = rf.predict(x_valid)
print('Accuracy: {:.2f}'.format(accuracy_score(y_valid, y_pred)))
print(confusion_matrix(y_valid, y_pred))
print(classification_report(y_valid, y_pred))

### LIME

In [None]:
# LIME: explain the random forest's prediction for a single training row.

# Take the class names from the fitted model so their order always matches
# the column order of rf.predict_proba (instead of a hard-coded list).
class_names = [str(label) for label in rf.classes_]

explainer = lime_tabular.LimeTabularExplainer(
    training_data=x_train.to_numpy(),
    feature_names=x_train.columns.tolist(),
    class_names=class_names,
    mode='classification',
    random_state=0,  # fixed seed so the perturbation sampling is repeatable
)

exp = explainer.explain_instance(
    # Positional index of the row to explain. LIME indexes the row by
    # integer position, so pass a plain NumPy array rather than a pandas
    # Series whose index holds the column labels.
    data_row=x_train.iloc[1].to_numpy(),
    predict_fn=rf.predict_proba,
    top_labels=len(class_names),  # produce an explanation for every class
)

exp.show_in_notebook(show_table=True)

### SP-LIME

In [None]:
# SP-LIME: choose a small set of explanations via submodular pick.
# SubmodularPick selects the rows it explains using NumPy's global RNG, so
# seed it here to get the same rows every time this cell is run.
np.random.seed(0)
sp_exp = submodular_pick.SubmodularPick(
    explainer,
    x_train.to_numpy(),
    predict_fn=rf.predict_proba,
    # presumably the number of feature columns, so that no feature is cut
    # from an explanation -- TODO confirm against x_train.shape[1]
    num_features=83,
    num_exps_desired=5,
)

In [None]:
# SP-LIME visualization: render each explanation chosen by submodular pick.
# A plain loop is used because show_in_notebook() is called only for its
# display side effect, so there is no result list worth building. The loop
# variable has its own name so it does not overwrite the single-instance
# `exp` created in the LIME cell.
for picked_exp in sp_exp.sp_explanations:
    picked_exp.show_in_notebook()
print('SP-LIME Explanations.')