In [147]:
### load modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import math

# scikit-learn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# Explainable AI
import shap

# 한글 폰트 설정
# plt.rcParams['font.family'] = 'NanumGothic'

In [148]:
### load example data
data = pd.read_csv('auto-mpg.csv')

### remove not need column
data_use = data[data.columns.difference(['name'])]
data_use = data_use.dropna(axis=0)

### change Dtype
#data_use = data_use.astype({'horsepower':'int'})

In [149]:
data_use

Unnamed: 0,acceleration,cylinders,displacement,horsepower,modelyear,mpg,origin,weight
0,12.0,8,307.0,130.0,70,18.0,1,3504
1,11.5,8,350.0,165.0,70,15.0,1,3693
2,11.0,8,318.0,150.0,70,18.0,1,3436
3,12.0,8,304.0,150.0,70,16.0,1,3433
4,10.5,8,302.0,140.0,70,17.0,1,3449
...,...,...,...,...,...,...,...,...
393,15.6,4,140.0,86.0,82,27.0,1,2790
394,24.6,4,97.0,52.0,82,44.0,2,2130
395,11.6,4,135.0,84.0,82,32.0,1,2295
396,18.6,4,120.0,79.0,82,28.0,1,2625


In [150]:
### train test select
X = data_use[data_use.columns.difference(['mpg'])]
y = data_use[["mpg"]]

In [151]:
# 랜덤포레스트 득점모델 학습
rf = RandomForestRegressor(random_state=0, n_jobs=-1)

In [152]:
rf.fit(X, y)

  """Entry point for launching an IPython kernel.


RandomForestRegressor(n_jobs=-1, random_state=0)

In [153]:
# train rmse
X_predict = rf.predict(X)
print("RMSE':{}".format(math.sqrt(mean_squared_error(X_predict, y))))

RMSE':1.0309196310645599


In [1]:
%matplotlib inline
 
ftr_importances_values = rf.feature_importances_
ftr_importances = pd.Series(ftr_importances_values, index=X.columns)
ftr_top = ftr_importances.sort_values(ascending=False)[:20]
 
plt.figure(figsize=(8, 6))
sns.barplot(x=ftr_top, y=ftr_top.index, palette = 'rocket')
plt.show()


ModuleNotFoundError: No module named 'matplotlib'

## SHAP
- one of Explainable AI method

In [None]:
# Fits the explainer
explainer = shap.Explainer(rf.predict, X)

# Calculates the SHAP values - It takes some time
shap_values = explainer(X)

In [None]:
shap.plots.bar(shap_values)

In [None]:
shap.summary_plot(shap_values, plot_type='violin')

In [None]:
shap.plots.bar(shap_values[0])

In [None]:
shap.plots.waterfall(shap_values[0])