In [69]:
import pandas as pd
import xgboost as xgb

from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.model_selection import train_test_split

from eli5.explain import explain_prediction, explain_weights
from eli5 import show_prediction




# Regression example 

In [88]:
# Fetch the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston is no longer shipped in scikit-learn >= 1.2 —
# confirm the installed version before re-running this cell.
boston = load_boston()

# Feature matrix, regression target and column names, kept as separate names
# because the following cells read them individually.
X = boston.data
y = boston.target
fn = boston.feature_names

In [89]:
# Assemble the features (named by `fn`) and the target into a single dataframe.
df_reg = pd.DataFrame(X, columns=fn).assign(target=y)

In [34]:
# Preview the first rows of the assembled regression frame (features + target).
df_reg.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [90]:
# Split the regression frame into train / test partitions.
# `columns=` replaces the positional axis argument (`drop('target', 1)`),
# which pandas deprecated in 1.3 and rejects from 2.0 onwards.
# The fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_reg.drop(columns='target'),
    df_reg['target'],
    test_size=0.33,
    random_state=1111,
)

In [91]:
# Fit a simple gradient-boosted regression model on the training partition.
# `.fit` is chained inside the assignment so the cell produces no output.
reg = xgb.XGBRegressor(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.1,
).fit(X_train, y_train)

In [61]:
# Explain the fitted regressor's feature weights with eli5.
res = explain_weights(reg)
# Bare expression as the last line so the notebook displays the explanation.
res

Weight,Feature
0.6139,LSTAT
0.1499,RM
0.0485,PTRATIO
0.039,DIS
0.0338,NOX
0.0323,TAX
0.0224,RAD
0.0156,AGE
0.0119,CRIM
0.0109,B


In [62]:
# Take the test row at positional index 1 and explain the regressor's
# prediction for that single observation.
explain_me = X_test.iloc[1]
explain_prediction(reg,explain_me)

Contribution?,Feature
22.634,<BIAS>
0.713,PTRATIO
0.482,AGE
0.465,DIS
0.452,CRIM
0.391,NOX
0.121,RAD
0.116,B
0.048,INDUS
-0.009,CHAS


In [92]:
# Show eli5's explanation of the regressor's prediction for the test row at
# positional index 44.
show_prediction(reg, X_test.iloc[44])

Contribution?,Feature
22.634,<BIAS>
4.732,LSTAT
0.635,PTRATIO
0.335,B
0.087,TAX
0.027,ZN
0.006,AGE
-0.009,CHAS
-0.058,NOX
-0.068,INDUS


# Classification example

In [93]:
# Load the breast cancer dataset from sklearn.
bc_data = load_breast_cancer()

# Feature matrix, class labels and column names; note that these reuse the
# names X / y / fn assigned by the regression example.
X = bc_data.data
y = bc_data.target
fn = bc_data.feature_names

# Assemble the features (named by `fn`) and the label into a single dataframe.
df_cls = pd.DataFrame(X, columns=fn).assign(target=y)

In [94]:
# Split the classification frame into train / test partitions.
# `columns=` replaces the positional axis argument (`drop('target', 1)`),
# which pandas deprecated in 1.3 and rejects from 2.0 onwards.
# The fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_cls.drop(columns='target'),
    df_cls['target'],
    test_size=0.33,
    random_state=1111,
)

In [95]:
# Fit a simple gradient-boosted classifier on the training partition.
cls = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.1,
).fit(X_train, y_train)

# Last expression of the cell: display eli5's feature weights for the classifier.
explain_weights(cls)

Weight,Feature
0.5162,worst radius
0.1203,worst area
0.0708,worst concave points
0.0678,mean concave points
0.0443,worst perimeter
0.0234,concavity error
0.0227,worst concavity
0.0194,worst texture
0.0139,mean texture
0.0125,worst compactness


In [96]:
# Explain the classifier's prediction for the test row at positional index 1.
explain_prediction(cls,X_test.iloc[1])

Contribution?,Feature
1.124,worst area
0.995,area error
0.965,<BIAS>
0.772,worst concave points
0.735,worst concavity
0.693,worst radius
0.448,mean concave points
0.379,mean texture
0.328,worst texture
0.326,worst smoothness


In [97]:
# Show eli5's explanation of the classifier's prediction for the test row at
# positional index 34.
show_prediction(cls, X_test.iloc[34])

Contribution?,Feature
2.338,worst area
1.287,worst concave points
1.139,worst radius
1.02,worst perimeter
0.61,mean concave points
0.481,worst concavity
0.428,mean texture
0.39,area error
0.302,worst texture
0.185,worst symmetry
