# Understanding XGBoost model predictions using Python's explainerdashboard package

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import datetime as dt

  import pandas.util.testing as tm


### Data Preprocessing

In [2]:
# Load the training data and the historical weather records.
df1 = pd.read_csv('llama_forecast_train.csv')
# The weather file's 'HOUR' column is renamed to 'DAY' so both frames share the
# join key used below. The rename is chained instead of using `inplace`: the
# original passed the *string* 'True', which only worked because a non-empty
# string is truthy.
df2 = pd.read_csv('historical_weather.csv').rename(columns={'HOUR': 'DAY'})
# Left join: every training row is kept, even when no weather row matches its DAY.
df1 = pd.merge(df1, df2, on=['DAY'], how="left")

In [3]:
# Derive calendar features from the DAY column of the training data.
df1['Date'] = pd.to_datetime(df1['DAY'])
# Order rows chronologically; the later positional train/test split relies on this.
df1 = df1.sort_values(by='Date', ascending=True)
df1['Year'] = df1['Date'].dt.year
df1['Month'] = df1['Date'].dt.month
# `Series.dt.week` is deprecated and was removed in pandas 2.0; the ISO calendar
# accessor is its replacement. It returns a nullable UInt32 column, so cast it to
# the same plain dtype as the other calendar fields to keep the feature matrix
# uniformly numeric.
df1['Week'] = df1['Date'].dt.isocalendar().week.astype(df1['Year'].dtype)
df1['Day'] = df1['Date'].dt.day
df1['Week Day'] = df1['Date'].dt.dayofweek   # Monday=0 ... Sunday=6
df1['Year Day'] = df1['Date'].dt.dayofyear
# The raw date columns are not numeric model inputs; only the derived features are kept.
df1 = df1.drop(['Date', 'DAY'], axis=1)

  


In [4]:
# One-hot encode the habitat name, append the indicator columns to the
# training frame, then drop the raw text column they replace.
habitat_dummies = pd.get_dummies(df1[['HABITAT NAME']])
df3 = pd.concat([df1, habitat_dummies], axis=1)
df3 = df3.drop(columns=['HABITAT NAME'])
df3.head()

Unnamed: 0,HABITAT ID,AVAILABLE LLAMAS,TEMPERATURE,HUMIDITY,PRECIPITATION,WINDSPEED,Year,Month,Week,Day,...,HABITAT NAME_Peachtree,HABITAT NAME_Perfect Square,HABITAT NAME_Primary Cay,HABITAT NAME_Pulp Point,HABITAT NAME_Random Forest,HABITAT NAME_Ridge Road,HABITAT NAME_Shortest Path,HABITAT NAME_Tierra del Fuego,HABITAT NAME_Vector Field,HABITAT NAME_Wildcat Way
0,0,74,73,71,0.0,9.4,2017,7,26,1,...,1,0,0,0,0,0,0,0,0,0
10908,18,58,73,71,0.0,9.4,2017,7,26,1,...,0,0,0,0,0,0,1,0,0,0
12120,20,96,73,71,0.0,9.4,2017,7,26,1,...,0,0,0,0,0,0,0,0,1,0
13332,22,88,73,71,0.0,9.4,2017,7,26,1,...,0,0,0,0,0,0,0,0,0,0
4242,7,48,73,71,0.0,9.4,2017,7,26,1,...,0,1,0,0,0,0,0,0,0,0


### Model Building

In [5]:
# Separate the encoded frame into the feature matrix and the regression target.
x = df3.drop(columns=['AVAILABLE LLAMAS'])   # every remaining column is a model input
y = df3['AVAILABLE LLAMAS']                  # target column

In [6]:

import xgboost as xgb

# Hyperparameters for the gradient-boosted regressor, gathered in one place so
# they are easy to read and tune.
xgb_params = dict(
    learning_rate=0.15,
    max_depth=6,
    min_child_weight=2,
    gamma=0.5,
    subsample=0.7,
    # NOTE(review): XGBoost documents scale_pos_weight as a class-balance control
    # for classification -- confirm it is intended for this regressor.
    scale_pos_weight=0.7,
)
model = xgb.XGBRegressor(**xgb_params)

In [7]:

# Positional 75/25 hold-out. Rows were sorted by date during preprocessing, so
# the first 75% (oldest rows) train the model and the most recent 25% test it.
# The split point is computed from the frame that is actually being sliced.
b = int(len(x) * .75)
# Slice to the end of the frame: the original `b:-1` silently dropped the final
# row from the test set.
x_train, x_test = x.iloc[:b, :], x.iloc[b:, :]
y_train, y_test = y.iloc[:b], y.iloc[b:]
# Every row must land in exactly one of the two partitions.
assert len(x_train) + len(x_test) == len(x)
model.fit(x_train, y_train)


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0.5, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.15, max_delta_step=0, max_depth=6,
             min_child_weight=2, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=0.7, subsample=0.7,
             tree_method='exact', validate_parameters=1, verbosity=None)

### Model Understanding

In [8]:
from explainerdashboard import RegressionExplainer, ExplainerDashboard

# Descriptions shown in the dashboard for the calendar features engineered in
# this notebook. The original passed explainerdashboard.datasets.feature_descriptions,
# which describes the library's bundled Titanic demo data and therefore does not
# match any column of this dataset.
llama_feature_descriptions = {
    'Year': 'Calendar year of the observation date',
    'Month': 'Month of the year (1-12)',
    'Week': 'Week number of the year',
    'Day': 'Day of the month',
    'Week Day': 'Day of the week (0 = Monday, 6 = Sunday)',
    'Year Day': 'Day of the year',
}

# cats=['HABITAT NAME'] asks the explainer to group the one-hot encoded
# 'HABITAT NAME_*' columns back into a single categorical feature.
explainer = RegressionExplainer(model, x_test, y_test,
                                cats=['HABITAT NAME'],
                                descriptions=llama_feature_descriptions)

# run() starts the dashboard server and keeps this cell busy while it is up.
# As the library's own start-up message suggests, pass mode='external',
# mode='inline' or mode='jupyterlab' to ExplainerDashboard to keep the notebook
# interactive while the dashboard is running.
ExplainerDashboard(explainer).run()

Changing class type to XGBRegressionExplainer...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating predictions...
Calculating residuals...
Calculating absolute residuals...
Generating xgboost model dump...
Calculating dependencies...
Calculating importances...
Calculating shap interaction values...
Reminder: TreeShap computational complexity is O(TLD^2), where T is the number of trees, L is the maximum number of leaves in any tree and D the maximal depth of any tree. So reducing these will speed up the calculation.
Calculating ShadowDecTree for each individual decision tree...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExpla

 * Running on http://0.0.0.0:8050/ (Press CTRL+C to quit)
192.168.100.24 - - [10/Apr/2021 19:53:04] "GET / HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "GET /_dash-dependencies HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "GET /_dash-layout HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:05] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.2

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\PSEN\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[r

192.168.100.24 - - [10/Apr/2021 19:53:07] "POST /_dash-update-component HTTP/1.1" 500 -
192.168.100.24 - - [10/Apr/2021 19:53:08] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:08] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:09] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:10] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:53:14] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 

192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 204 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 204 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 204 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 204 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 19:56:47] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.100.24 - - [10/Apr/2021 