In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from explainerdashboard import RegressionExplainer
from explainerdashboard import ClassifierExplainer
from explainerdashboard import ExplainerDashboard

#Import the Diabetes Dataset
from sklearn.datasets import load_diabetes
data= load_diabetes()
X=pd.DataFrame(data.data,columns=data.feature_names)
y=pd.DataFrame(data.target,columns=["target"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
model = RandomForestRegressor(n_estimators=50, max_depth=5)
model.fit(X_train, y_train.values.ravel())

In [8]:
def submit_dashboard(name, port, explainer):
    '''Packages dashboard information and exports it into two files (yaml and joblib pair).
    The sagemaker lifecycle script starts a listener process that spins up explainer dashboards
    on the sagemaker system terminal.
    
    name: title for dashboard, also used for naming exported files
    port: the port on which sagemaker studio should spin up the dashboard
          (nothing will happen if the port is already in use)
    model: the model object (random forrest or whatever)'''
    
    DIRECTORY_TO_WATCH = "/home/sagemaker-user/dashboard-definitions"
    db = ExplainerDashboard(
        explainer,
        title=name,
        whatif=False,
        mode='dash',
        port=port,
        routes_pathname_prefix='/',
        requests_pathname_prefix=f'/jupyter/default/proxy/{port}/'
    )

    # export dashboard files
    filename_prefix = name.replace(' ', '_').replace('.', '_')
    db.to_yaml(
        filepath=f"{DIRECTORY_TO_WATCH}/{filename_prefix}.yaml",
        explainerfile=f"{DIRECTORY_TO_WATCH}/{filename_prefix}.joblib",
        dump_explainer=True)
    print(f'<STUDIO_URL>/jupyter/default/proxy/{port}')

    

# explainer = ClassifierExplainer(model, X_test, y_test)
explainer = RegressionExplainer(model, X_test, y_test)
submit_dashboard('testing tests', 8056, explainer)

Changing class type to RandomForestRegressionExplainer...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating predictions...
Calculating residuals...
Calculating absolute residuals...
Calculating shap interaction values...
Reminder: TreeShap computational complexity is O(TLD^2), where T is the number of trees, L is the maximum number of leaves in any tree and D the maximal depth of any tree. So reducing these will speed up the calculation.
Calculating dependencies...
Calculating importances...
Calculating ShadowDecTree for each individual decision tree...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explaine