# Explainer Dashboard website development

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from explainerdashboard import ClassifierExplainer, ExplainerDashboard


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


## Load the UNSW model and the train/test split

In [3]:
from joblib import load

# Restore the previously trained UNSW classifier from disk.
# NOTE: joblib files are pickle-based, so only load artifacts from a trusted source.
rf_classifier_UNSW_Default = load(
    'datasets/explainerDashboardFiles/rf_classifierdefaultUNSW.joblib'
)


In [4]:
# Read the persisted feature and label frames in one pass. The order of the
# paths matches the order of the names on the left-hand side.
X_trainUNSW, X_testUNSW, y_train_loaded, y_test_loaded = map(
    pd.read_parquet,
    (
        'datasets/explainerDashboardFiles/X_trainUNSW.parquet',
        'datasets/explainerDashboardFiles/X_testUNSW.parquet',
        'datasets/explainerDashboardFiles/y_trainUNSW.parquet',
        'datasets/explainerDashboardFiles/y_testUNSW.parquet',
    ),
)

In [5]:
# Select the 'target' column from each loaded label frame.
y_trainUNSW, y_testUNSW = y_train_loaded['target'], y_test_loaded['target']

In [6]:
# Defensive guard: if y_testUNSW is still a DataFrame, collapse it to its
# 'target' column (adjust the column name if yours differs); any other type is
# left untouched.
y_testUNSW = (
    y_testUNSW['target'] if isinstance(y_testUNSW, pd.DataFrame) else y_testUNSW
)

In [7]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the test labels and encode them in a
# single step (equivalent to calling fit() followed by transform()).
# Assumes y_testUNSW is a 1-D, Series-like collection of labels.
label_encoder = LabelEncoder()
y_testUNSW_encoded = label_encoder.fit_transform(y_testUNSW)

In [8]:
from sklearn.model_selection import train_test_split

# Shrink the test set to a stratified 5% sample so the explainer has less data
# to process.
#
# train_test_split returns (train, test) pairs and `test_size` sizes the SECOND
# element of each pair. The 5% sample is therefore the 2nd and 4th return
# values; the 1st and 3rd hold the remaining 95% and are discarded.
_, X_testUNSW_reduced, _, y_testUNSW_reduced = train_test_split(
    X_testUNSW,
    y_testUNSW_encoded,
    test_size=0.05,       # size of the sample that is kept
    stratify=y_testUNSW,  # preserve the class ratio of the full test set
    random_state=42,      # for reproducibility
)

# Sanity check: features and labels stay aligned and the sample really is smaller.
assert len(X_testUNSW_reduced) == len(y_testUNSW_reduced) < len(X_testUNSW)

## Initialize the explainer

In [9]:
# Class names must be listed in the same order as the integer codes stored in
# y_testUNSW_reduced. Those codes were produced by `label_encoder`, where code i
# corresponds to label_encoder.classes_[i]. Series.unique() returns labels in
# first-appearance order instead, which can attach the wrong name to a class.
# NOTE(review): this also assumes the model's own class order matches the
# encoder's (i.e. every class occurs in the test labels) — confirm.
class_labels = label_encoder.classes_.tolist()

# Build the explainer on the reduced test sample with human-readable class names.
explainer = ClassifierExplainer(
    model=rf_classifier_UNSW_Default,
    X=X_testUNSW_reduced,
    y=y_testUNSW_reduced,
    labels=class_labels,
)

Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)


In [None]:
# Assemble the dashboard around the explainer. The boolean keyword arguments
# switch off the correspondingly named dashboard features (see the
# explainerdashboard documentation for the exact effect of each flag).
dashboard = ExplainerDashboard(
    explainer,
    mode='external',
    shap_interaction=False,
    no_permutations=True,
    hide_poweredby=True,
    shap_dependence=False,
    decision_trees=False,
)

# Start serving the dashboard; change the port if 8050 is already in use.
dashboard.run(port=8050)