In [None]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib
import lime
import lime.lime_tabular
import shap
# Load SHAP's JavaScript visualization code into the notebook so the
# interactive force plots further down can render.
shap.initjs()

# Read data

In [None]:
# Read the processed dataset from its CSV file
df = pd.read_csv(filepath_or_buffer='../Dataset/df_processed.csv')

In [None]:
# Preview the first five rows of the loaded frame
df.head(n=5)

# Create label and features

In [None]:
# 'Grade' is the prediction target; every other column is a feature
y = df['Grade']
X = df.drop(columns='Grade')

# Load model

In [None]:
# Load the previously saved model that the SHAP and LIME explainers below interrogate.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
blackbox_model = joblib.load('../Models/blackbox_model.pkl')

# SHAP values

In [None]:
# Get SHAP values
# TreeExplainer presumes blackbox_model is a tree-based model — TODO confirm against the pickled model.
# NOTE: the name `explainer` is rebound to the LIME explainer later in this notebook,
# so re-run this cell before reusing `explainer` for SHAP.
explainer = shap.TreeExplainer(blackbox_model)
# Explain every row of the feature frame X
shap_values = explainer(X)

In [None]:
# Visualize the explanation of the first prediction (row 0 of X) as a force plot
shap.plots.force(shap_values[0])

In [None]:
# Visualize the explanations of the first 100 rows of X together in one force plot
shap.plots.force(shap_values[0:100])

In [None]:
# SHAP dependence plot for the EthnicGroup feature over the first 100 rows of X,
# using LunchType as the interaction feature
shap.dependence_plot("EthnicGroup", shap_values[0:100].values, X[0:100], interaction_index="LunchType")

In [None]:
# Summarize the feature effects across the first 100 rows of X (beeswarm plot)
shap.plots.beeswarm(shap_values[0:100])

# Encode categorical values

In [None]:
# Integer-encode every categorical (object-dtype) feature column
X_encoded = X.copy()

encodings = {}      # column name -> {original value: integer code}
cat_positions = []  # positional indices of the categorical columns within X

for position, column in enumerate(X.columns):
    if X[column].dtype != 'object':
        continue  # non-categorical columns are left untouched

    cat_positions.append(position)

    # Codes are assigned in order of first appearance in the column
    value_to_code = {value: code for code, value in enumerate(X[column].unique())}
    encodings[column] = value_to_code
    X_encoded[column] = X[column].map(value_to_code)


In [None]:
def encoded_predict_proba(encoded_row_values):
    """Predict class probabilities for integer-encoded feature rows.

    LIME calls its prediction function with a 2-D array of perturbed samples
    (one row per sample), so every categorical column is decoded back to its
    original value column-wise before delegating to ``blackbox_model``.

    Parameters
    ----------
    encoded_row_values : array-like
        Either one encoded row (1-D) or a batch of encoded rows (2-D), with
        the features in the same order as ``X.columns``.

    Returns
    -------
    The result of ``blackbox_model.predict_proba`` on the decoded rows. For a
    1-D input only the entry for that single row is returned.
    """
    encoded = np.asarray(encoded_row_values)
    single_row = encoded.ndim == 1

    # Named columns let the decoded batch mirror the layout of the frame X.
    decoded = pd.DataFrame(np.atleast_2d(encoded), columns=X.columns)

    for position in cat_positions:
        column = X.columns[position]
        code_to_value = {code: value for value, code in encodings[column].items()}
        # LIME hands the codes back as floats; cast so they match the integer keys.
        decoded[column] = decoded[column].astype(int).map(code_to_value)

    probabilities = blackbox_model.predict_proba(decoded)
    return probabilities[0] if single_row else probabilities

# LIME values

In [None]:
# Initialize LIME explainer on the integer-encoded feature matrix
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_encoded.astype(int).values,
    training_labels=y,
    feature_names=X.columns.tolist(),
    categorical_features=cat_positions,
    # Show the original category labels instead of opaque integer codes:
    # the i-th key of each encoding dict is the value that was given code i.
    categorical_names={
        position: [str(value) for value in encodings[X.columns[position]]]
        for position in cat_positions
    },
    verbose=True,
    mode='classification',
    # LIME samples random perturbations; a fixed seed keeps explanations reproducible.
    random_state=42,
)

In [None]:
# Explain the prediction for the first encoded row, reporting every feature
exp = explainer.explain_instance(
    X_encoded.iloc[0].values,
    encoded_predict_proba,
    num_features=X_encoded.shape[1],
)

In [None]:
# Render the LIME explanation computed above inline in the notebook
exp.show_in_notebook()