In [None]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib
import lime
import lime.lime_tabular
import shap
# Load SHAP's JavaScript assets into the notebook so the interactive force plots below can render.
shap.initjs()

# Read data

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
# NOTE(review): the file name suggests the data is already preprocessed — confirm upstream.
df = pd.read_csv('../Dataset/df_processed.csv')

In [None]:
# Preview the first rows as a quick sanity check of the load.
df.head()

# Create label and features

In [None]:
# Separate the target column from the predictors.
y = df['Grade']                 # label
X = df.drop(columns='Grade')    # every remaining column is a feature

# Load model

In [None]:
# Load the previously saved model that the explainers below interrogate.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code — only load trusted artifacts.
blackbox_model = joblib.load('../Models/blackbox_model.pkl')

# SHAP values

In [None]:
# Get SHAP values
# TreeExplainer targets tree-based models (presumably what blackbox_model is — confirm).
# Calling the explainer on X computes SHAP values for every row of the feature frame;
# the cells below slice this result rather than recomputing it.
explainer = shap.TreeExplainer(blackbox_model)
shap_values = explainer(X)

In [None]:
# Visualize the explanation of the first row's prediction (index 0) as a force plot
shap.plots.force(shap_values[0])

In [None]:
# Stacked force plot for the first 100 rows of X only (a subset, not the whole dataset)
shap.plots.force(shap_values[0:100])

In [None]:
# SHAP dependence plot for the "EthnicGroup" feature, coloured by its interaction
# with "LunchType". Only the first 100 rows are plotted, not the whole dataset.
shap.dependence_plot(
    "EthnicGroup",
    shap_values[:100].values,
    X.iloc[:100],
    interaction_index="LunchType",
)

In [None]:
# Beeswarm summary of per-feature SHAP effects, computed over the first 100 rows only
shap.plots.beeswarm(shap_values[0:100])

# Encode categorical values

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode every column of X into a dense array. The first category of each
# feature is dropped, so an all-zero group of indicators stands for that category.
encoder = OneHotEncoder(
    drop='first',
    sparse_output=False,
)
X_encoded = encoder.fit_transform(X)

In [None]:
def predict_fn(data):
    """
    Score one-hot encoded rows with the original black-box model.

    The rows are mapped back to the original feature space with the fitted
    ``encoder`` and handed to ``blackbox_model.predict_proba``.

    Parameters:
    - data: 2-D array in the one-hot space produced by ``encoder``

    Returns:
    - The result of ``blackbox_model.predict_proba`` on the decoded rows
    """
    # Inverse transform the data back to the original categories
    data_decoded = encoder.inverse_transform(data)

    # inverse_transform returns a single-dtype array (often object), so the
    # per-column dtypes of X are lost; cast back so the model receives a frame
    # with the same schema as X.
    data_decoded_df = pd.DataFrame(data_decoded, columns=X.columns).astype(
        X.dtypes.to_dict()
    )

    # Use the original blackbox_model's predict_proba method
    return blackbox_model.predict_proba(data_decoded_df)

# LIME explanations

In [None]:
from lime import lime_tabular

In [None]:
# Prepare LIME explainer
# Every column of X_encoded is a 0/1 one-hot indicator, so all of them are declared
# categorical; otherwise LIME treats them as continuous and discretizes/perturbs them
# numerically, which is not meaningful for indicator columns.
# NOTE: this rebinds `explainer`, replacing the SHAP explainer created earlier.
# NOTE(review): class_names assumes a binary target whose classes are 0 and 1 — confirm.
explainer = lime_tabular.LimeTabularExplainer(X_encoded, 
                                              feature_names=encoder.get_feature_names_out(X.columns), 
                                              categorical_features=list(range(X_encoded.shape[1])),
                                              class_names=['0', '1'], 
                                              verbose=True, mode='classification',
                                              random_state=42)  # fixed seed: reproducible explanations

In [None]:
def explain_instance(i, explainer, X_encoded, predict_fn):
    """
    Build and render a LIME explanation for a single row.

    Parameters:
    - i: Position of the row in X_encoded to explain
    - explainer: Object exposing explain_instance(row, predict_fn), e.g. a LIME explainer
    - X_encoded: Indexable collection of encoded rows
    - predict_fn: Prediction callable handed through to the explainer

    Returns:
    - Whatever show_in_notebook returns; the explanation itself is rendered
      in the notebook (with the feature table) as a side effect
    """
    row = X_encoded[i]
    explanation = explainer.explain_instance(row, predict_fn)
    rendered = explanation.show_in_notebook(show_table=True)
    return rendered


In [None]:
# Explain the row at index 1 (the second row of X_encoded).
explain_instance(1, explainer, X_encoded, predict_fn)