## 6. Explainability and LLMs

In this section why use the LLM model to better understand informations about our classifier

#### Importation of the librairies needed

In [1]:
import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("dependencies")

from omnixai.data.tabular import Tabular
from omnixai.preprocessing.tabular import TabularTransform
from omnixai.explainers.tabular import TabularExplainer
import xgboost as xgboost

import pandas as pd
import numpy as np
import sklearn

import math
import random

from openai import OpenAI

### Load and preprocess the dataset

In [None]:
# Load Dataset
feature_names = [
    "Age", "Workclass", "Education","Marital Status", "Occupation", "Relationship", 
    "Race", "Sex", "Capital Gain","Capital Loss", "Hours per week", "Country", 
    "label"
]

# Load the dataset into a DataFrame
df = pd.DataFrame(
  np.genfromtxt('adult.data', delimiter=', ', dtype=str, usecols=(0,1,3,5,6,7,8,9,10,11,12,13,14)),    
  columns=feature_names
)


#Drop missing values denoted as ?
cols = list(df.columns)
df[cols] = df[cols].replace(['?'], np.nan)
df = df.dropna()

# Binarize age
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
median_age = df['Age'].median()

# Create binary columns for 'Age' and 'Sex'
df['age_binary'] = df['Age'].apply(lambda x: 0 if x <= median_age else 1)
df['sex_binary'] = df['Sex'].apply(lambda x: 0 if x == 'Female' else 1)

# Drop the original 'Age' and 'Sex' columns
df = df.drop('Age', axis=1)
df = df.drop('Sex', axis=1)

# Convert the 'label' column to binary
df["label"] = df["label"].replace('>50K',1)
df["label"] = df["label"].replace('<=50K',0)


### Apply LDP on Data set 

In [3]:
# Define functions for local differential privacy
def get_epsilon(p=0.75, q=0.75):
    return math.log( max(q/(1-p), p/(1-q)) )

def rand_resp(x, p=0.75, q=0.75):
    toss = random.random()
    if x == 0:
        y = 0 if toss <= q else 1
    else:
        y = 1 if toss <= p else 0
    return y

In [None]:
# Create a copy of the original DataFrame
df_private = df.copy()
epsilon = get_epsilon()

# Apply local differential privacy to 'age_binary' and 'sex_binary' columns
df_private['age_binary_private'] = df_private['age_binary'].apply(lambda x: rand_resp(x))
df_private['sex_binary_private'] = df_private['sex_binary'].apply(lambda x: rand_resp(x))

# Define feature names and categorical columns
feature_names = [
    "age_binary_private", "Workclass", "Education","Marital Status", "Occupation", "Relationship", 
    "Race", "sex_binary_private", "Capital Gain","Capital Loss", "Hours per week", "Country", 
    "label"
]
categorical_columns=[
   "Workclass", "Education", "Marital Status", "Occupation", "Relationship", "Race", "sex_binary_private", 'age_binary_private', "Country", "label"
]

# Convert columns to appropriate data types
for feature in feature_names:
    if feature in categorical_columns:
        df_private[feature] = df_private[feature].astype("category")
    else:
        df_private[feature] = df_private[feature].astype("int64")

### Split and Train Data

In [5]:
# Data preprocessing to train model
columns = [
    "age_binary_private", "Workclass", "Education","Marital Status", "Occupation", "Relationship", 
    "Race", "sex_binary_private", "Capital Gain","Capital Loss", "Hours per week", "Country"]

# Create a Tabular object for the private data
tabular_data = Tabular(
   df_private,
   categorical_columns=columns,
   target_column='label'
)

# Divide into training and test set
transformer = TabularTransform().fit(tabular_data)
class_names = transformer.class_names
x = transformer.transform(tabular_data)
train, test, train_labels, test_labels = \
    sklearn.model_selection.train_test_split(x[:, :-1], x[:, -1], train_size=0.70,random_state = 1234)

# Convert test labels to integers
test_labels = test_labels.astype(int)

# Print the shapes of the training and test data
print('Training data shape: {}'.format(train.shape))
print('Test data shape:     {}'.format(test.shape))

# Train an XGBoost model
model = xgboost.XGBClassifier(n_estimators=300, max_depth=5)
model.fit(train, train_labels)

# Define a prediction function
predict_function=lambda z: model.predict_proba(transformer.transform(z))


# Convert the transformed data back to Tabular instances
train_data = transformer.invert(train)
test_data = transformer.invert(test)

# Display the target column and the first two training labels
#display(tabular_data.target_column)
#display(train_labels[:2])

Training data shape: (21113, 404)
Test data shape:     (9049, 404)


In [None]:
test_df = test_data.to_pd()
test_df["income-per-year"] = test_labels
predictions = model.predict(test)
test_df["prediction"] = predictions

# Display a sample of the test DataFrame
# test_df.sample(5)

### Identify instances where the model is wrong but highly confident 

In [7]:
# Find instances where the predicted label is different from the actual label
miss_indices = np.where(predictions != test_labels)[0]

# Create the prediction probabilities
proba = model.predict_proba(test)

# Find instances where the model is very confident but wrong
miss_but_confident = []

for idx in miss_indices:
    if max(proba[idx]) > 0.95:
        miss_but_confident.append(idx)
    
# Print the number of instances where the model is very confident but wrong
print(f"There are {len(miss_but_confident)} instances where the model is very confident but wrong.")

instance_ids = miss_but_confident

# Print an example of a highly confident but wrong prediction
print(f"For example, instance {instance_ids[0]} has label {test_labels[instance_ids[0]]} and prediction {predictions[instance_ids[0]]}, with probs {model.predict_proba(test[instance_ids[0]:instance_ids[0]+1])[0]}")

There are 58 instances where the model is very confident but wrong.
For example, instance 122 has label 1 and prediction 0, with probs [0.96228683 0.03771318]


### Explanation of instances where the model is wrong but highly confident.

In [8]:
# Initialize the explainers for the identified instances
explainers = TabularExplainer(
  explainers=['lime'],                       # The explainers to apply
  mode="classification",                             # The task type
  data=train_data,                                   # The data for initializing the explainers
  model=model,                                       # The ML model to explain
  preprocess=lambda z: transformer.transform(z),     # Converts raw features into the model inputs
)
# Select the instances to explain
instance_ids = instance_ids[:1]  # Select the first 10 instances to better visualization 
test_instances = test_data[instance_ids]
local_explanations = explainers.explain(X=test_instances)

### LIME explanations for the identified instances

In [9]:
print("LIME explanation")

for i, instance_id in enumerate(instance_ids):
    print(f"Instance {instance_id} has {test_labels[instance_id]} and prediction {predictions[instance_id]}, with probs {model.predict_proba(test[instance_id:instance_id+1])[0]}")
    local_explanations["lime"].ipython_plot(index=i, class_names=class_names)

LIME explanation
Instance 122 has 1 and prediction 0, with probs [0.96228683 0.03771318]


### Translate explanation to Natural language

In [10]:
# Extract explanations in a readable text format
print("LIME Explanation (as text):")

for i, instance_id in enumerate(instance_ids):
    # Access the explanation for the current instance
    explanation = local_explanations["lime"][i]  # Access the i-th explanation

    # Extract features and their importance scores
    features = explanation.explanations[0]["features"]  # List of feature names
    scores = explanation.explanations[0]["scores"]  # List of importance scores

    # Build the textual explanation
    formatted_explanation = "For this individual, the key factors are:\n"
    for feature, score in zip(features, scores):
        formatted_explanation += f"- {feature}: contribution of {float(score):.2f}\n"

    print(f"Instance {instance_id}:")
    print(formatted_explanation)
    print("\n" + "-"*50 + "\n")




LIME Explanation (as text):
Instance 122:
For this individual, the key factors are:
- Capital Gain: contribution of 0.39
- Marital Status: contribution of -0.23
- Capital Loss: contribution of 0.20
- Education: contribution of 0.16
- age_binary: contribution of -0.06
- sex_binary: contribution of -0.04
- Country: contribution of 0.03
- Occupation: contribution of -0.02
- Race: contribution of -0.02
- Relationship: contribution of -0.01


--------------------------------------------------



In [11]:
# Connection to local OpenAI server
client = OpenAI(
    api_key="your-api-key",
    base_url="http://localhost:1234/v1"
)

Each time we interact with it, the LLM starts anew. So we need to store and replay the messages.
And we'll build a function to interact with the LLM. 

In [12]:
messages = [
    {"role": "system", "content": "Hello! How can I help you today?"}
]

def get_response(message, messages):
    messages.append({"role": "user", "content": message})
    completion = client.chat.completions.create(
        messages=messages,
        temperature=0.8,
        model="model"
    )
    response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": response})
    return response, messages

Send formatted_explanation to LLM

In [14]:
message = f"Explain the following explaination as text that a person can easily understand :\n{formatted_explanation} "
response, messages = get_response(message, messages)
print(response)

**Breaking down the numbers**

Let's look at the factors that affect this individual, along with how much each factor affects their situation.

* **Capital Gain**: For every dollar earned in capital gains (like from investments), they get 39 cents more.
* **Marital Status**: Getting married actually hurts them - it gives them a 23-cent penalty.
* **Capital Loss**: Losing money is not a big deal for this person - it's only worth about 20 cents less to them.
* **Education**: Having a higher education level helps them out - it gives them a 16-cent boost.
* **Age**: For every year they get older, their situation gets slightly worse by 6 cents (this is a "binary" factor, meaning there are two possible ages).
* **Sex**: Being female doesn't have a huge impact on their situation - it only hurts them by 4 cents.
* **Country**: The country they live in has no significant effect on their situation, but they get a tiny boost of 3 cents from living in this particular country.
* **Occupation**: The