In [49]:
import shap
import pandas as pd
import numpy as np
import pickle
import random 
import yaml
import re
import dill

from shapnarrative_metrics.llm_tools import llm_wrappers
from shapnarrative_metrics.misc_tools.manipulations import full_inversion, shap_permutation
from shapnarrative_metrics.llm_tools.generation import GenerationModel
from shapnarrative_metrics.llm_tools.extraction import ExtractionModel

### Load necessary keys and data

In [50]:
# Read the provider API keys from the YAML config file.
# The parsed mapping is bound to `keys_config` (not `dict`) so the built-in
# `dict` type stays usable for the rest of the kernel session.
with open("config/keys.yaml") as f:
    keys_config = yaml.safe_load(f)

# The file is expected to hold one top-level "API_keys" mapping with one
# entry per provider; a missing entry raises KeyError here, before any
# wrapper is constructed.
api_keys = keys_config["API_keys"]
api_key = api_keys["OpenAI"]
replicate_key = api_keys["Replicate"]
anthropic_key = api_keys["Anthropic"]
mistral_key = api_keys["Mistral"]

In [51]:
dataset_name="credit"

# Every artefact of a dataset lives in the same folder; build the path once so
# the four loads below cannot drift apart when the dataset is switched.
dataset_dir = f"data/{dataset_name}_dataset"

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load artefacts that were produced locally or come from a trusted source.
with open(f"{dataset_dir}/dataset_info", "rb") as f:
    ds_info = pickle.load(f)

# Pre-trained classifier for this dataset (file name suggests a random forest —
# TODO confirm); it is later passed to the generator with tree=True.
with open(f"{dataset_dir}/RF.pkl", "rb") as f:
    trained_model = pickle.load(f)

train = pd.read_parquet(f"{dataset_dir}/train_cleaned.parquet")
test = pd.read_parquet(f"{dataset_dir}/test_cleaned.parquet")

In [52]:
# Display the loaded test split; the last column is used as the target further down.
test

Unnamed: 0,status,duration,credit_history,amount,savings,employment_duration,installment_rate,other_debtors,present_residence,age,...,purpose_furnitures,purpose_others,purpose_radio_tv,purpose_repairs,purpose_retraining,purpose_vacation,housing_free,housing_own,housing_rent,credit_risk
681,2,12,2,1155,1,5,3,3,3,40,...,1,0,0,0,0,0,0,0,1,1
990,1,18,2,750,1,1,4,1,1,27,...,0,0,0,1,0,0,0,0,1,0
155,4,24,4,2684,1,3,4,1,2,35,...,1,0,0,0,0,0,0,0,1,1
768,2,9,1,1437,2,4,2,1,3,29,...,0,1,0,0,0,0,0,0,1,0
438,1,27,4,2442,1,5,4,1,4,43,...,0,0,0,0,1,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376,1,12,0,1108,1,4,4,1,3,28,...,0,0,0,0,0,0,0,0,1,0
731,4,60,2,6527,5,3,4,1,4,34,...,0,1,0,0,0,0,0,1,0,1
594,1,30,0,4583,1,3,2,3,2,32,...,0,0,0,0,0,0,0,0,1,1
65,4,6,4,1237,2,3,1,1,1,27,...,1,0,0,0,0,0,0,0,1,1


In [53]:
# Pick the single test instance that will be explained.
n = 14      # NOTE(review): not used in any of the visible cells
idx = 882   # index *label* in `test` (looked up with .loc, not a position)

# The last column is treated as the target, everything before it as features.
# Wrapping idx in a list keeps x a DataFrame and y a Series instead of
# collapsing them to a Series / scalar.
feature_cols = test.columns[:-1]
target_col = test.columns[-1]
x = test.loc[[idx], feature_cols]
y = test.loc[[idx], target_col]

 

In [None]:
# Run configuration.
TEMPERATURE=0   # sampling temperature handed to every LLM wrapper below
MANIP=True      # passed as `manipulate=` to the generator calls further down

# Single source of truth for the system prompt, so the four wrappers cannot
# drift apart when the wording is edited.
SYSTEM_ROLE = "You are a teacher that explains AI predictions."

# One wrapper per provider; only `gpt` is wired into the generator below.
gpt = llm_wrappers.GptApi(api_key, model="gpt-4o", system_role=SYSTEM_ROLE, temperature=TEMPERATURE)
llama_generation = llm_wrappers.LlamaAPI(api_key=replicate_key, model="llama-3-70b-instruct", system_role=SYSTEM_ROLE, temperature=TEMPERATURE)
claude_generation = llm_wrappers.ClaudeApi(api_key=anthropic_key, model="claude-3-5-sonnet-20240620", system_role=SYSTEM_ROLE, temperature=TEMPERATURE)
mistral_generation = llm_wrappers.MistralApi(api_key=mistral_key, model="mistral-large-2407", system_role=SYSTEM_ROLE, temperature=TEMPERATURE)

# Narrative generator bound to the dataset description and the chosen LLM.
generator = GenerationModel(ds_info=ds_info, llm=gpt)


In [55]:
# Compute the explanation variables for the selected instance.
# tree=True presumably selects a tree-specific SHAP explainer for the loaded
# model — confirm in GenerationModel.gen_variables.
generator.gen_variables(trained_model,x,y,tree=True)
# Show the first four rows of the explanation table of the first instance.
generator.explanation_list[0].head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,status,-0.104813,1,2.5725,Status of the checking account: (1: no checkin...
1,duration,-0.097553,48,20.79375,Duration of the credit request in months
2,amount,-0.050633,7763,3258.61625,Amount of credit requested in DM
3,savings,-0.038201,1,2.1025,Status of the savings account (1: no savings a...


In [56]:
# Build and print the generation prompt for the first instance (iloc_pos=0).
# With manipulate=MANIP the explanation is routed through full_inversion first
# (presumably a deliberately altered SHAP table — confirm in
# misc_tools.manipulations).
prompt=generator.generate_story_prompt(iloc_pos=0,manipulate=MANIP, manipulation_func=full_inversion)
print(prompt)



        Your goal is to generate a textual explanation or narrative as to why an AI model made a certain prediction for one particular instance. 
        To do this, you will be provided with a dictionary that contains broad descriptions of the specific dataset, target variable, and the task the model was trained on.
        Additionally, you will be provided with a dataframe that contains the names of all the features, their descriptions, their values, their average values and their SHAP values.
        Finally you will get a single string describing the result of the prediction.

        The goal of SHAP is to explain the prediction of an instance by computing the contribution of each feature to the prediction.
        Each individual SHAP value is a measure of how much additional probability this feature adds or subtracts 
        in the predicted probability relative to the base level probability. 
        This relative nature of the SHAP values might have unexpected consequences

In [57]:
# Ask the configured LLM for the narratives of the selected instance(s).
# NOTE(review): unlike the prompt preview, no manipulation_func is passed here;
# which manipulation is applied depends on generate_stories' default — confirm.
narratives = generator.generate_stories(trained_model, x, y, tree=True, manipulate=MANIP)

# Sentence splitter: break on whitespace that directly follows "." or "?",
# unless the text before it looks like an abbreviation ("x.y." style or a
# capital + lowercase letter + "." such as "Mr.").
sentence_boundary = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s')
narrative_split = sentence_boundary.split(narratives[0])

# Print the first narrative one sentence per line for easier reading.
for sentence in narrative_split:
    print(sentence)

Generated story 1/1 with gpt-4o
The model predicted a 30% probability that the customer would be a good credit risk, ultimately classifying them as a bad credit risk.
The most influential factor in this prediction was the status of the customer's savings account, which indicated no savings account.
This lack of savings likely signaled financial instability, contributing negatively to the probability of being a good credit risk.
The amount of credit requested was also significant, with the customer asking for 7,763 DM, a figure considerably higher than the average.
This large loan request might have raised concerns about the customer's ability to repay, further decreasing the likelihood of a positive credit outcome.

Additionally, the duration of the credit request, set at 48 months, was longer than typical, which could imply a higher risk of default over time.
This extended duration likely added to the model's cautious stance.
Lastly, the status of the checking account, which showed no

In [11]:
# Build an extraction model on the same dataset info and LLM, then print the
# prompt it produces for the first narrative.
# NOTE(review): this rebinds `prompt`, replacing the generation prompt printed earlier.
# NOTE(review): this cell's execution count (11) is lower than that of the cells
# above it (49-57), and its recorded output describes a student dataset while
# dataset_name is "credit" — the saved output looks stale; restart the kernel
# and run all cells to confirm.
extractor=ExtractionModel(ds_info=ds_info, llm=gpt)
prompt=extractor.generate_prompt(narratives[0])
print(prompt)


        An LLM was used to create a narrative to explain and interpret a prediction 
        made by another smaller classifier model. The LLM was given an explanation of 
        the classifier task, the training data, and provided with the exact names of all 
        the features and their meaning. Most importantly, the LLM was provided with a table 
        that contains the feature values of that particular instance, the average feature values and their SHAP values 
        which are a numeric measure of their importance. Here is some general info about the task:

        Dataset description: The dataset contains information about students from two Portugese high schools and in particular their family situation and other habits,
        Target description: The target variable represents the final year grade, transformed into whether the student passed (1) or not (0) at the end of the year,
        Task description": Predict whether a student will pass,

        The LLM returned th

In [12]:
# Run the extraction over every narrative and inspect the result for the first.
# Judging by the recorded output, each entry maps a feature name to the
# extracted 'rank', 'sign', 'value' and 'assumption'.
extraction=extractor.generate_extractions(narratives)
extraction[0]

Extracted story 1/1 with gpt-4o


{'Mjob_teacher': {'rank': 0,
  'sign': 1,
  'value': None,
  'assumption': 'Having a mother in a teaching profession likely provides a supportive and academically-oriented environment.'},
 'absences': {'rank': 1,
  'sign': -1,
  'value': 2,
  'assumption': 'Even a relatively low number of absences can hinder academic performance.'},
 'failures': {'rank': 2,
  'sign': 1,
  'value': None,
  'assumption': 'A lack of past class failures indicates a strong academic foundation.'},
 'goout': {'rank': 3,
  'sign': 1,
  'value': None,
  'assumption': 'A moderate frequency of going out with friends may help maintain a balance between social life and academic responsibilities.'}}

In [13]:
# Re-display the top of the first explanation table as the reference to compare
# the extracted ranks/signs against.
generator.explanation_list[0].head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,goout,0.063416,2,3.098101,Going out with friends (from 1 - very low to 5...
1,failures,0.038342,0,0.360759,Number of past class failures (from 0 to 3)
2,absences,0.036718,2,5.75,Number of school absences (actual number of ab...
3,Mjob_teacher,-0.030872,1,0.136076,One-hot variable for mothers's job -- teacher


In [14]:
# Compare what was extracted from the first narrative with the explanation
# table it was generated from.
first_extraction = extraction[0]
first_explanation = generator.explanation_list[0]
diff_result = extractor.get_diff(first_extraction, first_explanation)

# get_diff yields five items, unpacked in this order.
rank_diff, sign_diff, value_diff, real_rank, extracted_rank = diff_result

In [15]:
# Per-feature rank differences. In the recorded output the values match
# "extracted rank - rank in the explanation table", listed in the order of the
# extraction dict (e.g. Mjob_teacher: 0 - 3 = -3) — confirm against
# ExtractionModel.get_diff.
rank_diff

[-3.0, -1.0, 1.0, 3.0]

In [16]:
# Per-feature sign comparison. In the recorded output 1.0 coincides with the
# features whose extracted sign disagrees with the sign of their SHAP value
# (Mjob_teacher, absences) and 0.0 with agreement — confirm against
# ExtractionModel.get_diff.
sign_diff

[1.0, 1.0, 0.0, 0.0]