In [4]:
import shap
import pandas as pd
import numpy as np
import pickle
import random 
import yaml
import re
import dill

from shapnarrative_metrics.llm_tools import llm_wrappers
from shapnarrative_metrics.misc_tools.manipulations import full_inversion, shap_permutation
from shapnarrative_metrics.llm_tools.generation import GenerationModel
from shapnarrative_metrics.llm_tools.extraction import ExtractionModel

In [5]:
# Read the provider API keys from the YAML config file.
# The parsed mapping is bound to `api_config` instead of `dict`: assigning to
# `dict` would shadow the builtin type for the rest of the kernel session and
# break any later `dict(...)` call.
with open("config/keys.yaml") as f:
    api_config = yaml.safe_load(f)

# All provider keys live under the top-level "API_keys" entry.
api_keys = api_config["API_keys"]
api_key = api_keys["OpenAI"]
replicate_key = api_keys["Replicate"]
anthropic_key = api_keys["Anthropic"]
mistral_key = api_keys["Mistral"]

In [20]:
# Load the serialized metrics of one experiment run and display the `bleurt`
# attribute of the entry at index 5.
experiment_dir = "manipulated_experiments"
i = 4
metrics_path = f"results/{experiment_dir}/experiment_{i}/metrics.pkl"

# dill executes code while unpickling; only load result files you produced yourself.
with open(metrics_path, "rb") as metrics_file:
    metrics = dill.load(metrics_file)

metrics[5].bleurt


[0.5136396288871765,
 0.4524648189544678,
 0.498776376247406,
 0.49796366691589355,
 0.4731096029281616,
 0.5132098197937012,
 0.4720908999443054,
 0.5049065351486206,
 0.5165247321128845,
 0.47141677141189575,
 0.48701781034469604,
 0.531221330165863,
 0.522166907787323,
 0.5129984617233276,
 0.5141683220863342,
 0.5196486711502075,
 0.5324174761772156,
 0.46242207288742065,
 0.5223731994628906,
 0.5208261609077454]

In [19]:
# Load the serialized metrics of one experiment run and display the second
# narrative (position 1) stored on the entry at index 5.
experiment_dir = "manipulated_experiments"
i = 1
metrics_path = f"results/{experiment_dir}/experiment_{i}/metrics.pkl"

# dill executes code while unpickling; only load result files you produced yourself.
with open(metrics_path, "rb") as metrics_file:
    metrics = dill.load(metrics_file)

metrics[5].narrative_list[1]


"The model predicted a 53% probability that the customer will be a good credit, and therefore predicted the outcome 1. This prediction was largely influenced by the customer's credit history, which had a negative contribution to the predicted probability. Despite having a credit history rating of 4, indicating that all credits were always paid back duly, this rating is below the average rating in the dataset, suggesting that the customer's credit history may not be as strong as others in the dataset. \n\nOn the other hand, the customer's savings account status had a positive contribution to the predicted probability, indicating that having a savings account with a moderate amount of savings is seen as a positive factor by the model. The customer's checking account status also had a positive contribution, suggesting that having a stable checking account is important for being a good credit. \n\nThe amount of credit requested, 6361 DM, also played a role in the prediction, although its c

In [216]:
# Load everything stored for the selected dataset: its pickled dataset info,
# the pickled model saved as RF.pkl, and the cleaned train/test parquet splits.
dataset_name = "student"

# pickle executes code while loading; these files are expected to be local artifacts.
with open(f'data/{dataset_name}_dataset/dataset_info', 'rb') as info_file:
    ds_info = pickle.load(info_file)

with open(f'data/{dataset_name}_dataset/RF.pkl', 'rb') as model_file:
    trained_model = pickle.load(model_file)

train = pd.read_parquet(f"data/{dataset_name}_dataset/train_cleaned.parquet")
test = pd.read_parquet(f"data/{dataset_name}_dataset/test_cleaned.parquet")

In [217]:
# Display the test split read from test_cleaned.parquet; its last column is `target`.
test

Unnamed: 0,sex,age,famsize,Pstatus,Medu,Fedu,traveltime,studytime,failures,schoolsup,...,reason_course,reason_home,reason_other,reason_reputation,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,target
252,1,18,0,1,2,1,1,1,1,0,...,0,0,1,0,0,0,0,1,0,0
236,1,17,1,1,2,2,1,2,0,0,...,0,1,0,0,0,0,1,0,0,1
275,0,17,1,1,2,2,2,2,0,1,...,1,0,0,0,0,0,0,1,0,1
148,1,16,0,1,4,4,1,1,0,0,...,1,0,0,0,0,0,0,0,1,0
309,0,19,1,1,1,1,1,2,1,1,...,0,0,0,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,0,16,0,1,4,3,1,2,0,0,...,0,1,0,0,0,1,0,0,0,1
199,0,16,1,1,4,4,1,2,0,0,...,0,0,0,1,0,0,0,0,1,1
320,0,17,0,0,4,3,1,2,0,0,...,1,0,0,0,0,0,0,1,0,1
375,0,18,0,1,1,1,4,3,0,0,...,0,1,0,0,0,0,1,0,0,1


In [218]:
n = 14  # NOTE(review): not referenced by any cell visible in this notebook — confirm before removing

# Select one test row by index label. Every column except the last goes into
# `x`; the last column is kept separately as `y`.
idx = 13
feature_frame = test[test.columns[:-1]]
label_series = test[test.columns[-1]]

# The list indexer `[[idx]]` keeps the result as a one-row DataFrame /
# one-element Series instead of collapsing it to a Series / scalar.
x = feature_frame.loc[[idx]]
y = label_series.loc[[idx]]

 

In [219]:
# Settings shared by the cells below. MANIP is passed as `manipulate=` to the
# generator calls in later cells.
TEMPERATURE = 0
MANIP = True

# Every wrapper is created with the same system role and temperature, so both
# are collected once and unpacked into each constructor call.
shared_llm_kwargs = {
    "system_role": "You are a teacher that explains AI predictions.",
    "temperature": TEMPERATURE,
}

gpt = llm_wrappers.GptApi(api_key, model="gpt-4o", **shared_llm_kwargs)
llama_generation = llm_wrappers.LlamaAPI(
    api_key=replicate_key,
    model="llama-3-70b-instruct",
    **shared_llm_kwargs,
)
claude_generation = llm_wrappers.ClaudeApi(
    api_key=anthropic_key,
    model="claude-3-5-sonnet-20240620",
    **shared_llm_kwargs,
)
mistral_generation = llm_wrappers.MistralApi(
    api_key=mistral_key,
    model="mistral-large-2407",
    **shared_llm_kwargs,
)

# Narratives are generated with the Llama wrapper; swap `llm=` to use another one.
generator = GenerationModel(ds_info=ds_info, llm=llama_generation)


In [220]:
# Run gen_variables on the selected instance, then preview the first four rows
# of the first entry in the generator's explanation_list.
generator.gen_variables(trained_model, x, y, tree=True)

first_explanation = generator.explanation_list[0]
first_explanation.head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,absences,0.046066,2,5.75,Number of school absences (actual number of ab...
1,failures,0.045838,0,0.360759,Number of past class failures (from 0 to 3)
2,sex,0.029482,1,0.458861,The sex of the student as a binary variable (0...
3,goout,0.025815,3,3.098101,Going out with friends (from 1 - very low to 5...


In [221]:
# Build the story prompt for the instance at position 0 and print it for
# inspection. With MANIP enabled, shap_permutation is supplied as the
# manipulation function.
prompt = generator.generate_story_prompt(
    iloc_pos=0,
    manipulate=MANIP,
    manipulation_func=shap_permutation,
)
print(prompt)



        Your goal is to generate a textual explanation or narrative as to why an AI model made a certain prediction for one particular instance. 
        To do this, you will be provided with a dictionary that contains broad descriptions of the specific dataset, target variable, and the task the model was trained on.
        Additionally, you will be provided with a dataframe that contains the names of all the features, their descriptions, their values, their average values and their SHAP values.
        Finally you will get a single string describing the result of the prediction.

        The goal of SHAP is to explain the prediction of an instance by computing the contribution of each feature to the prediction.
        Each individual SHAP value is a measure of how much additional probability this feature adds or subtracts 
        in the predicted probability relative to the base level probability. 
        This relative nature of the SHAP values might have unexpected consequences

In [10]:
# Generate the narrative(s) for the selected instance.
# NOTE(review): unlike the generate_story_prompt call, no manipulation_func is
# passed here — confirm that the default used by generate_stories is the intended one.
narratives = generator.generate_stories(
    trained_model,
    x,
    y,
    tree=True,
    manipulate=MANIP,
)

# Sentence boundary: one whitespace character preceded by "." or "?", unless
# the text just before it looks like an abbreviation (word-dot-word-char, or a
# capital letter followed by a lowercase letter and a dot).
sentence_boundary = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s')
narrative_split = sentence_boundary.split(narratives[0])

# Print the first narrative one sentence per line.
for sentence in narrative_split:
    print(sentence)

Generated story 1/1 with llama-3-70b-instruct
The model predicted a 76% probability that the student would pass, and therefore predicted the outcome 1.
This prediction was largely driven by the student's mother being a teacher, which positively contributed to the prediction.
Having a mother in a teaching profession likely provided the student with a supportive and academically-oriented environment, increasing their chances of passing.
On the other hand, the student's two absences from school had a negative impact on the prediction, suggesting that even a relatively low number of absences can hinder academic performance.
The student's lack of past class failures also contributed positively to the prediction, indicating that they had a strong academic foundation.
Additionally, the student's moderate frequency of going out with friends, which is below the average, may have helped them maintain a balance between social life and academic responsibilities, further supporting the prediction.


In [11]:
# Create the extraction model on top of the GPT wrapper and print the prompt
# it builds for the first generated narrative.
extractor = ExtractionModel(ds_info=ds_info, llm=gpt)

first_narrative = narratives[0]
prompt = extractor.generate_prompt(first_narrative)
print(prompt)


        An LLM was used to create a narrative to explain and interpret a prediction 
        made by another smaller classifier model. The LLM was given an explanation of 
        the classifier task, the training data, and provided with the exact names of all 
        the features and their meaning. Most importantly, the LLM was provided with a table 
        that contains the feature values of that particular instance, the average feature values and their SHAP values 
        which are a numeric measure of their importance. Here is some general info about the task:

        Dataset description: The dataset contains information about students from two Portugese high schools and in particular their family situation and other habits,
        Target description: The target variable represents the final year grade, transformed into whether the student passed (1) or not (0) at the end of the year,
        Task description": Predict whether a student will pass,

        The LLM returned th

In [12]:
# Run the extractor over all generated narratives and display the result for
# the first one.
extraction = extractor.generate_extractions(narratives)

first_extraction = extraction[0]
first_extraction

Extracted story 1/1 with gpt-4o


{'Mjob_teacher': {'rank': 0,
  'sign': 1,
  'value': None,
  'assumption': 'Having a mother in a teaching profession likely provides a supportive and academically-oriented environment.'},
 'absences': {'rank': 1,
  'sign': -1,
  'value': 2,
  'assumption': 'Even a relatively low number of absences can hinder academic performance.'},
 'failures': {'rank': 2,
  'sign': 1,
  'value': None,
  'assumption': 'A lack of past class failures indicates a strong academic foundation.'},
 'goout': {'rank': 3,
  'sign': 1,
  'value': None,
  'assumption': 'A moderate frequency of going out with friends may help maintain a balance between social life and academic responsibilities.'}}

In [13]:
# Preview the first four rows of the explanation table that the extraction is
# compared against in the next cell.
explanation_table = generator.explanation_list[0]
explanation_table.head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,goout,0.063416,2,3.098101,Going out with friends (from 1 - very low to 5...
1,failures,0.038342,0,0.360759,Number of past class failures (from 0 to 3)
2,absences,0.036718,2,5.75,Number of school absences (actual number of ab...
3,Mjob_teacher,-0.030872,1,0.136076,One-hot variable for mothers's job -- teacher


In [14]:
# Compare the first extraction with the first explanation table; get_diff
# returns five values, unpacked here in the order it yields them.
(
    rank_diff,
    sign_diff,
    value_diff,
    real_rank,
    extracted_rank,
) = extractor.get_diff(extraction[0], generator.explanation_list[0])

In [15]:
# Display the first value returned by extractor.get_diff (bound to rank_diff).
rank_diff

[-3.0, -1.0, 1.0, 3.0]

In [16]:
# Display the second value returned by extractor.get_diff (bound to sign_diff).
sign_diff

[1.0, 1.0, 0.0, 0.0]