In [40]:
import shap
import pandas as pd
import numpy as np
import pickle
import random 
import yaml
import re
import dill

from shapnarrative_metrics.llm_tools import llm_wrappers
from shapnarrative_metrics.misc_tools.manipulations import full_inversion, shap_permutation
from shapnarrative_metrics.llm_tools.generation import GenerationModel
from shapnarrative_metrics.llm_tools.extraction import ExtractionModel

### Load necessary keys and data

In [41]:
# Read the per-provider API keys from the YAML config file.
# The parsed mapping is bound to `keys_config` instead of `dict`, so the
# built-in `dict` type is no longer shadowed for the rest of the session.
# (If an older version of this cell already ran, restart the kernel to get
# the built-in back.)
with open("config/keys.yaml") as f:
    keys_config = yaml.safe_load(f)

# All provider keys live under the top-level "API_keys" entry.
provider_keys = keys_config["API_keys"]
api_key = provider_keys["OpenAI"]
replicate_key = provider_keys["Replicate"]
anthropic_key = provider_keys["Anthropic"]
mistral_key = provider_keys["Mistral"]

In [42]:
# Name of the dataset to work on; every artefact below is read from
# data/<dataset_name>_dataset/.
dataset_name = "fifa"
dataset_dir = f"data/{dataset_name}_dataset"

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load files that were produced by this project or another trusted source.
with open(f"{dataset_dir}/dataset_info", "rb") as f:
    ds_info = pickle.load(f)

with open(f"{dataset_dir}/RF.pkl", "rb") as f:
    trained_model = pickle.load(f)

# Cleaned train / test splits stored as parquet files.
train = pd.read_parquet(f"{dataset_dir}/train_cleaned.parquet")
test = pd.read_parquet(f"{dataset_dir}/test_cleaned.parquet")

In [43]:
# NOTE(review): `n` is not referenced by any cell visible here; kept in case
# a cell outside this view relies on it.
n = 14

# Index label (not position) of the test row to explain.
# A dead `idx=882` assignment that was immediately overwritten has been removed.
idx = 4

# The last column of `test` is used as the target, all preceding columns as
# features. `.loc[[idx]]` with a list keeps the result as a one-row
# DataFrame / one-element Series instead of collapsing to scalars.
feature_columns = test.columns[:-1]
target_column = test.columns[-1]
x = test[feature_columns].loc[[idx]]
y = test[target_column].loc[[idx]]

 

In [44]:
# Shared generation settings.
TEMPERATURE = 0
# Passed as `manipulate=` to the generator calls further down.
MANIP = True
# Single source of truth for the system role given to every LLM wrapper
# (previously the same literal was repeated in each constructor call).
SYSTEM_ROLE = "You are a teacher that explains AI predictions."

# One wrapper per provider; only `gpt` is handed to the generator below.
gpt = llm_wrappers.GptApi(
    api_key,
    model="gpt-4o",
    system_role=SYSTEM_ROLE,
    temperature=TEMPERATURE,
)
llama_generation = llm_wrappers.LlamaAPI(
    api_key=replicate_key,
    model="llama-3-70b-instruct",
    system_role=SYSTEM_ROLE,
    temperature=TEMPERATURE,
)
claude_generation = llm_wrappers.ClaudeApi(
    api_key=anthropic_key,
    model="claude-3-5-sonnet-20240620",
    system_role=SYSTEM_ROLE,
    temperature=TEMPERATURE,
)
mistral_generation = llm_wrappers.MistralApi(
    api_key=mistral_key,
    model="mistral-large-2407",
    system_role=SYSTEM_ROLE,
    temperature=TEMPERATURE,
)

generator = GenerationModel(ds_info=ds_info, llm=gpt)


In [45]:
# Run the generator's variable preparation for the selected instance
# (presumably this is what fills `explanation_list` — confirm in GenerationModel).
generator.gen_variables(trained_model, x, y, tree=True)

# Preview the first four rows of the first explanation table.
first_explanation = generator.explanation_list[0]
first_explanation.head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,Goal Scored,-0.247045,0,1.303922,Number of goals scored by the team during the ...
1,Ball Possession %,0.0344,64,50.107843,Percentage of ball possession by the team duri...
2,Attempts,0.032912,13,12.333333,Number of attempts or shots taken by the team.
3,Fouls Committed,-0.022867,22,13.54902,Number of fouls committed by the team.


In [46]:
# Build the story prompt for the first prepared instance, applying the
# `full_inversion` manipulation when MANIP is enabled, and show the raw text.
prompt = generator.generate_story_prompt(
    iloc_pos=0,
    manipulate=MANIP,
    manipulation_func=full_inversion,
)
print(prompt)



        Your goal is to generate a textual explanation or narrative as to why an AI model made a certain prediction for one particular instance. 
        To do this, you will be provided with a dictionary that contains broad descriptions of the specific dataset, target variable, and the task the model was trained on.
        Additionally, you will be provided with a dataframe that contains the names of all the features, their descriptions, their values, their average values and their SHAP values.
        Finally you will get a single string describing the result of the prediction.

        The goal of SHAP is to explain the prediction of an instance by computing the contribution of each feature to the prediction.
        Each individual SHAP value is a measure of how much additional probability this feature adds or subtracts 
        in the predicted probability relative to the base level probability. 
        This relative nature of the SHAP values might have unexpected consequences

In [39]:
# Ask the LLM for one narrative per instance.
narratives = generator.generate_stories(
    trained_model, x, y, tree=True, manipulate=MANIP
)

# Split the first narrative on whitespace that follows a '.' or '?',
# except where the look-behinds match (e.g. patterns like "a.b." or "Mr.").
sentence_boundary = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s'
narrative_split = re.split(sentence_boundary, narratives[0])

# One sentence per output line.
print(*narrative_split, sep="\n")

Generated story 1/1 with gpt-4o
The model predicted a 26.00% probability that the team would receive the Man of the Match award, ultimately predicting that they would not receive it.
The most significant factor influencing this prediction was the number of fouls committed by the team, which was notably high at 22.
This likely had a negative impact on the team's overall performance and sportsmanship, reducing their chances of earning the award.
In contrast, the number of attempts made by the team was slightly above average, but this did not significantly boost their probability of winning the award, possibly because attempts without goals do not strongly influence the outcome.
The team's ball possession was relatively high at 64%, which typically suggests control over the game, yet it did not contribute positively to the prediction.
This could be due to the lack of goals scored, as the team failed to convert possession into tangible success on the scoreboard.
The absence of goals, a cri

In [31]:
# Extraction model built on the same dataset info and LLM as the generator.
extractor = ExtractionModel(ds_info=ds_info, llm=gpt)

# Note: this rebinds `prompt`, replacing the story prompt from the earlier cell.
# NOTE(review): the saved output of this cell describes a German credit
# dataset although dataset_name is "fifa" — it looks stale; re-run to refresh.
first_narrative = narratives[0]
prompt = extractor.generate_prompt(first_narrative)
print(prompt)


        An LLM was used to create a narrative to explain and interpret a prediction 
        made by another smaller classifier model. The LLM was given an explanation of 
        the classifier task, the training data, and provided with the exact names of all 
        the features and their meaning. Most importantly, the LLM was provided with a table 
        that contains the feature values of that particular instance, the average feature values and their SHAP values 
        which are a numeric measure of their importance. Here is some general info about the task:

        Dataset description: The dataset contains information from the 1970s in Germany on a series of debtors that took a loan from the bank. It includes many detailed categorical and one-hot variables about their financial situation. Keep in mind that at the time Germany used Deutsche Marks (DM) with an average yearly salary of 10 000 to 20 000 DM,
        Target description: The target variable is whether the customer

In [12]:
# Run the extraction over every generated narrative and display the result
# for the first one.
extraction = extractor.generate_extractions(narratives)
first_extraction = extraction[0]
first_extraction

Extracted story 1/1 with gpt-4o


{'Mjob_teacher': {'rank': 0,
  'sign': 1,
  'value': None,
  'assumption': 'Having a mother in a teaching profession likely provides a supportive and academically-oriented environment.'},
 'absences': {'rank': 1,
  'sign': -1,
  'value': 2,
  'assumption': 'Even a relatively low number of absences can hinder academic performance.'},
 'failures': {'rank': 2,
  'sign': 1,
  'value': None,
  'assumption': 'A lack of past class failures indicates a strong academic foundation.'},
 'goout': {'rank': 3,
  'sign': 1,
  'value': None,
  'assumption': 'A moderate frequency of going out with friends may help maintain a balance between social life and academic responsibilities.'}}

In [13]:
# Show the first four rows of the first explanation table for comparison with
# the extraction above.
# NOTE(review): the saved output lists student-dataset features while
# dataset_name is "fifa" — likely left over from an earlier run; re-run to refresh.
generator.explanation_list[0].head(4)

Unnamed: 0,feature_name,SHAP_value,feature_value,feature_average,feature_desc
0,goout,0.063416,2,3.098101,Going out with friends (from 1 - very low to 5...
1,failures,0.038342,0,0.360759,Number of past class failures (from 0 to 3)
2,absences,0.036718,2,5.75,Number of school absences (actual number of ab...
3,Mjob_teacher,-0.030872,1,0.136076,One-hot variable for mothers's job -- teacher


In [14]:
# Compare the first extraction with the first explanation table; get_diff
# returns five values that are unpacked here.
(
    rank_diff,
    sign_diff,
    value_diff,
    real_rank,
    extracted_rank,
) = extractor.get_diff(extraction[0], generator.explanation_list[0])

In [15]:
# Display the rank differences returned by get_diff
# (presumably one entry per extracted feature — confirm in ExtractionModel).
rank_diff

[-3.0, -1.0, 1.0, 3.0]

In [16]:
# Display the sign differences returned by get_diff
# (presumably one entry per extracted feature — confirm in ExtractionModel).
sign_diff

[1.0, 1.0, 0.0, 0.0]