## Get Explanations

### GPT explanation

In [16]:
# imports
import os
import ast  # for converting embeddings saved as strings back to arrays
import openai  
import pandas as pd
import tiktoken  # for counting tokens
from scipy import spatial  # for calculating vector similarities for search
# Read the OpenAI API key from the environment so it is never hard-coded here;
# this raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

# models
EMBEDDING_MODEL = "text-embedding-ada-002"
# Alternative chat model, kept for reference:
# GPT_MODEL = "gpt-3.5-turbo"
GPT_MODEL = "gpt-4" # According to OpenAI, GPT-4 is more responsive to system messages, whereas 3.5 relies more on the user input.


In [17]:
# Example multiple-choice question used to elicit a natural-language explanation (NLE).
premise = "Which of the following should you not bring a fox in"
choices = ['hen house', 'England', 'elementary school']

# Number the choices starting at 1 and separate them with two spaces.
numbered_choices = "  ".join(
    f"{number}.{choice}" for number, choice in enumerate(choices, start=1)
)
query = f"Question: {premise}:\nChoices: {numbered_choices}"

# System prompt asking for the choice number on the first line and the
# explanation on the second line.
sys_msg = """
Make a selection and explain your reasoning.
Reply in the following format: the first line is contains only the choice number,
and the second line is the explanation.
"""

messages = [
    {'role': 'system', 'content': sys_msg},
    {'role': 'user', 'content': query},
]

# temperature=0 is passed to the API for this request.
response = openai.ChatCompletion.create(
    messages=messages,
    model=GPT_MODEL,
    temperature=0,
)

# Text content of the first returned completion.
model_response = response['choices'][0]['message']['content']

In [18]:
# Parse the reply: the first line carries the 1-based choice number and everything
# after it is the explanation. Unpacking a plain split('\n') into two names raises
# ValueError whenever the model adds a blank line or a multi-line explanation, so
# take the first line and join whatever remains.
choice_line, *explanation_lines = model_response.strip().split('\n')
# Tolerate surrounding whitespace and a trailing period such as "1."
y_pred = int(choice_line.strip().rstrip('.'))
# Collapse the remaining non-empty lines into a single explanation string.
model_nle = ' '.join(line.strip() for line in explanation_lines if line.strip())
y_pred, model_nle

(1,
 'Bringing a fox into a hen house would be disastrous, as foxes are natural predators of chickens and would likely cause harm or death to the hens.')

## Obtain NLE Embeddings

In [None]:
import torch

# Load the 'roberta.large' entry point from the pytorch/fairseq hub repository.
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')
roberta.eval()  # disable dropout (or leave in train mode to finetune)

# Use the GPU when one is available and fall back to CPU otherwise, so this cell
# does not fail on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
roberta.to(device)

# Encode the explanation and each answer choice with the model's own encoder.
tok_model_nle = roberta.encode(model_nle)
tok_choices = [roberta.encode(ch) for ch in choices]

In [None]:
# embed the tokens using the last layer feature of the model
emb_model_nle = roberta.extract_features(tok_model_nle)
# Store the choice embeddings under their own name (parallel to emb_model_nle) and
# leave tok_choices untouched, so this cell can be re-run without feeding
# embeddings back into extract_features.
emb_choices = [roberta.extract_features(tok) for tok in tok_choices]


## Calculate SHAP Score

In [None]:
import shap

# NOTE(review): X, your_feature_name and your_function_to_get_y_pred are not defined
# in the visible notebook; they look like placeholders to fill in. Confirm before running.

# Take a 100-row sample of X; it is reused as background data in the explainer cell.
X100 = shap.utils.sample(X, nsamples=100)

# Partial dependence plot of the prediction function for one feature over the sample,
# without ICE curves and with both expected-value markers enabled.
shap.partial_dependence_plot(
    your_feature_name,
    your_function_to_get_y_pred,
    X100,
    ice=False,
    model_expected_value=True,
    feature_expected_value=True,
)

In [None]:
# Build a SHAP explainer around the prediction function with the sampled rows
# (X100) as the masker, then compute SHAP values for every row of X.
explainer = shap.Explainer(your_function_to_get_y_pred, masker=X100)
shap_values = explainer(X)