In [1]:
# go up a directory
import os
os.chdir('..')

In [2]:
import numpy as np
import glob
import matplotlib.pyplot as plt

In [3]:
files_dir = 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/'

# list files in directory that end in summary.txt
txt_file_replies = glob.glob(files_dir + '*summary.txt')

In [4]:
# sort files so that they're read in numerical order: 0_gpt-4-0125-preview_ANN_L_beauty_summary.txt, 1_gpt-4-0125-preview_ANN_L_beauty_summary.txt, etc.
txt_file_replies = sorted(txt_file_replies, key=lambda x: int(x.split('/')[-1].split('_')[0]))

In [5]:
txt_file_replies

['outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/0_gpt-4-0125-preview_ANN_L_beauty_summary.txt',
 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/1_gpt-4-0125-preview_ANN_L_beauty_summary.txt',
 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/2_gpt-4-0125-preview_ANN_L_beauty_summary.txt',
 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/3_gpt-4-0125-preview_ANN_L_beauty_summary.txt',
 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/4_gpt-4-0125-preview_ANN_L_beauty_summary.txt',
 'outputs/LLM_QueryAndReply/20240329_004155_gpt-4-0125-preview_nshot16_k5_prompt-senti_classif_remove_words_v1_beauty_ann_l/5_gpt-4-0125-pre

In [12]:
# read in each file
replies = []
for file in txt_file_replies:
    with open(file, 'r') as f:
        replies.append(f.read())

In [13]:
replies[0]

'0_gpt-4-0125-preview_ANN_L_beauty_summary\ntemperature:\t\t0\nn_shot:\t\t\t16\nexplanation_mode:\tperturb\neval_idx:\t\t0\nLLM:\t\t\tgpt-4-0125-preview\nk:\t\t\t5\n\nMESSAGE:\n[{\'role\': \'user\', \'content\': \'Context: "We are analyzing a fixed set of word removals on a specific sentence to understand the influence on the model’s output. The dataset below contains the words removed from the original sentence and the corresponding change in output."\\n\\nDataset:\\n```\\nOriginal sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.\\n\\nRemoved words: say a very wife. shes bought one for are daughter process She faster. difference. much results My great, easier noticeable and\\nChange in output: 1\\n\\nRemoved words: the say a very wife. it shes bought one for are daughter She makes wants this difference. much I noticeable\\nChange in outp

In [14]:
# strip and trailing \n get the last line of the file
replies = [reply.strip().split('\n')[-1].split(',') for reply in replies]
#strip each word of leading and trailing whitespace
replies = [[word.strip() for word in reply] for reply in replies]

In [15]:
# remove any empty strings in the list of lists
replies = [[word for word in reply if word] for reply in replies]

In [17]:
# get indices where theres not 3 words
bad_reply_indices = [i for i, reply in enumerate(replies) if len(reply) != 3]
bad_reply_indices

[75]

In [19]:
from utils import get_model_names, get_model_architecture
from openxai.dataloader import return_loaders

data_name      = 'beauty'
model_name     = 'ann_l'
base_model_dir = './models/ClassWeighted/'
model_dir, model_file_name = get_model_names(model_name, data_name, base_model_dir)


# Load dataset
download_data                         = False if data_name in ['compas', 'blood'] else True
loader_train, loader_val, loader_test = return_loaders(data_name=data_name, download=False)

X_train, y_train = loader_train.dataset.data, loader_train.dataset.targets.to_numpy()
X_val, y_val     = loader_val.dataset.data, loader_val.dataset.targets.to_numpy()
X_test, y_test   = loader_test.dataset.data, loader_test.dataset.targets.to_numpy()

# load sentences
X_train_sentences = loader_train.dataset.sentences
X_val_sentences = loader_val.dataset.sentences
X_test_sentences = loader_test.dataset.sentences

num_features = X_train.shape[1]

In [20]:
from openxai.LoadModel import DefineModel
import torch
# Load model
input_size                                          = loader_train.dataset.get_number_of_features()
dim_per_layer_per_MLP, activation_per_layer_per_MLP = get_model_architecture(model_name)
model                                          = DefineModel(model_name, input_size,
                                                                  dim_per_layer_per_MLP,
                                                                  activation_per_layer_per_MLP)
model.load_state_dict(torch.load(model_dir + model_file_name))
model.eval()

# Store test predictions
preds = model.predict(torch.tensor(X_test).float())

In [21]:
model

MLP(
  (layers): ModuleList(
    (0): Linear(in_features=384, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): ReLU()
    (6): Linear(in_features=16, out_features=2, bias=True)
  )
)

In [30]:
from sentence_transformers import SentenceTransformer
# Generate embeddings
def generate_embeddings(texts):
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    batch_size = 256  # Adjust based on your system's memory capacity
    embeddings = []
    print('transforming data...')
    num_batches = len(texts) // batch_size
    for i in range(0, len(texts), batch_size):
        if i % 100 == 0:
            print(f'Processing batch {i // batch_size + 1}/{num_batches}')
        batch = texts[i:i + batch_size]
        batch_embeddings = embedding_model.encode(batch)
        embeddings.extend(batch_embeddings)
    return embeddings

In [31]:
def PGI_words(sentence, embedding, topk, text_classifier, random_baseline=False):
    # PGI = []
    # for sentence, embedding, topk in zip(input_sentences, input_embeddings, topks):
    print('original_sentence:', sentence)
    
    with torch.no_grad():
        pred_original = text_classifier(torch.tensor(embedding))
        
    if random_baseline:
        #randomly select k words to remove from sentence
        inds = np.random.choice(len(sentence.split()), len(topk), replace=False)
        topk = [sentence.split()[i] for i in inds]
    
    for word in topk:
        print(word)
        sentence = sentence.replace(word, '')
        
    print('new_sentence:', sentence)
    # get embeddings for the new sentence
    new_sentence_embedding = generate_embeddings([sentence])
    with torch.no_grad():
        pred_removed = text_classifier(torch.tensor(new_sentence_embedding))
    PGI = torch.abs(pred_original[1] - pred_removed[0][1]).item()
    print(PGI)
    # PGI.append(pgi_word)
        
    return PGI

In [24]:
PGI_scores = PGI_words(X_test_sentences[0], X_test[0], replies[0], model)

original_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
process
results
great
new_sentence: I bought this for my wife.  She says that it makes the  easier and much faster.  I say that the  are , a very noticeable difference. My daughter says shes wants one too.
transforming data...
Processing batch 1/0
7.05718994140625e-05


  pred_removed = text_classifier(torch.tensor(new_sentence_embedding))


In [36]:
from sklearn.metrics import auc
def calculateFaithfulnessAUC_text(input_sentences, input_embeddings, explanations, text_classifier, min_idx, max_idx, max_k, random_baseline=False):

    PGI_AUC = []
    for index in range(min_idx, max_idx):
        if index == bad_reply_indices:
            continue
        print(index)
        if max_k > 1:
            auc_x = np.arange(max_k) / (max_k - 1)
        PGI = []
        for top_k in range(1, max_k + 1):
            print('top_k', top_k)
            PGI.append(PGI_words(input_sentences[index], input_embeddings[index], explanations[index][:top_k], text_classifier, random_baseline))#[exp[:top_k] for exp in explanations], text_classifier))
        if max_k > 1:
            PGI_AUC.append(auc(auc_x, PGI))
        else:
            PGI_AUC.append(PGI)

    return PGI_AUC

In [26]:
PGI_AUC = calculateFaithfulnessAUC_text(X_test_sentences, X_test, replies, model, 0, 100, 3)

0
original_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
process
new_sentence: I bought this for my wife.  She says that it makes the  easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
transforming data...
Processing batch 1/0
2.753734588623047e-05
original_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
process
results
new_sentence: I bought this for my wife.  She says that it makes the  easier and much faster.  I say that the  are great, a very noticeable difference. My daughter says shes wants one too.
transforming data...
Processing batch 1/0
9.989738464355469e-05
original_sentence: I bought this for my wife.

In [110]:
print(PGI_AUC)

[7.170438766479492e-05, 0.38246825337409973, 1.6391277313232422e-06, 3.844499588012695e-06, 0.18601280450820923, 9.084994246677525e-06, 7.30403937332369e-09, 2.5063753128051758e-05, 0.0011120736598968506, 0.00014010071754455566, 0.23288759589195251, 7.572193740090771e-12, 0.00013464689254760742, 8.624792098999023e-05, 0.20396886207163334, 0.281537551432848, 0.08540374040603638, 1.0311603546142578e-05, 5.558133125305176e-05, 0.21534782648086548, 0.0032357651507481933, 0.0, 9.305870662501547e-06, 0.0037360787391662598, 0.0, 2.303651490365155e-05, 0.9392543584108353, 0.0002658069133758545, 0.0, 0.155643438178231, 5.841255187988281e-06, 0.00016054511070251465, 2.980232238769531e-07, 0.00047343969345092773, 0.0, 9.54473016463453e-05, 1.8656253814697266e-05, 0.00018352270126342773, 0.0001274944188480731, 1.1920928955078125e-07, 0.0208753515034914, 0.02886722981929779, 1.043081283569336e-05, 0.0, 2.7019382855542062e-06, 0.3613387793302536, 0.0007876157760620117, 0.00019630789756774902, 1.4563

In [111]:
len(PGI_AUC)

100

In [112]:
# str(round(np.mean(PGU_AUC), 3)) + '+/-' + str(round(np.std(PGU_AUC)/np.sqrt(N_samps), 3))
str(round(np.mean(PGI_AUC), 3)) + '+/-' + str(round(np.std(PGI_AUC)/np.sqrt(100-len(bad_reply_indices)), 3))

'0.047+/-0.014'

In [27]:
str(round(np.mean(PGI_AUC), 3)) + '+/-' + str(round(np.std(PGI_AUC)/np.sqrt(100-len(bad_reply_indices)), 3))

'0.149+/-0.027'

In [37]:
# random baseline
PGI_AUC = calculateFaithfulnessAUC_text(X_test_sentences, X_test, replies, model, 0, 100, 3, random_baseline=True)

0
top_k 1
original_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
results
new_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the  are great, a very noticeable difference. My daughter says shes wants one too.
transforming data...
Processing batch 1/0
6.520748138427734e-05
top_k 2
original_sentence: I bought this for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants one too.
one
this
new_sentence: I bought  for my wife.  She says that it makes the process easier and much faster.  I say that the results are great, a very noticeable difference. My daughter says shes wants  too.
transforming data...
Processing batch 1/0
4.291534423828125e-06
top_k 3
original_sentence: I 

In [38]:
#random baseline
str(round(np.mean(PGI_AUC), 3)) + '+/-' + str(round(np.std(PGI_AUC)/np.sqrt(100), 3))

'0.051+/-0.011'