In [None]:
# Notebook setup: imports, GPU visibility, spaCy pipeline, model choice and dataset.
import os
# Set before the remaining imports run; presumably so that GPU-backed libraries
# loaded later only see these device ids -- TODO confirm the ids match the host.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3,5,6"
from chatgpt import run_prompt
import spacy
import pandas as pd
# English spaCy pipeline; the helper functions in this notebook read its
# per-token part-of-speech tags (`token.tag_`).
nlp = spacy.load("en_core_web_sm")
# MODEL is the short label; `model` is the identifier string derived from it
# (None for any label other than "GPT-3.5" or "GPT-4").
MODEL = "GPT-3.5"
model = "gpt-3.5-turbo-1106" if MODEL == "GPT-3.5" else "gpt-4-0125-preview" if MODEL == "GPT-4" else None
# Empty system prompt; not referenced by the cells visible in this notebook.
SYSTEM_PROMPT = ""
# Tab-separated dataset with a header row. Later cells read the columns
# 'sentence', 'verb_i', 'direct_object' and 'direct_object_i' from each row.
dataset = pd.read_csv("../data/stitched.tsv", sep="\t", header=0)


In [None]:
# Inflection of "throw" for each Penn Treebank verb tag the helper supports.
_THROW_FORM_BY_VERB_TAG = {
    'VBG': 'throwing',
    'VBD': 'threw',
    'VBZ': 'throws',
    'VBP': 'throw',
    'VB': 'throw',
    'VBN': 'thrown',
}


def replace_verb_with_throw(sentence_object):
    """Swap the verb of a sentence for the matching inflection of "throw".

    The sentence is split on whitespace and the word at position ``verb_i`` is
    replaced. The inflection is picked from the tag that the module-level spaCy
    pipeline ``nlp`` assigns to the token at the same position.

    NOTE(review): this assumes whitespace words and spaCy tokens line up
    one-to-one (i.e. the sentences are pre-tokenised) -- confirm for the dataset.

    Args:
        sentence_object: Mapping-like row (dict or pandas Series) providing a
            ``'sentence'`` string and a ``'verb_i'`` value convertible to int.

    Returns:
        The very same ``sentence_object``, with ``'sentence'`` overwritten IN
        PLACE by the rewritten sentence. Callers that still need the original
        text must copy it before calling. Returns ``None`` (input untouched)
        when no token sits at ``verb_i`` or its tag is not one of
        VB / VBD / VBG / VBN / VBP / VBZ.
    """
    words = sentence_object['sentence'].split()
    doc = nlp(sentence_object['sentence'])
    verb_index = int(sentence_object['verb_i'])

    # Without this guard an index that matches no token left the tag variable
    # unassigned and the function died with UnboundLocalError.
    if not 0 <= verb_index < len(doc):
        return None

    # Inflect "throw" like the original verb; unknown tags mean "skip this row".
    new_verb_form = _THROW_FORM_BY_VERB_TAG.get(doc[verb_index].tag_)
    if new_verb_form is None:
        return None

    words[verb_index] = new_verb_form
    sentence_object['sentence'] = ' '.join(words)
    return sentence_object

# Form of "to be" that agrees with a noun direct object, by Penn Treebank tag.
_BE_FORM_BY_NOUN_TAG = {'NNS': 'are', 'NN': 'is', 'NNPS': 'are', 'NNP': 'is'}

# Supported pronoun direct objects (lower-cased) -> (form of "to be", subject
# to print). A subject of None means "print the pronoun exactly as given".
_PRONOUN_RULES = {
    'it': ('is', None),
    'he': ('is', None),
    'she': ('is', None),
    'they': ('are', None),
    'we': ('are', None),
    'you': ('are', None),
    'i': ('am', None),
    'himself': ('is', 'he'),
    'herself': ('is', 'she'),
    'itself': ('is', 'it'),
    'themselves': ('are', 'they'),
    'myself': ('am', 'I'),
    'yourself': ('are', 'you'),
}


def get_instruction(sentence_object, sentence_string):
    """Build the "is/are <direct object> moving, yes or no?" prompt.

    The direct object's tag comes from running the module-level spaCy pipeline
    ``nlp`` on ``sentence_object['sentence']`` and reading the token at
    ``direct_object_i``. Noun objects are printed as they appear in the
    sentence (whitespace split); pronoun objects are printed from the
    ``'direct_object'`` field, with reflexives mapped to their subject form.

    NOTE(review): like the whitespace split, this assumes spaCy tokens and
    whitespace words line up one-to-one -- confirm for the dataset.

    Args:
        sentence_object: Mapping-like row providing ``'sentence'``,
            ``'direct_object'`` and ``'direct_object_i'``.
        sentence_string: Sentence text quoted inside the prompt; it may differ
            from ``sentence_object['sentence']``.

    Returns:
        The prompt string, or ``None`` when no token sits at
        ``direct_object_i``, the tag is not NN / NNS / NNP / NNPS / PRP, or the
        pronoun is not one of the supported ones.
    """
    words = sentence_object['sentence'].split()
    doc = nlp(sentence_object['sentence'])
    direct_object_lemmatised = sentence_object["direct_object"]
    dobj_index = int(sentence_object['direct_object_i'])
    direct_object_unlemmatised = words[dobj_index]

    # An index that matches no token used to leave the tag variable unassigned
    # (UnboundLocalError); treat it like every other unsupported case.
    if not 0 <= dobj_index < len(doc):
        return None
    dobj_tag = doc[dobj_index].tag_

    if dobj_tag in _BE_FORM_BY_NOUN_TAG:
        be_form = _BE_FORM_BY_NOUN_TAG[dobj_tag]
        direct_object = direct_object_unlemmatised
    elif dobj_tag == 'PRP':
        rule = _PRONOUN_RULES.get(direct_object_lemmatised.lower())
        if rule is None:
            return None
        be_form, subject = rule
        direct_object = direct_object_lemmatised if subject is None else subject
    else:
        return None

    # The prompt text doubles as the lookup key for stored model answers, so it
    # must stay byte-identical (including the "?." punctuation).
    default_instruction = f"In the sentence '{sentence_string}', {be_form} {direct_object} moving, yes or no?.\nAnswer:"
    return default_instruction

# Substrings (matched against the lower-cased answer) that count as "yes".
_YES_MARKERS = (
    "yes,",
    "the answer is yes",
    "there is an implication that something is moving",
    "it seems that something is moving",
    'the answer is "yes."',
)

# Substrings (matched against the lower-cased answer) that count as "no".
_NO_MARKERS = (
    "no,",
    "no explicit indication",
    "the answer is no",
    "there is not an explicit mention of something moving",
    "it is not explicitly mentioned whether something is moving or not",
    'the answer is "no."',
)


def parse_system_answer(system_answer):
    """Classify a free-text model answer as yes, no, or unparseable.

    Matching is case-insensitive. "Yes" is checked first, so an answer that
    contains both a yes marker and a no marker is read as yes. The markers are
    plain substrings, so matching is deliberately loose (e.g. any answer that
    starts with the letters "no" counts as no).

    Args:
        system_answer: Raw answer text produced by the model.

    Returns:
        ``True`` for yes, ``False`` for no, or the string ``"invalid"`` when
        the answer matches neither set of markers.
    """
    answer = system_answer.lower()
    if answer.startswith("yes") or any(marker in answer for marker in _YES_MARKERS):
        return True
    # Every marker is now an actual membership test. Previously one phrase sat
    # in the `or` chain as a bare (always truthy) string, so this branch always
    # fired and "invalid" could never be returned.
    if answer.startswith("no") or any(marker in answer for marker in _NO_MARKERS):
        return False
    return "invalid"

In [None]:
# Print the entries of the answers directory, one per line.
import os
# Placeholder: must be set to the directory that holds the model answer files.
# NOTE(review): while this is None, os.listdir appears to fall back to the
# current working directory instead of failing -- confirm and set a real path.
answers_path = None
# Directory entries come back in arbitrary order (os.listdir does not sort).
files = os.listdir(answers_path)
for file in files:
    print(file)

In [None]:
from tqdm import tqdm
import json
import pandas as pd
import spacy

# Build the yes/no prompts: one for every usable dataset sentence and one for
# its "throw" rewrite, then store the flat prompt list as JSON.
answers_path = None
nlp = spacy.load("en_core_web_sm")

sentences_for_model = []               # prompts in order: original, rewrite, original, rewrite, ...
sentence_counter = 0
sentence_i_to_sentence_pairs = {}
instruction_to_sentence = {}           # prompt for the original sentence -> dataset row
sentence_to_replaced_instruction = {}  # row's 'sentence' text -> prompt for the rewrite
dataset = pd.read_csv("../data/stitched.tsv", sep="\t", header=0)

for _, sentence in tqdm(dataset.iterrows()):
    # Keep the untouched text before the helper processes the row.
    original_sentence = sentence["sentence"]
    replaced_sentence_obj = replace_verb_with_throw(sentence)
    if replaced_sentence_obj is None:
        continue
    replaced_sentence = replaced_sentence_obj["sentence"]

    # Both prompts are always computed; the pair is used only if both exist.
    original_instruction, replaced_instruction = (
        get_instruction(sentence, text)
        for text in (original_sentence, replaced_sentence)
    )
    if not (original_instruction and replaced_instruction):
        continue

    instruction_to_sentence[original_instruction] = sentence
    # Key is read from the row after replace_verb_with_throw has handled it.
    sentence_to_replaced_instruction[sentence["sentence"]] = replaced_instruction
    sentences_for_model += [original_instruction, replaced_instruction]

# write sentences_for_model to a json file
with open('sentences_for_model.json', 'w') as out_file:
    out_file.write(json.dumps(sentences_for_model))
    

In [None]:

# Model output files (candidate values for `model_string`):
# mistralai_Mistral-7B-Instruct-v0.2_model_outputs.json
# meta-llama_Llama-2-7b-hf_model_outputs.json
# mistralai_Mistral-7B-Instruct-v0.1_model_outputs.json
# mistralai_Mistral-7B-v0.1_model_outputs.json
# meta-llama_Llama-2-7b-chat-hf_model_outputs.json
# meta-llama_Llama-2-13b-hf_model_outputs.json
# mistralai_Mixtral-8x7B-Instruct-v0.1_model_outputs.json
# gemini_pro_model_outputs.json
# TheBloke_Llama-2-70B-AWQ_model_outputs.json
# microsoft_phi-2_model_outputs.json
# meta-llama_Llama-2-13b-chat-hf_model_outputs.json
# mistralai_Mixtral-8x7B-v0.1_model_outputs.json
# TheBloke_Llama-2-70B-Chat-AWQ_model_outputs.json

In [None]:
from tqdm import tqdm
import json
import pandas as pd
import spacy

# Compare one model's yes/no answers for each original sentence with its
# answers for the "throw" rewrite, and bucket the rows by the answer pair.
answers_path = None  # placeholder: directory prefix of the *_model_outputs.json files
model_string = 'TheBloke_Llama-2-70B-Chat-AWQ_model_outputs.json'
results = {"both_yes": [], "both_no": [], "from_yes_to_no": [], "from_no_to_yes": []}

# (answer for original, answer for rewrite) -> bucket in `results`
category_by_answers = {
    (True, True): "both_yes",
    (False, False): "both_no",
    (True, False): "from_yes_to_no",
    (False, True): "from_no_to_yes",
}

# get the model answers (context manager so the file handle is closed again)
with open(answers_path + model_string, 'r') as answers_file:
    model_answers = json.load(answers_file)
print(len(model_answers))

for original_instruction, sentence in instruction_to_sentence.items():
    try:
        result_original = model_answers[original_instruction]
        result_replaced = model_answers[sentence_to_replaced_instruction[sentence["sentence"]]]
    except KeyError:
        print(f"Error: sentence {sentence['sentence']} not found")
        continue

    parsed_result_original = parse_system_answer(result_original)
    parsed_result_replaced = parse_system_answer(result_replaced)
    if parsed_result_original == "invalid" or parsed_result_replaced == "invalid":
        # Unparseable pairs are reported but not counted in any bucket.
        print(result_original, result_replaced, "invalid")
        continue
    category = category_by_answers[(bool(parsed_result_original), bool(parsed_result_replaced))]
    results[category].append(sentence)

# Optional per-verb breakdown, kept disabled:
# for result_class in ["both_yes", "both_no", "from_yes_to_no", "from_no_to_yes"]:
#     print(f"Class: {result_class} {len(results[result_class])}")
#     verb_counts = pd.Series([sentence['verb'] for sentence in results[result_class]]).value_counts()
#     print(verb_counts)
#     for verb, count in verb_counts.items():
#         print(f"{verb}: {count}", end=", ")

total_num = sum([len(results[result_class]) for result_class in ["both_yes", "both_no", "from_yes_to_no", "from_no_to_yes"]])
if total_num == 0:
    # Avoid ZeroDivisionError when no answer pair could be classified.
    print("no answer pairs were classified; percentages are undefined")
else:
    print("both yes percentage {0:.2f}".format(len(results["both_yes"])/total_num*100))

    print("from no to yes percentage {0:.2f}".format(len(results["from_no_to_yes"])/total_num*100))
    # "invalid" here is the share of pairs whose rewrite was answered "no"
    # (both_no + from_yes_to_no); it is not the count of unparseable answers.
    print("invalid percentage {0:.2f}".format((len(results["both_no"])+len(results["from_yes_to_no"]))/total_num*100))
print(model_string)
    

In [None]:
# Per-bucket counts, followed by the same percentage summary that the analysis
# cell prints. Reads `results` and `model_string` from that cell.
for label, result_class in (
    ("Both yes", "both_yes"),
    ("Both no", "both_no"),
    ("From yes to no", "from_yes_to_no"),
    ("From no to yes", "from_no_to_yes"),
):
    print(f"{label}: {len(results[result_class])}")

total_num = sum([len(results[result_class]) for result_class in ["both_yes", "both_no", "from_yes_to_no", "from_no_to_yes"]])
if total_num == 0:
    # Avoid ZeroDivisionError when every bucket is empty.
    print("no answer pairs were classified; percentages are undefined")
else:
    print("both yes percentage {0:.2f}".format(len(results["both_yes"])/total_num*100))

    print("from no to yes percentage {0:.2f}".format(len(results["from_no_to_yes"])/total_num*100))
    # "invalid" here is the share of pairs whose rewrite was answered "no"
    # (both_no + from_yes_to_no); it is not the count of unparseable answers.
    print("invalid percentage {0:.2f}".format((len(results["both_no"])+len(results["from_yes_to_no"]))/total_num*100))
print(model_string)
    