# Setup

## Load Packages

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import transformers
import os
import openai
import re
from helper_funcs import prepare_examples, create_query
from dotenv import load_dotenv

load_dotenv()

True

## Prepare Data

In [2]:
train1 = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_train_2.csv')
train = pd.concat([train1, train2])
dev = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_test.csv')

In [3]:
dev.shape

(9842, 19)

In [4]:
train = train.dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
train = train.reset_index(drop=True)

In [18]:
n_dev = 9842 # number of dev data points

np.random.seed(12345) # seed for numpy package
random.seed(12345) # seed for random package
dev_indices = list(np.random.choice(dev.index.values, size=n_dev, replace=False))
prompts_instruct_GPT = [prepare_examples(train, size_per_class=2, style="customInstruct") + create_query(dev.loc[i], style="customInstruct") for i in dev_indices]
prompts_GPT3 = [prepare_examples(train, size_per_class=2) + create_query(dev.loc[i]) for i in dev_indices]

In [19]:
dev_prepared = pd.DataFrame()
dev_prepared['gold_standard_explanation'] = dev.loc[dev_indices].Explanation_1
dev_prepared['gold_standard_label'] = dev.loc[dev_indices].gold_label
dev_prepared['prompts_instruct_GPT'] = prompts_instruct_GPT
dev_prepared['prompts_GPT3'] = prompts_GPT3

In [10]:
print(dev_prepared.head()['prompts_instruct_GPT'].iloc[0])

Classify into entailment, neutral, or contradiction and justify the decision.

Premise: A man with a tennis racket goes for the ball.
Hypothesis: A person playing tennis.
Label: entailment
Explanation: Man is a person, as with tennis racket is inferred to play tennis.
###
Premise: A dog chases a stuffed animal attached to a string.
Hypothesis: A dog chases a plushie attatched to a rope.
Label: entailment
Explanation: dog chases a stuffed animal simply means dog chases a plushie
###
Premise: A red-haired girl holding an animal.
Hypothesis: A boy is holding a toy.
Label: contradiction
Explanation: One cannot be holding an animal while holding a toy.
###
Premise: Four young trendy women sitting and talking in a public place.
Hypothesis: Four women are sitting and arguing in public.
Label: neutral
Explanation: Not all talking is arguing.
###
Premise: A man in white khakis, a white hat, a black t-shirt, and a gray vest and a young boy in a white t-shirt and dark colored shorts appear to be 

In [11]:
print(dev_prepared.head()['prompts_GPT3'].iloc[0])

Premise: The motocross rider jumps high in the air on his bike.
Hypothesis: The motorcross rider rides down the hill.
Label: contradiction
Explanation: If he is high in the air, then he is not riding down a hill.
###
Premise: two men waiting in line one wearing black suit.
Hypothesis: a bird was on rocks
Label: entailment
Explanation: A bird is black and can also wait in line.
###
Premise: A man wearing a raincoat and a white hat is sweeping outside on wet brick floors.
Hypothesis: A guy is sweeping outside on the wet brick floors while wearing a raincoat and a white hat.
Label: entailment
Explanation: A man is a guy.
###
Premise: An elderly woman with a cane passes a garden of flowers.
Hypothesis: A woman is asleep.
Label: contradiction
Explanation: One cannot be passing a garden or be asleep simultaneously.
###
Premise: Two people sit at a table outside against a wall and make faces.
Hypothesis: Couple is taking pictures.
Label: neutral
Explanation: The two people making faces don't 

# Find Hyperparameters

In [9]:
tokenizer = transformers.GPT2TokenizerFast.from_pretrained('gpt2')
explanation_token_length = [len(tokenizer.tokenize(train.iloc[i].Explanation_1)) for i in range(train.head().shape[0])]

In [10]:
np.quantile(explanation_token_length, 0.999)

20.972

In [20]:
tokenizer = transformers.GPT2TokenizerFast.from_pretrained('gpt2')
explanation_token_length = [len(tokenizer.tokenize(prompt)) for prompt in dev_prepared.prompts_instruct_GPT]

In [21]:
np.mean(explanation_token_length)

400.0465352570616

In [31]:
len(tokenizer.tokenize("Does the man with long hair contradict women with baby"))
explanation_token_length = [len(tokenizer.tokenize(prompt)) for prompt in dev_prepared.prompts_instruct_GPT]

In [32]:
np.quantile(explanation_token_length, 0.999)

433.946

In [13]:
openai.api_key = os.getenv("OPENAI_API_KEY")

In [14]:
for i in range(6):
    response = openai.Completion.create(
        #engine='davinci-instruct-beta',
        engine='text-davinci-003',
        prompt = dev_prepared['prompts_instruct_GPT'].iloc[i],
        temperature=0,
        max_tokens=58,
        top_p=1,
    )
    print(dev_prepared['prompts_instruct_GPT'].iloc[i])
    print(response.choices[0].text)
    print('==============================')

Classify into entailment, neutral, or contradiction and justify the decision.

Premise: A man with a tennis racket goes for the ball.
Hypothesis: A person playing tennis.
Label: entailment
Explanation: Man is a person, as with tennis racket is inferred to play tennis.
###
Premise: A dog chases a stuffed animal attached to a string.
Hypothesis: A dog chases a plushie attatched to a rope.
Label: entailment
Explanation: dog chases a stuffed animal simply means dog chases a plushie
###
Premise: A red-haired girl holding an animal.
Hypothesis: A boy is holding a toy.
Label: contradiction
Explanation: One cannot be holding an animal while holding a toy.
###
Premise: Four young trendy women sitting and talking in a public place.
Hypothesis: Four women are sitting and arguing in public.
Label: neutral
Explanation: Not all talking is arguing.
###
Premise: A man in white khakis, a white hat, a black t-shirt, and a gray vest and a young boy in a white t-shirt and dark colored shorts appear to be 

In [15]:
for i in range(6):
    response = openai.Completion.create(
        #engine='davinci-instruct-beta',
        engine='text-davinci-003',
        prompt = dev_prepared['prompts_GPT3'].iloc[i],
        temperature=0,
        max_tokens=58,
        top_p=1,
    )
    print(dev_prepared['prompts_instruct_GPT'].iloc[i])
    print(response.choices[0].text)
    print('==============================')

Classify into entailment, neutral, or contradiction and justify the decision.

Premise: A man with a tennis racket goes for the ball.
Hypothesis: A person playing tennis.
Label: entailment
Explanation: Man is a person, as with tennis racket is inferred to play tennis.
###
Premise: A dog chases a stuffed animal attached to a string.
Hypothesis: A dog chases a plushie attatched to a rope.
Label: entailment
Explanation: dog chases a stuffed animal simply means dog chases a plushie
###
Premise: A red-haired girl holding an animal.
Hypothesis: A boy is holding a toy.
Label: contradiction
Explanation: One cannot be holding an animal while holding a toy.
###
Premise: Four young trendy women sitting and talking in a public place.
Hypothesis: Four women are sitting and arguing in public.
Label: neutral
Explanation: Not all talking is arguing.
###
Premise: A man in white khakis, a white hat, a black t-shirt, and a gray vest and a young boy in a white t-shirt and dark colored shorts appear to be 

In [11]:
dev_prepared

Unnamed: 0,gold_standard_explanation,gold_standard_label,prompts_instruct_GPT,prompts_GPT3
9594,Just because a lady is opening a present doesn...,neutral,Answer the Question and provide a reason why t...,Premise: A man in a black t-shirt is standing ...
3300,Does skateboard tricks implies on a skateboard.,entailment,Answer the Question and provide a reason why t...,Premise: An old woman wearing a white tank top...
7520,A man observes must be examining.,entailment,Answer the Question and provide a reason why t...,Premise: A woman with weird teeth sings out wh...
840,people are men and are friends,neutral,Answer the Question and provide a reason why t...,Premise: A boy with a black and white striped ...
5854,A group of construction workers bears no resem...,contradiction,Answer the Question and provide a reason why t...,Premise: There is a large crowd lined up along...
4680,Players can not be getting a drink during a ma...,contradiction,Answer the Question and provide a reason why t...,Premise: A family playing in the pool.\nHypoth...
3349,Runners are people and they are in the forest ...,entailment,Answer the Question and provide a reason why t...,Premise: An old man wearing gray sleeps in a s...
7153,The pamphlet may not be about photo shoot prep...,neutral,Answer the Question and provide a reason why t...,Premise: A woman in an orange sweater have a d...
6639,Cowboy cannot control a horse and make cheese ...,contradiction,Answer the Question and provide a reason why t...,Premise: Two shirtless young boys are posing f...
5352,The could have been taking it apart instead of...,neutral,Answer the Question and provide a reason why t...,Premise: 5 teenagers playing twister of some k...


In [23]:
response = openai.Completion.create(
    engine='text-davinci-003',
    #prompt=list(dev_prepared.prompts_instruct_GPT.iloc[1]),
    prompt = dev_prepared.head()['prompts_GPT3'].iloc[0],
    temperature=0,
    max_tokens=58,
    top_p=1,
)
print(dev_prepared.head()['prompts_GPT3'].iloc[0])
print(response.choices[0].text)

Premise: Two smiling women in costumes standing in front of a microphone.
Hypothesis: The ladiesare standing in front a microphone have smile while doing jumping jacks
Label: neutral
Explanation: Because two women in costumes are smiling while standing in front of a microphone does not imply that they would do so while doing jumping jacks.
###
Premise: The boy wearing orange face paint is carrying a toy fish in around his next.
Hypothesis: A boy carrying a toy fish
Label: neutral
Explanation: The boy could be anybody and not necessarily wearing orange face paint.
###
Premise: A man dressed in black is sitting on a seat in the subway station that has an opened black umbrella laying on the ground.
Hypothesis: The man is waiting for the subway.
Label: neutral
Explanation: Just because the man is sitting on a seat in the subway station,  it doesn't imply he is waiting for the subway.
###
Premise: These two kids look like they are having fun just playing on the sidewalk.
Hypothesis: Two kid

In [None]:
results = pd.DataFrame()
for col_name in dev_prepared.columns.values[2:]:
    for temp_train in [0, 0.5, 1]:
        response = openai.Completion.create(
            engine='text-davinci-003',
            prompt=list(dev_prepared[col_name]),
            temp_trainerature=temp_train,
            max_tokens=58,
            top_p=1,
        )
        try:
            preds = [re.findall('Explanation:.+', response.choices[i].text)[0].replace('Explanation: ', '') for i in range(len(response.choices))]
        except:
            preds = [response.choices[i].text.strip() for i in range(len(response.choices))]
        results['pred_temp_trainerature_' + str(temp_train) + '_' + col_name] = preds

In [None]:
results.head()

In [None]:
results.to_csv('output_data/predsictions.csv', sep=';')

In [None]:
F1_means = dict()
for col_name in results.columns.values:
    P, R, F1 = score(list(results[col_name]), list(dev_prepared['gold_standard_explanation']), lang="en", verbose=False)
    F1_means[col_name] = F1.mean().item()
print(F1_means)

In [None]:
F1_means

In [21]:
gpt3_esnli_label_mapping = {0: 'Yes', 1: 'Maybe', 2: 'No'}  # Tried True, False, Neither but this works better.
gpt3_t5_esnli_label_mapping = {'Yes': 'entailment', 'Maybe': 'neutral', 'No': 'contradiction'}

gpt3_sbic_label_mapping = {"offensive": 'Yes', "not offensive": 'No'}


def _get_task_description(task):
    if task == 'sbic':
        return "Is the following post offensive? and why?\n"
    elif task == 'esnli':
        return "Answer the question and provide a reason why the answer is correct.\n"
    elif task == 'cos_e':
        return "Answer the question from the provided choices, and provide a reason why the answer is correct.\n"
    elif task == 'sensemaking':
        return "Which of the two choices makes more sense? and why?\n"
    else:
        assert False


def _convert_instance_to_prompt(instance, task):
    if task == 'sbic':
        post = instance['post'].replace('\n', ' ')  # a few posts have new lines
        input = f"Post: {post}\n"\
                "Answer:"
        output = f" {gpt3_sbic_label_mapping[instance['offensiveYN']]}\n"\
                 f"Reason: {instance['targetStereotype']}\n"\
                 "###\n"
    elif task == 'esnli':
        input = f"{instance['premise']}\n"\
                f"Question: Is {instance['hypothesis'].lower().replace('.', '')}?\n"\
                "Answer:"
        output = f" {instance['label']}\n"\
                 f"Reason: {instance['explanation_1']}\n"\
                 "###\n"
    elif task == 'cos_e':
        choices = ', '.join(instance['choices'])
        input = f"Question: {instance['question']}\n"\
                f"Choices: {choices}\n"\
                "Answer:"
        output = f" {instance['answer']}\n"\
                 f"Reason: {instance['abstractive_explanation']}\n"\
                 "###\n"
    elif task == 'sensemaking':
        input = f"choice1: {instance['sent0']}\n"\
                f"choice2: {instance['sent1']}\n"\
                "Answer:"
        # flip choices to select the one that makes "more" sense. GPT3 works much better with this prompt
        label = 1 if instance['label'] == 1 else 2
        output = f" choice{label}\n"\
                 f"Reason: {instance['explanation']}\n"\
                 "###\n"
    else:
        assert False

    return input, output


def _parse_response(response_text, task):
    if task in ['cos_e', 'esnli', 'sensemaking', 'sbic']:
        if 'Reason:' not in response_text:
            print('Invalid output format')  # rarely happens
            pred_answer = 'wrong'
            pred_explanation = response_text.replace('\n', '')
        else:
            splits = response_text.split('Reason:', 1)
            pred_answer = splits[0].strip()
            pred_explanation = splits[1].strip()
            pred_explanation = pred_explanation.split('\n')[0]
            if task == 'esnli':
                pred_answer = gpt3_t5_esnli_label_mapping.get(pred_answer) or pred_answer
            if task == 'sensemaking':
                # flip it back
                pred_answer = pred_answer.replace('1', '2') if '1' in pred_answer else pred_answer.replace('2', '1')
    else:
        assert False

    return pred_answer, pred_explanation

In [23]:
temp_train = train.head()
temp_train["premise"] = temp_train["Sentence1"]
temp_train["hypothesis"] = temp_train["Sentence2"]
temp_train["label"] = temp_train["gold_label"]
temp_train["explanation_1"] = temp_train["Explanation_1"]

temp_test = test.head()
temp_test["premise"] = temp_test["Sentence1"]
temp_test["hypothesis"] = temp_test["Sentence2"]
temp_test["label"] = temp_test["gold_label"]
temp_test["explanation_1"] = temp_test["Explanation_1"]

task = "esnli"
task_description = _get_task_description(task)
train_prompts = [_convert_instance_to_prompt(temp_train.iloc[i], task) for i in range(temp_train.shape[0])]
test_prompts = [_convert_instance_to_prompt(temp_test.iloc[i], task) for i in range(temp_test.shape[0])]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_train["premise"] = temp_train["Sentence1"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_train["hypothesis"] = temp_train["Sentence2"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_train["label"] = temp_train["gold_label"]
A value is trying to be set on a copy of a slice from a Dat

In [24]:
train_prompts[0]

('A person on a horse jumps over a broken down airplane.\nQuestion: Is a person is training his horse for a competition?\nAnswer:',
 ' neutral\nReason: the person is not necessarily training his horse\n###\n')

In [25]:
test_prompts[0]

('This church choir sings to the masses as they sing joyous songs from the book at a church.\nQuestion: Is the church has cracks in the ceiling?\nAnswer:',
 ' neutral\nReason: Not all churches have cracks in the ceiling\n###\n')

In [32]:
dev.shape

(9842, 19)