# Setup

## Load Packages

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openai
import re
import time
from scripts.prepare_data_helpers import prepare_examples, create_query
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where OPENAI_API_KEY (read further below via
# os.getenv) comes from - confirm the .env file defines it.
load_dotenv()

True

## Prepare Data

In [2]:
# The training split is shipped as two CSV files; dev and test are one file each.
train1 = pd.read_csv('../../e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../../e-SNLI/dataset/esnli_train_2.csv')
dev = pd.read_csv('../../e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../../e-SNLI/dataset/esnli_test.csv')

# Stack both training files, discard rows missing either sentence or the first
# explanation, and renumber the remaining rows from 0.
train = (
    pd.concat([train1, train2])
    .dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
    .reset_index(drop=True)
)

# Draw 1000 distinct test rows with a fixed numpy seed so the subset is the
# same on every run, then renumber the subset from 0.
np.random.seed(12345)
test_indices = list(np.random.choice(test.index.values, size=1000, replace=False))
test = test.loc[test_indices].reset_index(drop=True)

In [3]:
# Seed both RNGs before building prompts.
# NOTE(review): presumably prepare_examples samples from `train` using numpy
# and/or the stdlib `random` module - confirm in scripts/prepare_data_helpers.
np.random.seed(12345)
random.seed(12345)

# One prompt per test row: a freshly prepared block of training examples
# (size_per_class=1), a blank line, then the query built from that test row.
prompts = []
for row_label in test.index:
    examples_part = prepare_examples(train, size_per_class=1, include_label=True)
    query_part = create_query(test.loc[row_label], include_label=True)
    prompts.append(examples_part + '\n\n' + query_part)

test['prompt'] = prompts

# Generate Predictions on Test Set

In [4]:
# Take the OpenAI key from the environment instead of hardcoding it;
# the value is None when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [9]:
BATCH_SIZE = 20  # API allows only 20 calls or one call with 20 prompts per minute


def _parse_completion(text):
    """Split one completion text into ``(label, explanation)``.

    The explanation is whatever follows ``Explanation:`` on its line; the label
    is the line directly before the ``Explanation:`` line, with an optional
    ``Label: `` prefix removed. A part that cannot be found is returned as
    ``None`` instead of raising, so one malformed completion does not discard
    the predictions already collected.
    """
    expl_match = re.search('Explanation:.+', text)
    label_match = re.search('.+\nExplanation:', text)
    explanation = None
    label = None
    if expl_match is not None:
        explanation = expl_match.group(0).replace('Explanation: ', '').strip()
    if label_match is not None:
        label = label_match.group(0).replace('\nExplanation:', '').replace('Label: ', '').strip()
    return label, explanation


pred_expl = list()
pred_label = list()
for start in range(0, test.shape[0], BATCH_SIZE):
    if start != 0:
        time.sleep(60)  # wait out the per-minute rate limit between batches
    # Slicing (unlike an explicit index array) tolerates a short final batch
    # when the number of rows is not a multiple of BATCH_SIZE.
    batch_prompts = list(test.iloc[start:start + BATCH_SIZE].prompt)
    response = openai.Completion.create(
        engine='text-davinci-003',
        prompt=batch_prompts,
        temperature=0.5,
        max_tokens=58,
        top_p=1,
    )
    # Each choice carries the index of the prompt it answers; order by it so
    # predictions line up with the rows of `test` regardless of response order.
    choices = sorted(response.choices, key=lambda choice: choice['index'])
    if len(choices) != len(batch_prompts):
        raise RuntimeError(
            'Expected %d completions but received %d' % (len(batch_prompts), len(choices))
        )
    for choice in choices:
        label, explanation = _parse_completion(choice['text'])
        pred_label.append(label)
        pred_expl.append(explanation)

In [11]:
# NOTE: `test_output` is the same object as `test` (no copy is made), so the
# two prediction columns added here also appear on `test`.
test_output = test
test_output['pred_explanation'] = pred_expl
test_output['pred_label'] = pred_label

# to_csv does not create missing directories, so make sure the target exists.
output_path = '../output_data/test_predictions.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
test_output.to_csv(output_path)

In [12]:
# Display the test rows together with the prompt and the predicted
# explanation/label columns (the notebook shows a truncated view of the frame).
test_output

Unnamed: 0,pairID,gold_label,Sentence1,Sentence2,Explanation_1,Sentence1_marked_1,Sentence2_marked_1,Sentence1_Highlighted_1,Sentence2_Highlighted_1,Explanation_2,...,Sentence1_Highlighted_2,Sentence2_Highlighted_2,Explanation_3,Sentence1_marked_3,Sentence2_marked_3,Sentence1_Highlighted_3,Sentence2_Highlighted_3,prompt,pred_explanation,pred_label
0,4813951931.jpg#3r1e,entailment,A young man in an orange hat walks his bicycle...,A man in an orange hat walks his bike down a s...,"A bicycle and a bike, are the same object.",A young man in an orange hat walks his *bicyc...,A man in an orange hat walks his *bike* down ...,9,8,A young man is still a man and a bicycle and b...,...,912,81,The young man in an orange hart is the man wal...,A *young* *man* in *an* *orange* *hat* *walks...,A *man* in *an* *orange* *hat* *walks* *his* ...,124567891012,1543119876,Statement: A man in a suit and dirty dress-sho...,The man is young and the hat is orange.,entailment
1,4678320536.jpg#1r1c,contradiction,A woman standing with smile on her face and sh...,A woman stands holding her baby and crying.,If a woman has a smile on her face then she is...,A woman standing with *smile* *on* *her* *face...,A woman stands holding her baby and *crying.*,4567,7,A baby is not wood.,...,13,5,the woman is either crying or has a smile,A *woman* standing with *smile* on her face an...,A *woman* stands holding her baby and *crying.*,41,17,Statement: a white dog runs across the water.\...,"The woman is smiling and holding a wood, not a...",contradiction
2,661749711.jpg#2r1e,entailment,A dog jumps to catch a red ball outside.,the dog is jumping.,Jumping is an action needed to catch a ball.,A *dog* *jumps* to *catch* a red *ball* *outs...,the *dog* is *jumping.*,27814,31,Dog jumps is past tense of dog jumping.,...,12,13,"If the dog jumps to catch, the dog is jumping.",A dog *jumps* to *catch* a red ball outside.,the dog is *jumping.*,24,3,Statement: A young child rides a bike with a b...,The statement is repeating the same information.,tautology
3,44904567.jpg#1r1c,contradiction,A man wielding an electric razor is gleefully ...,There is only one person present.,There cannot be one person present if one man ...,A *man* wielding an electric razor is gleefull...,There is only *one* *person* *present.*,11011,345,ANOTHER REPRESENTS ONE MORE AND CONTRAADICTORY...,...,10,32,It cannot be claimed that there is only one pe...,A *man* wielding an electric razor is gleefull...,There is *only* *one* *person* present.,11011,234,Statement: A young boy wearing safety glasses ...,There are two people present in the statement.,contradiction
4,32542645.jpg#0r1n,neutral,A group of people stares at a wall that is fil...,A group of people analyze art together,Not necessary that they analyze the art.,A group of people stares at a wall that is fil...,A group of people *analyze* art together,{},4,"Staring at drawings does not mean analyze art,...",...,{},45,The group may have just been admiring as oppos...,A group of people stares at a wall that is fil...,A group of people *analyze* art *together*,{},46,Statement: Young mother and child heading some...,Staring at a wall filled with drawings is a fo...,entailment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2429284131.jpg#2r1c,contradiction,Three men are standing on stage performing.,Women are playing trumpets on the street.,"either there are men on stage, or women playin...",Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,Women are not men.,...,1,0,There can be either men or women.,Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,Statement: Several basketball players are grab...,"The men are standing on stage performing, whil...",contradiction
996,4460943467.jpg#0r1e,entailment,3 young man in hoods standing in the middle of...,Three hood wearing people stand in a street.,3 Man in hoods are hood wearing people.,*3* young *man* in *hoods* standing in the mi...,Three *hood* *wearing* *people* stand in a st...,204,123,3 young man in hoods means three hood wearing ...,...,102412,0147,If they are standing in the middle of a quiet ...,3 young man in hoods *standing* *in* *the* *m...,Three hood wearing people *stand* *in* *a* *s...,56789111012,4567,Statement: A curly-haired brunette wearing a b...,The statements are describing the same thing.,equivalence
997,2226343167.jpg#0r1n,neutral,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a movie in the ...,walking down a street does not mean they are g...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,The man and woman may be strangers to each oth...,...,{},13811,Just because a man and woman are wearing theme...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,Statement: A brown dog standing in water.\nSta...,The statement does not give any indication tha...,neutral
998,3389707399.jpg#4r1e,entailment,Two men shining peoples shoes.,Two men shining shoes.,Two men are shining shoes if two men are shini...,Two *men* *shining* peoples shoes.,Two *men* *shining* shoes.,12,12,"Shoes belong to people; therefore, ""Two men sh...",...,142,123,"""Two men shining shoes"" is a rephrasing of ""tw...",Two men *shining* *peoples* *shoes.*,Two men *shining* *shoes.*,234,23,"Statement: A long-haired person, with a tan co...",Shining shoes implies that the shoes belong to...,entailment


In [17]:
# Accuracy: fraction of rows where the predicted label string is exactly the gold label.
np.mean(test_output['gold_label'].eq(test_output['pred_label']))

0.675

In [19]:
# Contingency table of gold labels (rows) against raw predicted labels (columns).
label_confusion = pd.crosstab(
    index=test_output['gold_label'],
    columns=test_output['pred_label'],
)
print(label_confusion)

pred_label     assumption  contradiction  entailment  equivalence  equivalent  \
gold_label                                                                      
contradiction           0            299           5            0           0   
entailment              0              2         332            3           1   
neutral                 1             73         146            1           1   

pred_label     implication  inference  neutral  partial entailment  \
gold_label                                                           
contradiction            0          0       27                   0   
entailment               0          0       16                   1   
neutral                  2          4       44                   0   

pred_label     redundancy  reinforcement  repetition  speculation  synonym  \
gold_label                                                                   
contradiction           0              0           0            1        0   
entailmen