# Setup

## Load Packages

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openai
import re
import time
from scripts.prepare_data_helpers import prepare_examples, create_query
from dotenv import load_dotenv

# Load environment variables through python-dotenv before anything reads them.
# NOTE(review): presumably this is what supplies the OPENAI_API_KEY fetched with
# os.getenv further down — confirm where the .env file is expected to live.
load_dotenv()

True

## Prepare Data

In [2]:
# --- Training split -------------------------------------------------------
# The training data is read from two CSV parts that are stacked into a single
# frame; rows missing either sentence or the first explanation are discarded
# and the index is rebuilt so it is contiguous again.
train1 = pd.read_csv('../../e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../../e-SNLI/dataset/esnli_train_2.csv')
train = (
    pd.concat([train1, train2])
    .dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
    .reset_index(drop=True)
)

# --- Dev / test splits ----------------------------------------------------
dev = pd.read_csv('../../e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../../e-SNLI/dataset/esnli_test.csv')

# --- Fixed test subsample -------------------------------------------------
# Seed numpy's global RNG so the same 1000 test rows are drawn (without
# replacement) on every run, then keep only those rows under a fresh 0..999 index.
np.random.seed(12345)
test_indices = list(
    np.random.choice(test.index.values, size=1000, replace=False)
)
test = test.loc[test_indices].reset_index(drop=True)

In [3]:
# Reset both global RNGs (numpy and the stdlib `random` module) right before
# the prompts are built.
# NOTE(review): presumably prepare_examples samples its few-shot examples
# randomly, which is why both generators are seeded — confirm in the helper.
np.random.seed(12345)
random.seed(12345)

# One prompt per test row: a freshly prepared block of labelled training
# examples (one per class), a blank line, then the query built from the row.
prompts = []
for row_label in test.index:
    few_shot_block = prepare_examples(train, size_per_class=1, include_label=True)
    query_block = create_query(test.loc[row_label], include_label=True)
    prompts.append(few_shot_block + '\n\n' + query_block)

test['prompt'] = prompts

# Generate Predictions on Test Set

In [4]:
# Hand the API key from the process environment to the openai client.
# The key is never written into the notebook; it stays None if the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [9]:
# The API allows only 20 calls, or one call with 20 prompts, per minute, so the
# test set is sent in batches of 20 prompts with a one-minute pause in between.
BATCH_SIZE = 20
SECONDS_BETWEEN_BATCHES = 60


def _parse_completion(text):
    """Split one raw completion into a ``(label, explanation)`` pair.

    The explanation is whatever follows "Explanation:" on its line; the label
    is the line directly above that, with a leading "Label: " removed.
    Either element is ``None`` when the completion does not contain the
    expected pattern, so a single malformed completion cannot abort a run
    that has already spent many rate-limited requests.
    """
    explanation_hits = re.findall('Explanation:.+', text)
    label_hits = re.findall('.+\nExplanation:', text)

    explanation = None
    if explanation_hits:
        explanation = explanation_hits[0].replace('Explanation: ', '').strip()

    label = None
    if label_hits:
        label = (
            label_hits[0]
            .replace('\nExplanation:', '')
            .replace('Label: ', '')
            .strip()
        )
    return label, explanation


pred_expl = list()
pred_label = list()
for batch_start in range(0, test.shape[0], BATCH_SIZE):
    if batch_start != 0:
        time.sleep(SECONDS_BETWEEN_BATCHES)  # stay inside the per-minute quota

    # A positional slice (unlike an explicit index array) tolerates a final
    # batch that is shorter than BATCH_SIZE.
    batch_prompts = list(test.iloc[batch_start:batch_start + BATCH_SIZE].prompt)

    response = openai.Completion.create(
        engine='text-davinci-003',
        prompt=batch_prompts,
        temperature=0.5,
        max_tokens=58,
        top_p=1,
    )

    # Line completions up with their prompts via each choice's `index` field
    # instead of relying on the order of the returned list.
    for choice in sorted(response.choices, key=lambda c: c.index):
        label, explanation = _parse_completion(choice.text)
        pred_label.append(label)
        pred_expl.append(explanation)

In [11]:
# Attach the model's explanations and labels to the test rows and persist them.
# NOTE: `test_output` is bound to the very same DataFrame object as `test`
# (no copy is made), so both names see the two prediction columns.
test['pred_explanation'] = pred_expl
test['pred_label'] = pred_label
test_output = test

test_output.to_csv(path_or_buf='../output_data/test_predictions.csv')

In [21]:
# Write the predictions again, this time semicolon-separated. The target path is
# the same one used by the earlier comma-separated export, so that file is replaced.
test_output.to_csv(
    path_or_buf='../output_data/test_predictions.csv',
    sep=';',
)

In [17]:
# Accuracy: share of rows whose predicted label string equals the gold label exactly.
(test_output['gold_label'] == test_output['pred_label']).mean()

0.675

In [19]:
# Confusion table: gold labels as rows, every distinct predicted label string as a column.
print(
    pd.crosstab(
        index=test_output['gold_label'],
        columns=test_output['pred_label'],
    )
)

pred_label     assumption  contradiction  entailment  equivalence  equivalent  \
gold_label                                                                      
contradiction           0            299           5            0           0   
entailment              0              2         332            3           1   
neutral                 1             73         146            1           1   

pred_label     implication  inference  neutral  partial entailment  \
gold_label                                                           
contradiction            0          0       27                   0   
entailment               0          0       16                   1   
neutral                  2          4       44                   0   

pred_label     redundancy  reinforcement  repetition  speculation  synonym  \
gold_label                                                                   
contradiction           0              0           0            1        0   
entailmen