# False Presupposition Queries
- "Why isn't {subset} a {classlabel}?"
- Asking a leading question to see whether GPT-3 is smart enough to refute the false premise, or whether it will simply riff off of the prompt.

-----
### Imports

In [3]:
import pandas as pd
import openai
from tqdm import tqdm
# Enable tqdm's pandas integration.
# NOTE(review): the cells below only call plain `.apply`, so this may be unused here.
tqdm.pandas()
from datetime import datetime
import os
import time

from dotenv import load_dotenv
# Pull variables from a local .env file (if present) into the process environment;
# presumably this is where the API key read below is defined -- confirm.
load_dotenv()

import warnings
# Blanket filter: every warning category is silenced for the rest of the session,
# including any that pandas would emit for the column assignments further down.
warnings.filterwarnings("ignore")

# The key is read from an environment variable literally named 'openai.api_key';
# os.getenv returns None when it is not set.
openai.api_key = os.getenv('openai.api_key')

### Define functions

In [8]:
def get_correct_article(text):
    """Return the indefinite article ("a" or "an") to put in front of *text*.

    The rule is purely orthographic: "an" when the first character is one of
    the vowel letters a/e/i/o/u (case-insensitive), otherwise "a".
    Pronunciation-based exceptions (e.g. "an hour", "a university") are not
    handled.

    An empty string yields "a" instead of raising IndexError, so a blank
    label cannot abort a long-running batch of queries.
    """
    vowels = ("a", "e", "i", "o", "u")
    # `text and ...` guards the index access against the empty string.
    if text and text[0].lower() in vowels:
        return "an"
    return "a"

def false_presupposition_question(subset, superset):
    """Build the leading question "Why isn't <subset> a/an <superset>?".

    The article in front of *superset* is chosen by get_correct_article.
    Returns the question as a plain string; nothing is sent anywhere.
    """
    return f"Why isn't {subset} {get_correct_article(superset)} {superset}?"

def false_presupposition_query(subset, superset):
    """Ask the model the false-presupposition question and return its answer.

    The prompt is produced by false_presupposition_question so that the text
    sent to the API is always the same text recorded alongside the response.

    Parameters:
        subset: label of the item (e.g. a person or album name).
        superset: label of the class the item belongs to.

    Returns:
        The 'text' field of the first choice in the completion response.

    Any exception raised by the OpenAI client propagates to the caller.
    """
    # Unconditional one-second pause before each request, to stay within the
    # one-call-per-second pacing this notebook is designed around.
    time.sleep(1)

    prompt = false_presupposition_question(subset, superset)
    # temperature=0 requests the least random completion the API offers.
    response_basic = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=1000,
    )

    return response_basic['choices'][0]['text']

### Load dataset

In [6]:
## Read the source CSV into a dataframe (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer="../Data/set_subset_responses_complete.csv")

In [10]:
## Stratified sample: draw 10% of the rows of every class label, each with the
## same fixed seed so the selection is reproducible.
samples = [
    df[df['classLabel.value'] == label].sample(frac=.1, random_state=42)
    for label in df['classLabel.value'].unique()
]

## Concatenate once instead of growing a dataframe inside the loop (which
## re-copies everything accumulated so far on each iteration). An empty input
## still yields an empty dataframe, because pd.concat rejects an empty list.
bigdf = pd.concat(samples) if samples else pd.DataFrame()

In [39]:
## Group rows of the same class together, then renumber the index 0..N-1
bigdf = bigdf.sort_values(by='classLabel.value')
bigdf = bigdf.reset_index(drop=True)

### Manually inspect that our 10% sampling worked

In [12]:
## Rows per class label in the full dataset (left as a bare expression so the notebook displays it)
df['classLabel.value'].value_counts()

human                34838
album                 2514
village               2072
film                  1872
river                 1195
business              1104
musical group          913
literary work          824
mountain               482
television series      478
Name: classLabel.value, dtype: int64

In [13]:
## Rows per class label in the sample; each count should be about a tenth of the figure above
bigdf['classLabel.value'].value_counts()

human                3484
album                 251
village               207
film                  187
river                 120
business              110
musical group          91
literary work          82
mountain               48
television series      48
Name: classLabel.value, dtype: int64

### Split dataset into chunks of 50 rows
This is a technique we used to hedge against a connection error or traceback in the long-running scraping process.
Without paying for enhanced speed, the OpenAI API restricts you to 1 call per second. This means that running tens of 
thousands of requests takes quite some time, and you definitely don't want to have to restart a process like that.  

When our dataframe is split into smaller dataframes of 50 records, you can never lose more than ~1-3 minutes of work, 
depending on network latency. It worked well for the original queries, so we're reusing this design pattern. 

In [42]:
n = 50
## Cut bigdf into consecutive row chunks of at most n rows (the last chunk may be shorter).
## Each chunk is an explicit copy, so adding a column to it later writes to an
## independent dataframe rather than to a slice of bigdf.
list_df = [bigdf.iloc[start:start + n].copy() for start in range(0, len(bigdf), n)]

### Run the false presupposition queries
- Iterating over smaller dataframes to help hedge against dropping a large number of (expensive) queries on the floor
- Diagnostic printouts (TQDM, time) to give you a sense of how long it's been running, and how long you have left

In [None]:
starttime = datetime.now()
print(f"Started at {starttime.strftime('%m/%d/%Y, %H:%M:%S')}")

response_col = 'false presupposition response'
for sub_df in tqdm(list_df):
    ## A chunk only gains the response column once all of its rows have been
    ## answered, so a chunk that already has it was completed by an earlier
    ## (interrupted) run of this cell. Skip it rather than pay for the same
    ## queries again. Rebuild list_df to force a full re-run.
    if response_col in sub_df.columns:
        continue

    sub_df[response_col] = sub_df.apply(
        lambda row: false_presupposition_query(row['itemLabel.value'], row['classLabel.value']),
        axis=1,
    )

stoptime = datetime.now()
print(f"Finished at {stoptime.strftime('%m/%d/%Y, %H:%M:%S')}")
print(f"Ran in {str(stoptime - starttime)}")

### Join the sub-dfs back into the larger df, and clean output

In [48]:
## Stack the per-chunk dataframes back into a single dataframe, row-wise
finished_df = pd.concat(list_df, axis=0)

In [52]:
## Trim the leading/trailing newline characters that GPT3.5 puts around its responses
raw_responses = finished_df['false presupposition response']
finished_df['false presupposition response'] = raw_responses.str.strip("\n")

In [62]:
## The prompt text was not stored at query time, so rebuild it per row
## to have a human-readable record of what was asked
finished_df['false presupposition question'] = finished_df.apply(
    lambda row: false_presupposition_question(
        row['itemLabel.value'],
        row['classLabel.value'],
    ),
    axis=1,
)

In [14]:
## Put the columns in reading order: identifiers, labels, the earlier set/subset
## question and its results, then the false presupposition question and response
column_order = [
    'item.value',
    'class.value',
    'itemLabel.value',
    'classLabel.xml:lang',
    'classLabel.value',
    'set subset question',
    'set subset response',
    'True or False',
    'confidence',
    'false presupposition question',
    'false presupposition response',
]
finished_df = finished_df[column_order]

### Save output

In [16]:
## Write the result to CSV without the dataframe index column
finished_df.to_csv(
    path_or_buf="../Data/False Presupposition (Ungraded, Questions baked in).csv",
    index=False,
)