In [10]:
import openai
import os
import pandas as pd
import numpy as np
from transformers import GPT2TokenizerFast
openai.api_key = os.getenv("OPENAI_API_KEY")

In [11]:
df = pd.read_csv("../../results/1_falseconsensus/fineTuneDataSet.csv")
df["completion_numeric"] = np.around(df["propYes"],1) * 100
df["completion_numeric"] = df["completion_numeric"].astype(int)
df["completion_numeric"] = df["completion_numeric"].astype(str)
df["completion"] = df["completion_numeric"].replace(['0', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100',], 
                                                    ['none','ten','twenty','thirty','forty','fifty','sixty','seventy','eighty','ninety','everyone'])
df["index"] = np.arange(len(df))
df["item"] = df["item"].replace("Vehicle TheftVe", "Vehicle Theft")
df.head()

Unnamed: 0,title,version,nYes,nNo,nCantDecide,propYes,propNo,propCantDecide,item,header,continuation,completion_numeric,completion,index
0,Emergency Damages I,controversial,24,7,4,0.685714,0.2,0.114286,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",70,seventy,0
1,Emergency Damages I,unambiguous_covered,22,0,0,1.0,0.0,0.0,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",100,everyone,1
2,Emergency Damages I,unambiguous_uncovered,8,9,2,0.421053,0.473684,0.105263,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",40,forty,2
3,Emergency Damages II,controversial,17,14,0,0.548387,0.451613,0.0,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",50,fifty,3
4,Emergency Damages II,unambiguous_covered,27,0,0,1.0,0.0,0.0,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",100,everyone,4


In [12]:
# FOR LOGIT BIAS, WHICH WILL RESTRICT OUTPUT TO INTEGERS ON THE RANGE OF [1,100]
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# labels = [str(i + 1) for i in range(0,100)]
labels = ['none','ten','twenty','thirty','forty','fifty','sixty','seventy','eighty','ninety','everyone']
labels_tokens = {label: tokenizer.encode(" " + label) for label in labels}
print(labels_tokens)

{'none': [4844], 'ten': [3478], 'twenty': [8208], 'thirty': [12277], 'forty': [16571], 'fifty': [15334], 'sixty': [24742], 'seventy': [31989], 'eighty': [37516], 'ninety': [37989], 'everyone': [2506]}


In [13]:
tokens = [value[0] for key, value in labels_tokens.items()]
logit_biases = {}
for i in tokens:
    logit_biases[i] = 100
print(logit_biases)

{4844: 100, 3478: 100, 8208: 100, 12277: 100, 16571: 100, 15334: 100, 24742: 100, 31989: 100, 37516: 100, 37989: 100, 2506: 100}


In [14]:
# FUNCTION THAT TAKES AS ITS INPUT DATAFRAME AND # OF DESIRED EXAMPLES FOR FEW-SHOT PROMPTING
# OUTPUTS AN EVALUATION DATAFRAME THAT INCLUDES PROMPTS W/ RANDOMLY-SAMPLED DATA FROM DATAFRAME
# (EVALUATION DATAFRAME EXCLUDES THOSE EXAMPLES)
def create_evaluation_dataset(df, n_exampleGroups):
    prompt_header = ""
    exampleGroups = np.random.choice(np.unique(df["title"]),n_exampleGroups)
    examples = df[df["title"].isin(exampleGroups)].index
    for index in examples:
        prompt_header += "PROMPT: " + df["header"][index] + " " + df["continuation"][index] + "\n"
        # prompt_header += "Out of 100 randomly-sampled people, approximately how many would believe that the claim is covered under " + df["item"][index] + " as it appears in the policy?" + "\n"
        prompt_header += "COMPLETION: Out of 100 randomly-sampled English speakers, it is estimated that " + df["completion"][index] + " would believe that the claim is covered under " + df["item"][index] + " as it appears in the policy.\n\n---\n\n"
    eval_set = df[~(np.isin(df["index"],examples))].copy(deep=False)
    eval_set["prompt_noexample"] = "QUESTION: " + eval_set["header"] + " " + eval_set["continuation"] + "\n" + "Out of 100 randomly-sampled people, approximately how many would believe that the claim is covered under " + eval_set["item"] + " as it appears in the policy?" + "\n" + "ANSWER: Out of 100 randomly-sampled people, "
    eval_set["prompt_withexamples"] = prompt_header + eval_set["prompt_noexample"]
    return eval_set 

In [15]:
testEvalSet = create_evaluation_dataset(df, 2)

In [16]:
testEvalSet["prompt_withexamples"][0]

'QUESTION: Jett has insurance that covers "Trace and Access," defined as <b>"necessary and reasonable costs that you incur in locating and fixing accidental damage to cables, pipes, underground drain pipes or tanks providing services to and from your home."</b> Jett discovers that his house\'s water pressure is very low, and he calls an inspector to survey what may be the issue. Floorboards have to be removed to access the piping. It is then discovered that the water pipes have cracked over time due to long-term wear, an issue which could have been resolved by the previous inspector. Jett files a claim with his insurance company for the damage. .\nOut of 100 randomly-sampled people, approximately how many would believe that the claim is covered under Trace and Access as it appears in the policy?\nANSWER: Out of 100 randomly-sampled people, sixty would believe that the claim is covered under Trace and Access as it appears in the policy.\n\n---\n\nQUESTION: Jett has insurance that covers

In [20]:
def compare_fewAndZeroShot(df, n_exampleGroups, n_runs, openai_model):
    colnames = np.copy(df.columns)
    colnames = np.insert(colnames, 0, "random_seed")
    output =  pd.DataFrame(columns = colnames)
    for i in range(0, n_runs):
        print("Starting run " + str(i + 1) + " of " + str(n_runs))
        np.random.seed(i+1)
        evalSet = create_evaluation_dataset(df, n_exampleGroups)
        evalSet["predictions_zeroshot"] = evalSet.apply(lambda x : openai.Completion.create(model=openai_model, 
                                    prompt=x["prompt_noexample"],   
                                    logit_bias=logit_biases,  
                                    temperature=0, max_tokens=1).choices[0].text, axis = 1)
        evalSet["predictions_fewshot"] = evalSet.apply(lambda x : openai.Completion.create(model=openai_model, 
                                        prompt=x["prompt_withexamples"],   
                                        logit_bias=logit_biases, 
                                        temperature=0, max_tokens=1).choices[0].text, axis = 1)
        evalSet["random_seed"] = i
        output = pd.concat([output, evalSet])
    output['predictions_zeroshot_numeric'] = output["predictions_zeroshot"].replace([' none',' ten',' twenty',' thirty',' forty',' fifty',' sixty',' seventy',' eighty',' ninety',' everyone'],
                                                                                     ['0', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100'])
    output['predictions_fewshot_numeric'] = output["predictions_fewshot"].replace([' none',' ten',' twenty', ' thirty',' forty',' fifty',' sixty',' seventy',' eighty',' ninety',' everyone'],
                                                                                     ['0', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100'])
    return output

In [18]:
comparison_1ex_100runs_curie = compare_fewAndZeroShot(df, n_exampleGroups = 1, n_runs = 100, openai_model = "curie")

Starting run 1 of 100
Starting run 2 of 100
Starting run 3 of 100
Starting run 4 of 100
Starting run 5 of 100
Starting run 6 of 100
Starting run 7 of 100
Starting run 8 of 100
Starting run 9 of 100
Starting run 10 of 100
Starting run 11 of 100
Starting run 12 of 100
Starting run 13 of 100
Starting run 14 of 100
Starting run 15 of 100
Starting run 16 of 100
Starting run 17 of 100
Starting run 18 of 100
Starting run 19 of 100
Starting run 20 of 100
Starting run 21 of 100
Starting run 22 of 100
Starting run 23 of 100
Starting run 24 of 100
Starting run 25 of 100
Starting run 26 of 100
Starting run 27 of 100
Starting run 28 of 100
Starting run 29 of 100
Starting run 30 of 100
Starting run 31 of 100
Starting run 32 of 100
Starting run 33 of 100
Starting run 34 of 100
Starting run 35 of 100
Starting run 36 of 100
Starting run 37 of 100
Starting run 38 of 100
Starting run 39 of 100
Starting run 40 of 100
Starting run 41 of 100
Starting run 42 of 100
Starting run 43 of 100
Starting run 44 of 1

In [19]:
comparison_1ex_100runs_curie.to_csv("comparison_1ex_100runs_curie.csv")

In [21]:
comparison_2ex_100runs_curie = compare_fewAndZeroShot(df, n_exampleGroups = 2, n_runs = 100, openai_model = "curie")

Starting run 1 of 100
Starting run 2 of 100
Starting run 3 of 100
Starting run 4 of 100
Starting run 5 of 100
Starting run 6 of 100
Starting run 7 of 100
Starting run 8 of 100
Starting run 9 of 100
Starting run 10 of 100
Starting run 11 of 100
Starting run 12 of 100
Starting run 13 of 100
Starting run 14 of 100
Starting run 15 of 100
Starting run 16 of 100
Starting run 17 of 100
Starting run 18 of 100
Starting run 19 of 100
Starting run 20 of 100
Starting run 21 of 100
Starting run 22 of 100
Starting run 23 of 100
Starting run 24 of 100
Starting run 25 of 100
Starting run 26 of 100
Starting run 27 of 100
Starting run 28 of 100
Starting run 29 of 100
Starting run 30 of 100
Starting run 31 of 100
Starting run 32 of 100
Starting run 33 of 100
Starting run 34 of 100
Starting run 35 of 100
Starting run 36 of 100
Starting run 37 of 100
Starting run 38 of 100
Starting run 39 of 100
Starting run 40 of 100
Starting run 41 of 100
Starting run 42 of 100
Starting run 43 of 100
Starting run 44 of 1

In [22]:
comparison_2ex_100runs_curie.to_csv("comparison_2ex_100runs_curie.csv")

In [23]:
# FOR LOGIT BIAS, WHICH WILL RESTRICT OUTPUT TO INTEGERS ON THE RANGE OF [1,100]
labels_numeric = ['0','10','20','30','40','50','60','70','80','90','100']
labels_tokens_numeric = {label: tokenizer.encode(" " + label) for label in labels_numeric}
print(labels_tokens_numeric)

{'0': [657], '10': [838], '20': [1160], '30': [1542], '40': [2319], '50': [2026], '60': [3126], '70': [4317], '80': [4019], '90': [4101], '100': [1802]}


In [32]:
tokens_numeric = [value[0] for key, value in labels_tokens_numeric.items()]
logit_biases_numeric = {}
for i in tokens_numeric:
    logit_biases_numeric[i] = 100
print(logit_biases_numeric)

{657: 100, 838: 100, 1160: 100, 1542: 100, 2319: 100, 2026: 100, 3126: 100, 4317: 100, 4019: 100, 4101: 100, 1802: 100}


In [33]:
# FUNCTION THAT TAKES AS ITS INPUT DATAFRAME AND # OF DESIRED EXAMPLES FOR FEW-SHOT PROMPTING
# OUTPUTS AN EVALUATION DATAFRAME THAT INCLUDES PROMPTS W/ RANDOMLY-SAMPLED DATA FROM DATAFRAME
# (EVALUATION DATAFRAME EXCLUDES THOSE EXAMPLES)
def create_evaluation_dataset_numeric(df, n_exampleGroups):
    prompt_header = ""
    exampleGroups = np.random.choice(np.unique(df["title"]),n_exampleGroups)
    examples = df[df["title"].isin(exampleGroups)].index
    for index in examples:
        prompt_header += "QUESTION: " + df["header"][index] + " " + df["continuation"][index] + "\n"
        prompt_header += "Out of 100 randomly-sampled people, approximately how many would believe that the claim is covered under " + df["item"][index] + " as it appears in the policy?" + "\n"
        prompt_header += "ANSWER: Out of 100 randomly-sampled people, " + df["completion_numeric"][index] + " would believe that the claim is covered under " + df["item"][index] + " as it appears in the policy.\n\n---\n\n"
    eval_set = df[~(np.isin(df["index"],examples))].copy(deep=False)
    eval_set["prompt_noexample"] = "QUESTION: " + eval_set["header"] + " " + eval_set["continuation"] + "\n" + "Out of 100 randomly-sampled people, approximately how many would believe that the claim is covered under " + eval_set["item"] + " as it appears in the policy?" + "\n" + "ANSWER: Out of 100 randomly-sampled people, "
    eval_set["prompt_withexamples"] = prompt_header + eval_set["prompt_noexample"]
    return eval_set 

In [34]:
testEvalSet_numeric = create_evaluation_dataset_numeric(df, 2)

In [35]:
testEvalSet_numeric["prompt_withexamples"][0]

'QUESTION: Charlotte has insurance that covers damage from "Flooding" to her home, defined as <b>"an invasion of the property by a large volume of water caused by a sudden release from outside the buildings."</b> Charlotte lives alone in a one-story home. One day, one of the rain barrels that she keeps next to her home begins to leak, causing a large volume of water to suddenly seep into her home. This causes significant water damage to her basement. Charlotte files a claim with her insurance company for the damage.\nOut of 100 randomly-sampled people, approximately how many would believe that the claim is covered under Flooding as it appears in the policy?\nANSWER: Out of 100 randomly-sampled people, 50 would believe that the claim is covered under Flooding as it appears in the policy.\n\n---\n\nQUESTION: Charlotte has insurance that covers damage from "Flooding" to her home, defined as <b>"an invasion of the property by a large volume of water caused by a sudden release from outside 

In [39]:
def compare_fewAndZeroShot_numeric(df, n_exampleGroups, n_runs, openai_model):
    colnames = np.copy(df.columns)
    colnames = np.insert(colnames, 0, "random_seed")
    output =  pd.DataFrame(columns = colnames)
    for i in range(0, n_runs):
        print("Starting run " + str(i + 1) + " of " + str(n_runs))
        np.random.seed(i+1)
        evalSet = create_evaluation_dataset_numeric(df, n_exampleGroups)
        evalSet["predictions_zeroshot_numeric"] = evalSet.apply(lambda x : openai.Completion.create(model=openai_model, 
                                    prompt=x["prompt_noexample"],   
                                    logit_bias=logit_biases_numeric,  
                                    temperature=0, max_tokens=1).choices[0].text, axis = 1)
        evalSet["predictions_fewshot_numeric"] = evalSet.apply(lambda x : openai.Completion.create(model=openai_model, 
                                        prompt=x["prompt_withexamples"],   
                                        logit_bias=logit_biases_numeric, 
                                        temperature=0, max_tokens=1).choices[0].text, axis = 1)
        evalSet["random_seed"] = i
        output = pd.concat([output, evalSet])
    return output

In [42]:
comparison_2ex_100runs_curie_numeric = compare_fewAndZeroShot_numeric(df, n_exampleGroups = 2, n_runs = 100, openai_model = "curie")

Starting run 1 of 100
Starting run 2 of 100
Starting run 3 of 100
Starting run 4 of 100
Starting run 5 of 100
Starting run 6 of 100
Starting run 7 of 100
Starting run 8 of 100
Starting run 9 of 100
Starting run 10 of 100
Starting run 11 of 100
Starting run 12 of 100
Starting run 13 of 100
Starting run 14 of 100
Starting run 15 of 100
Starting run 16 of 100
Starting run 17 of 100
Starting run 18 of 100
Starting run 19 of 100
Starting run 20 of 100
Starting run 21 of 100
Starting run 22 of 100
Starting run 23 of 100
Starting run 24 of 100
Starting run 25 of 100
Starting run 26 of 100
Starting run 27 of 100
Starting run 28 of 100
Starting run 29 of 100
Starting run 30 of 100
Starting run 31 of 100
Starting run 32 of 100
Starting run 33 of 100
Starting run 34 of 100
Starting run 35 of 100
Starting run 36 of 100
Starting run 37 of 100
Starting run 38 of 100
Starting run 39 of 100
Starting run 40 of 100
Starting run 41 of 100
Starting run 42 of 100
Starting run 43 of 100
Starting run 44 of 1

In [41]:
comparison_2ex_2runs_curie_numeric

Unnamed: 0,random_seed,title,version,nYes,nNo,nCantDecide,propYes,propNo,propCantDecide,item,header,continuation,completion_numeric,completion,index,prompt_noexample,prompt_withexamples,predictions_zeroshot_numeric,predictions_fewshot_numeric
0,0,Emergency Damages I,controversial,24,7,4,0.685714,0.200000,0.114286,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",70,seventy,0,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Gene's car insurance policy includes...,50,40
1,0,Emergency Damages I,unambiguous_covered,22,0,0,1.000000,0.000000,0.000000,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",100,everyone,1,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Gene's car insurance policy includes...,40,40
2,0,Emergency Damages I,unambiguous_uncovered,8,9,2,0.421053,0.473684,0.105263,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",40,forty,2,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Gene's car insurance policy includes...,40,40
3,0,Emergency Damages II,controversial,17,14,0,0.548387,0.451613,0.000000,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",50,fifty,3,"QUESTION: Salma's home insurance covers ""Emerg...",QUESTION: Gene's car insurance policy includes...,40,40
4,0,Emergency Damages II,unambiguous_covered,27,0,0,1.000000,0.000000,0.000000,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",100,everyone,4,"QUESTION: Salma's home insurance covers ""Emerg...",QUESTION: Gene's car insurance policy includes...,40,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,1,Vehicle Theft VI,unambiguous_covered,24,0,0,1.000000,0.000000,0.000000,Vehicle Theft,Cooper's car insurance policy includes coverag...,"Cooper, who keeps a GPS tracking device in his...",100,everyone,133,QUESTION: Cooper's car insurance policy includ...,QUESTION: Clint has home insurance that covers...,40,40
134,1,Vehicle Theft VI,unambiguous_uncovered,0,22,0,0.000000,1.000000,0.000000,Vehicle Theft,Cooper's car insurance policy includes coverag...,"Cooper, who doesn't keep a GPS tracking device...",0,none,134,QUESTION: Cooper's car insurance policy includ...,QUESTION: Clint has home insurance that covers...,40,40
135,1,Wind Damage,controversial,16,8,2,0.615385,0.307692,0.076923,Wind Damage,Tom's home insurance policy includes coverage ...,Tom's house is located near a large lake. One ...,60,sixty,135,QUESTION: Tom's home insurance policy includes...,QUESTION: Clint has home insurance that covers...,40,40
136,1,Wind Damage,unambiguous_covered,26,0,0,1.000000,0.000000,0.000000,Wind Damage,Tom's home insurance policy includes coverage ...,Tom's house is located near a large lake. One ...,100,everyone,136,QUESTION: Tom's home insurance policy includes...,QUESTION: Clint has home insurance that covers...,40,40


In [43]:
comparison_2ex_100runs_curie_numeric.to_csv("comparison_2ex_100runs_curie_numeric.csv")