In [1]:
import openai
import os
import pandas as pd
import numpy as np
from transformers import GPT2TokenizerFast
# Take the OpenAI credential from the environment so it never appears in the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Load the survey results and derive the label the model should produce:
# propYes rounded to one decimal place, expressed as a whole-number percentage string.
df = pd.read_csv("../../results/1_falseconsensus/fineTuneDataSet.csv")
df["completion"] = (np.around(df["propYes"], 1) * 100).astype(int).astype(str)
# Explicit positional id column, used further down to select rows.
df["index"] = np.arange(df.shape[0])
# Repair a malformed item name present in the raw file.
df["item"] = df["item"].replace("Vehicle TheftVe", "Vehicle Theft")
np.unique(df["completion"])

array(['0', '10', '100', '20', '30', '40', '50', '60', '70', '80', '90'],
      dtype=object)

In [3]:
# FOR LOGIT BIAS: the candidate outputs are the multiples of ten from 0 to 100
# (the values `completion` takes) plus the word "would", which is used as the stop sequence.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
labels = [str(value) for value in range(0, 101, 10)] + ["would"]
# Each label is encoded with a leading space, matching how it follows "approximately " in the prompt.
labels_tokens = {}
for label in labels:
    labels_tokens[label] = tokenizer.encode(" " + label)
print(labels_tokens)

{'0': [657], '10': [838], '20': [1160], '30': [1542], '40': [2319], '50': [2026], '60': [3126], '70': [4317], '80': [4019], '90': [4101], '100': [1802], 'would': [561]}


In [4]:
# Keep the first token id of every encoded label and give each one a bias of 100.
tokens = [token_ids[0] for token_ids in labels_tokens.values()]
logit_biases = dict.fromkeys(tokens, 100)
print(logit_biases)

{657: 100, 838: 100, 1160: 100, 1542: 100, 2319: 100, 2026: 100, 3126: 100, 4317: 100, 4019: 100, 4101: 100, 1802: 100, 561: 100}


In [5]:
def create_evaluation_dataset(df, n_exampleGroups):
    """Build an evaluation frame with zero-shot and few-shot prompts.

    ``n_exampleGroups`` distinct values of ``df["title"]`` are drawn at random.
    Every row carrying one of those titles is rendered as a solved
    QUESTION/ANSWER example and the examples are concatenated (in row order)
    into one shared few-shot header. All remaining rows form the evaluation set.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the string columns "title", "header", "continuation",
        "item" and "completion". It is not modified.
    n_exampleGroups : int
        Number of distinct titles whose rows become in-context examples.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the non-example rows with two added columns:
        "prompt_noexample" (the bare question, ending right before the number)
        and "prompt_withexamples" (the few-shot header followed by that question).

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``n_exampleGroups`` exceeds the
        number of distinct titles.
    """
    ask_stem = "Out of 100 randomly-sampled people, approximately how many would believe that the claim is covered under "
    answer_stem = "ANSWER: Out of 100 randomly-sampled people, approximately "

    # replace=False: np.random.choice samples WITH replacement by default, which could
    # pick the same title twice and silently hold out fewer groups than requested.
    exampleGroups = np.random.choice(np.unique(df["title"]), n_exampleGroups, replace=False)
    # Select by title directly instead of going through a separate "index" column that
    # would have to stay in sync with the frame's row labels.
    is_example = df["title"].isin(exampleGroups)

    # The question text is identical for example rows and evaluation rows.
    questions = (
        "QUESTION: " + df["header"] + " " + df["continuation"] + "\n"
        + ask_stem + df["item"] + " as it appears in the policy?" + "\n"
    )

    example_rows = df[is_example]
    solved_examples = (
        questions[is_example]
        + answer_stem + example_rows["completion"]
        + " would believe that the claim is covered under " + example_rows["item"]
        + " as it appears in the policy.\n\n---\n\n"
    )
    prompt_header = "".join(solved_examples)

    # Deep copy, so adding columns can never write through to the caller's frame.
    eval_set = df[~is_example].copy()
    eval_set["prompt_noexample"] = questions[~is_example] + answer_stem
    eval_set["prompt_withexamples"] = prompt_header + eval_set["prompt_noexample"]
    return eval_set

In [11]:
# Hold out the rows of two randomly drawn titles as few-shot examples; evaluate on the rest.
evalSet = create_evaluation_dataset(df, n_exampleGroups=2)

In [12]:
# Peek at one few-shot prompt. Positional lookup is used because the row labelled 1 is
# missing from evalSet (KeyError) whenever its title was sampled as an example group.
evalSet["prompt_withexamples"].iloc[1]

'QUESTION: Genie has home insurance that covers "Garden Plants" damage, defined as <b>"damage to plants, bushes, shrubs and trees within the boundaries of the land belonging to the home caused by civil commotion, strikes, or labor and political disturbances."</b> Genie buys special fertilizer for her garden plants from a hardware store located next door. When the workers of the store go on strike, the store temporarily shuts down due to staff shortages, and Genie is unable to find the fertilizer within driving distance from her home or online. Without the fertilizer, the plants die. Genie files a claim with her insurance company for the damage to her plants.\nOut of 100 randomly-sampled people, approximately how many would believe that the claim is covered under Garden Plants as it appears in the policy?\nANSWER: Out of 100 randomly-sampled people, approximately 40 would believe that the claim is covered under Garden Plants as it appears in the policy.\n\n---\n\nQUESTION: Genie has hom

In [13]:
def _zeroshot_prediction(row):
    """Return the raw curie completion text for one row's example-free prompt."""
    response = openai.Completion.create(
        model="curie",
        prompt=row["prompt_noexample"],
        logit_bias=logit_biases,
        stop="would",
        temperature=0,
        max_tokens=2,
    )
    return response.choices[0].text


# One API request per evaluation row.
evalSet["predictions_zeroshot"] = evalSet.apply(_zeroshot_prediction, axis=1)

In [14]:
def _fewshot_prediction(row):
    """Return the raw curie completion text for one row's prompt with solved examples prepended."""
    response = openai.Completion.create(
        model="curie",
        prompt=row["prompt_withexamples"],
        logit_bias=logit_biases,
        stop=["would"],
        temperature=0,
        max_tokens=2,
    )
    return response.choices[0].text


# One API request per evaluation row.
evalSet["predictions_fewshot"] = evalSet.apply(_fewshot_prediction, axis=1)

In [15]:
# Turn the string-valued gold labels and model outputs into integer arrays for scoring.
goldLabels = evalSet["completion"].to_numpy().astype(int)
zeroShotPredictions = evalSet["predictions_zeroshot"].to_numpy().astype(int)
fewShotPredictions = evalSet["predictions_fewshot"].to_numpy().astype(int)

In [16]:
# Show the evaluation frame, which by now carries the zero-shot and few-shot prediction columns.
evalSet

Unnamed: 0,title,version,nYes,nNo,nCantDecide,propYes,propNo,propCantDecide,item,header,continuation,completion,index,prompt_noexample,prompt_withexamples,predictions_zeroshot,predictions_fewshot
0,Emergency Damages I,controversial,24,7,4,0.685714,0.200000,0.114286,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",70,0,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Genie has home insurance that covers...,40,40
1,Emergency Damages I,unambiguous_covered,22,0,0,1.000000,0.000000,0.000000,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",100,1,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Genie has home insurance that covers...,40,40
2,Emergency Damages I,unambiguous_uncovered,8,9,2,0.421053,0.473684,0.105263,Emergency Damages,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears loud crashing noi...",40,2,"QUESTION: Joanne's home insurance covers ""Emer...",QUESTION: Genie has home insurance that covers...,40,40
3,Emergency Damages II,controversial,17,14,0,0.548387,0.451613,0.000000,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",50,3,"QUESTION: Salma's home insurance covers ""Emerg...",QUESTION: Genie has home insurance that covers...,40,40
4,Emergency Damages II,unambiguous_covered,27,0,0,1.000000,0.000000,0.000000,Emergency Damages,"Salma's home insurance covers ""Emergency Damag...","Late one night, Salma hears noises coming from...",100,4,"QUESTION: Salma's home insurance covers ""Emerg...",QUESTION: Genie has home insurance that covers...,40,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,Vehicle Theft VI,unambiguous_covered,24,0,0,1.000000,0.000000,0.000000,Vehicle Theft,Cooper's car insurance policy includes coverag...,"Cooper, who keeps a GPS tracking device in his...",100,133,QUESTION: Cooper's car insurance policy includ...,QUESTION: Genie has home insurance that covers...,0,40
134,Vehicle Theft VI,unambiguous_uncovered,0,22,0,0.000000,1.000000,0.000000,Vehicle Theft,Cooper's car insurance policy includes coverag...,"Cooper, who doesn't keep a GPS tracking device...",0,134,QUESTION: Cooper's car insurance policy includ...,QUESTION: Genie has home insurance that covers...,0,40
135,Wind Damage,controversial,16,8,2,0.615385,0.307692,0.076923,Wind Damage,Tom's home insurance policy includes coverage ...,Tom's house is located near a large lake. One ...,60,135,QUESTION: Tom's home insurance policy includes...,QUESTION: Genie has home insurance that covers...,40,40
136,Wind Damage,unambiguous_covered,26,0,0,1.000000,0.000000,0.000000,Wind Damage,Tom's home insurance policy includes coverage ...,Tom's house is located near a large lake. One ...,100,136,QUESTION: Tom's home insurance policy includes...,QUESTION: Genie has home insurance that covers...,40,40


In [19]:
# Inspect the zero-shot predictions after conversion to integers.
zeroShotPredictions

array([40, 40, 40, 40, 40, 40, 40, 40, 40, 40,  0,  0, 40, 40, 40, 40, 40,
       40, 40, 40, 40, 40, 40, 40, 40,  0,  0, 40, 40, 40, 40, 40, 40, 40,
       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,  0, 40,
       40, 40, 40, 40, 40, 40, 40,  0, 40, 40, 40, 40, 40, 40, 40, 40, 40,
       40, 40, 40, 40,  0, 40, 40, 40, 40, 40, 40, 40, 40,  0, 40,  0, 40,
       40,  0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50, 50, 50,
       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
       40, 40, 40, 40,  0,  0,  0, 40, 40, 40])

In [18]:
# Mean squared error of the few-shot predictions against the gold labels.
np.mean((goldLabels - fewShotPredictions) ** 2)

1289.922480620155

In [20]:
# Mean squared error of the zero-shot predictions against the gold labels.
np.mean((goldLabels - zeroShotPredictions) ** 2)

1580.6201550387598

In [21]:
# Draw three random row positions. `high` is exclusive in np.random.randint, so it must be
# len(df) (not len(df) - 1) for the last row to be selectable.
np.random.randint(low=0, high=len(df), size=3)

array([113,  57,  72])

In [None]:
# Create a discrete set of categories from the 'propYes' column (because OpenAI API does not yet support regression).
# The OpenAI API also does not yet support ordinal classification, so we will restrict our focus to 3 categories. 
# The category column must be called 'completion' for the purposes of fine-tuning. 
# Note: whitespace added before label
def _agreement_category(prop_yes):
    """Map a propYes value to " yes" (> 0.75), " no" (< 0.25) or " either" (anything else)."""
    if prop_yes > 0.75:
        return " yes"
    if prop_yes < 0.25:
        return " no"
    return " either"


df["completion"] = df["propYes"].map(_agreement_category)
df["completion"].value_counts()

In [None]:
# Create our prompt. 
# Instruction text placed before each TEXT; the row's item name is spliced into the "yes" definition.
question_prefix = (
    "\nAssign the text to one of three categories:\n"
    "yes: More than 75% of people would agree that the claim described in the TEXT is covered under "
)
question_suffix = (
    " as it appears in the policy.\n"
    "either: Between 25% and 75% would agree.\n"
    "no: Fewer than 25% would agree.\n"
)
df["question"] = question_prefix + df["item"] + question_suffix
# Full prompt: instructions, then the scenario, ending on "CATEGORY:" for the model to complete.
df["prompt"] = df["question"] + "TEXT: " + df["header"] + " " + df["continuation"] + "\nCATEGORY:"

In [None]:
# CREATE 10 TRAIN/TEST FOLDS 
# Shuffle a copy of the row labels and deal them into 10 (near-)equal folds.
df["index"] = np.arange(df.shape[0])
foldIndices = df.index.to_numpy().copy()
np.random.shuffle(foldIndices)
folds = np.array_split(foldIndices, 10)

In [None]:
# PREPARE FINE-TUNING/EVALUATION DATA FOR THE FIRST FOLD
# Rows whose "index" is in folds[0] are held out for testing; every other row is used for training.
in_first_fold = df["index"].isin(folds[0])
trainSet = df[~in_first_fold]
# Only the prompt/completion pair is written to the training file.
trainSet[["prompt", "completion"]].to_json("data/train1.jsonl", orient="records", lines=True)
testSet = df[in_first_fold]
testSet.to_json("data/test1.jsonl", orient="records", lines=True)

In [None]:
# Spot-check the last rows of the held-out test split.
testSet.tail()

In [None]:
# OPTIONAL: Make sure we're using the most recent OpenAI cli. (0.26.4)
# !pip install --upgrade openai

In [None]:
# OPTIONAL: analyze training data with OpenAI's built-in CLI tool. Should pass. OpenAI asks if we want to split into a training and validation set, which we don't (we'll do that ourselves).
# !openai tools fine_tunes.prepare_data -f data/train1.jsonl

In [None]:
# Submit a fine-tune job through the OpenAI CLI: training file data/train1.jsonl,
# base model curie, learning-rate multiplier 0.02.
!openai api fine_tunes.create -t "data/train1.jsonl" -m curie --learning_rate_multiplier 0.02

In [None]:
# Follow a previously submitted fine-tune job by its ID.
# NOTE(review): this ID differs from the fold-1 "ft" value stored in fineTuneRecords_curie further down — confirm which job is the relevant one.
!openai api fine_tunes.follow -i ft-YMF91KpPbOLRCxk6p1qPYUM2

In [None]:
# EVALUATE ON THE TEST SET FOR THE FOLD:
# Reload the fold-1 test split from disk, keeping only the columns needed for evaluation.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
testSet_curie = pd.read_json("data/test1.jsonl", orient='records', lines=True)[["title","version","prompt","completion"]]
testSet_curie.head()

In [None]:
# {'yes': [3763], 'no': [645], 'either': [2035]}
def _curie_finetuned_category(row):
    """Return the single-token completion of the fine-tuned curie model for one row's prompt."""
    response = openai.Completion.create(
        model="curie:ft-stanford-2023-01-27-07-38-51",
        prompt=row["prompt"],
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


# One API request per test row.
testSet_curie["ft_prediction"] = testSet_curie.apply(_curie_finetuned_category, axis=1)

In [None]:
# {'yes': [3763], 'no': [645], 'either': [2035]}
def _curie_untuned_category(row):
    """Return the single-token completion of the base curie model for one row's prompt."""
    response = openai.Completion.create(
        model="curie",
        prompt=row["prompt"],
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


# One API request per test row; serves as the baseline for the fine-tuned model.
testSet_curie["untuned_prediction"] = testSet_curie.apply(_curie_untuned_category, axis=1)

In [None]:
# Tag every fold-1 row with its fold number and the fine-tuned model that produced ft_prediction.
testSet_curie = testSet_curie.assign(
    fold=1,
    model="curie:ft-stanford-2023-01-27-07-38-51",
)
testSet_curie

In [None]:
# Bookkeeping table of fine-tune jobs, seeded with the fold-1 uploaded-file ID and job ID.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
fineTuneRecords_curie = pd.DataFrame(data = {"fold" : [1], "file": ["file-rn56S6fKQas8CUCG7DcRrk8a"], "ft": ["ft-eZgp39Ha4EW0z1cT8EaUQy8m"]})

In [None]:
# CREATE TRAIN/TEST SPLITS FOR THE REMAINING 9 FOLDS
# folds[1] .. folds[9] are written out as files numbered 2 .. 10.
for fold_number, fold_rows in enumerate(folds[1:10], start=2):
    held_out = df["index"].isin(fold_rows)
    trainSet = df[~held_out]
    trainSet[["prompt", "completion"]].to_json("data/train" + str(fold_number) + ".jsonl", orient="records", lines=True)
    testSet = df[held_out]
    testSet.to_json("data/test" + str(fold_number) + ".jsonl", orient="records", lines=True)

In [None]:
# CREATE FINE-TUNE REQUESTS FOR REMAINING 9 FOLDS (USING THE PYTHON API RATHER THAN CLI)
for i in range(1,10):
    train_path = "data/train" + str(i+1) + ".jsonl"
    # Upload this fold's training data (re-serialised as JSON Lines text) for fine-tuning.
    # (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
    fileCreateCallBack = openai.File.create(
        file=pd.read_json(train_path, orient='records', lines=True).to_json(orient="records", lines=True),
        purpose='fine-tune'
    )
    # Start a curie fine-tune on the uploaded file with the same settings as fold 1.
    fineTuneCreateCallBack = openai.FineTune.create(
        training_file=fileCreateCallBack.id,
        model="curie",
        learning_rate_multiplier=0.02
    )
    # DataFrame.append was removed in pandas 2.0, so concatenate a one-row frame instead.
    # The record is stored inside the loop on purpose: IDs of jobs already submitted are
    # kept even if a later iteration fails.
    new_record = pd.DataFrame([{"fold": i + 1, "file": fileCreateCallBack.id, "ft": fineTuneCreateCallBack.id}])
    fineTuneRecords_curie = pd.concat([fineTuneRecords_curie, new_record], ignore_index=True)

In [None]:
# Show the recorded fold / uploaded-file ID / fine-tune job ID rows.
fineTuneRecords_curie

In [None]:
# Follow one fine-tune job by its ID (presumably one of the jobs created in the loop above — confirm against fineTuneRecords_curie).
!openai api fine_tunes.follow -i ft-RwIzE0zm26DeFyfOAhAXE66n

In [None]:
# CREATE A DATAFRAME WITH COLUMNS: TITLE, VERSION, PROMPT, COMPLETION, FT_PREDICITON, UNTUNED_PREDICTION, FOLD, MODEL
# Empty accumulator with the result schema; per-fold frames are concatenated onto it later.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
results_curie = pd.DataFrame(data = {"title":[], "version":[], "prompt":[],
                           "completion":[], "ft_prediction":[], "untuned_prediction":[], "fold": [], "model": []})

In [None]:
# Add the fold-1 predictions to the accumulated results.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
results_curie = pd.concat([results_curie,testSet_curie])
results_curie

In [None]:
# Make an ordered array of fine-tuned model names 
# Placeholder string until the model name has been fetched for each job.
fineTuneRecords_curie["model"] = "None"
# Iterate over the rows actually recorded rather than a hard-coded count, and write with
# .loc: chained assignment (frame["model"][i] = ...) may act on a temporary copy and
# triggers pandas' SettingWithCopyWarning.
for i in fineTuneRecords_curie.index:
    fineTuneRecords_curie.loc[i, "model"] = openai.FineTune.retrieve(fineTuneRecords_curie.loc[i, "ft"]).fine_tuned_model
fineTuneRecords_curie.to_csv("fineTuneRecords_curie.csv")
fineTuneRecords_curie

In [None]:
# GET FINE-TUNE AND UNTUNED PREDICTIONS FOR REMAINING 9 FOLDS (USING THE PYTHON API RATHER THAN CLI)
def _curie_category_completion(prompt, model):
    """Return the single-token completion of `model` for `prompt`.

    The bias tokens are the ones listed for 'yes' (3763), 'no' (645) and 'either' (2035)
    in the fold-1 cells of this notebook.
    """
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


for i in range(1,10):
    # (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
    # .copy() so the column assignments below act on an independent frame.
    testSet = pd.read_json("data/test" + str(i+1) + ".jsonl", orient='records', lines=True)[["title","version","prompt","completion"]].copy()
    # Row i of fineTuneRecords_curie holds the model fine-tuned for fold i + 1.
    fold_model = fineTuneRecords_curie.loc[i, "model"]
    testSet["ft_prediction"] = testSet.apply(lambda x : _curie_category_completion(x["prompt"], fold_model), axis = 1)
    # Baseline is the untuned curie model with the same bias tokens as fold 1; the previous
    # "ada" model and token 8627 did not match the fold-1 curie evaluation.
    testSet["untuned_prediction"] = testSet.apply(lambda x : _curie_category_completion(x["prompt"], "curie"), axis = 1)
    testSet["fold"] = i + 1
    testSet["model"] = fold_model
    results_curie = pd.concat([results_curie,testSet])

In [None]:
# Show the accumulated curie results across folds.
results_curie

In [None]:
# Persist the curie results to a CSV file in the working directory.
results_curie.to_csv("results_curie.csv")

In [None]:
# DAVINCI

In [None]:
# Submit a fine-tune job through the OpenAI CLI: training file data/train1.jsonl,
# base model davinci, learning-rate multiplier 0.05.
!openai api fine_tunes.create -t "data/train1.jsonl" -m davinci --learning_rate_multiplier 0.05

In [None]:
# Follow the fine-tune job whose ID is also stored as the fold-1 "ft" value in fineTuneRecords_davinci.
!openai api fine_tunes.follow -i ft-00eLrl2gDrWYJs47EedcGRIZ

In [None]:
# EVALUATE ON THE TEST SET FOR THE FOLD:
# Reload the fold-1 test split from disk, keeping only the columns needed for evaluation.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
testSet_davinci = pd.read_json("data/test1.jsonl", orient='records', lines=True)[["title","version","prompt","completion"]]
testSet_davinci.head()

In [None]:
def _davinci_finetuned_category(row):
    """Return the single-token completion of the fine-tuned davinci model for one row's prompt."""
    response = openai.Completion.create(
        model="davinci:ft-stanford-2023-01-27-09-41-01",
        prompt=row["prompt"],
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


# One API request per test row.
testSet_davinci["ft_prediction"] = testSet_davinci.apply(_davinci_finetuned_category, axis=1)

In [None]:
def _davinci_untuned_category(row):
    """Return the single-token completion of the base davinci model for one row's prompt."""
    response = openai.Completion.create(
        model="davinci",
        prompt=row["prompt"],
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


# One API request per test row; serves as the baseline for the fine-tuned model.
testSet_davinci["untuned_prediction"] = testSet_davinci.apply(_davinci_untuned_category, axis=1)

In [None]:
# Tag every fold-1 row with its fold number and the fine-tuned model that produced ft_prediction.
testSet_davinci = testSet_davinci.assign(
    fold=1,
    model="davinci:ft-stanford-2023-01-27-09-41-01",
)
testSet_davinci

In [None]:
# CREATE A DATAFRAME WITH COLUMNS: TITLE, VERSION, PROMPT, COMPLETION, FT_PREDICITON, UNTUNED_PREDICTION, FOLD, MODEL
# Empty accumulator with the result schema; per-fold frames are concatenated onto it later.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
results_davinci = pd.DataFrame(data = {"title":[], "version":[], "prompt":[],
                           "completion":[], "ft_prediction":[], "untuned_prediction":[], "fold": [], "model": []})

In [None]:
# Bookkeeping table of davinci fine-tune jobs, seeded with the fold-1 uploaded-file ID and job ID.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
fineTuneRecords_davinci = pd.DataFrame(data = {"fold" : [1], "file": ["file-1NUX8m6tRVoB3sHIFY8aUPm3"], "ft": ["ft-00eLrl2gDrWYJs47EedcGRIZ"]})

In [None]:
# Add the fold-1 predictions to the accumulated davinci results.
# (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
results_davinci = pd.concat([testSet_davinci, results_davinci])

In [None]:
# CREATE A FINE-TUNE REQUEST FOR FOLD 2 (USING THE PYTHON API RATHER THAN CLI)
# range(1,2) covers fold 2 only; widen it to range(1,10) to submit folds 2-10.
for i in range(1,2):
    train_path = "data/train" + str(i+1) + ".jsonl"
    # Upload this fold's training data (re-serialised as JSON Lines text) for fine-tuning.
    # (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
    fileCreateCallBack = openai.File.create(
        file=pd.read_json(train_path, orient='records', lines=True).to_json(orient="records", lines=True),
        purpose='fine-tune'
    )
    # Start a davinci fine-tune on the uploaded file with the same settings as fold 1.
    fineTuneCreateCallBack = openai.FineTune.create(
        training_file=fileCreateCallBack.id,
        model="davinci",
        learning_rate_multiplier=0.05
    )
    # DataFrame.append was removed in pandas 2.0, so concatenate a one-row frame instead.
    # The record is stored inside the loop so IDs of submitted jobs are kept if a later iteration fails.
    new_record = pd.DataFrame([{"fold": i + 1, "file": fileCreateCallBack.id, "ft": fineTuneCreateCallBack.id}])
    fineTuneRecords_davinci = pd.concat([fineTuneRecords_davinci, new_record], ignore_index=True)

In [None]:
# Make an ordered array of fine-tuned model names 
# Placeholder string until the model name has been fetched for each job.
fineTuneRecords_davinci["model"] = "None"
# Iterate over the rows actually recorded rather than a hard-coded count, and write with
# .loc: chained assignment (frame["model"][i] = ...) may act on a temporary copy and
# triggers pandas' SettingWithCopyWarning.
for i in fineTuneRecords_davinci.index:
    fineTuneRecords_davinci.loc[i, "model"] = openai.FineTune.retrieve(fineTuneRecords_davinci.loc[i, "ft"]).fine_tuned_model
fineTuneRecords_davinci.to_csv("fineTuneRecords_davinci.csv")
fineTuneRecords_davinci

In [None]:
# Follow one davinci fine-tune job by its ID (presumably the fold-2 job created above — confirm against fineTuneRecords_davinci).
!openai api fine_tunes.follow -i ft-hGZLzCma2pF1hKMbwQD2eWnr	

In [None]:
# GET FINE-TUNED AND UNTUNED PREDICTIONS FOR FOLD 2 (USING THE PYTHON API RATHER THAN CLI)
# range(1,2) covers fold 2 only; widen it to range(1,10) once the other folds are fine-tuned.
def _davinci_category_completion(prompt, model):
    """Return the single-token completion of `model` for `prompt`.

    The bias tokens are the ones listed for 'yes' (3763), 'no' (645) and 'either' (2035)
    earlier in this notebook.
    """
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        logit_bias={3763: 100, 645: 100, 2035: 100},
        temperature=0,
        max_tokens=1,
    )
    return response.choices[0].text


for i in range(1,2):
    # (pandas is imported as `pd`; the bare name `pandas` is undefined and raised NameError.)
    # .copy() so the column assignments below act on an independent frame.
    testSet = pd.read_json("data/test" + str(i+1) + ".jsonl", orient='records', lines=True)[["title","version","prompt","completion"]].copy()
    # Row i of fineTuneRecords_davinci holds the model fine-tuned for fold i + 1.
    fold_model = fineTuneRecords_davinci.loc[i, "model"]
    testSet["ft_prediction"] = testSet.apply(lambda x : _davinci_category_completion(x["prompt"], fold_model), axis = 1)
    testSet["untuned_prediction"] = testSet.apply(lambda x : _davinci_category_completion(x["prompt"], "davinci"), axis = 1)
    testSet["fold"] = i + 1
    testSet["model"] = fold_model
    results_davinci = pd.concat([results_davinci,testSet])

In [None]:
# Show the accumulated davinci results.
results_davinci