In [111]:
import openai
from my_secrets import API_KEY_OPENAI
import tiktoken
import textwrap
import os
from gpt_call import separate

# Authenticate the OpenAI client with the key kept in the local secrets module.
openai.api_key = API_KEY_OPENAI

# Price of a single token per model (the figures are written as price per 1000 tokens).
PRICES = {
    "text-curie-001": (0.0020 / 1000),
    "text-davinci-003": (0.020 / 1000),
}

We would like to test how feasible it is to pass manually separated scenes as part of the prompt to GPT-3.
One concern is that the model might not be able to generate a coherent scene if the prompt is too short. We would like to test this by comparing the performance of GPT-3 with and without examples in the prompt. In addition, adding more text to the prompt increases the cost of the API call, so we would also like to find out how much text is needed to get a good result.

In [103]:
# Models under comparison. Every model receives the same prompt (and therefore
# the same dream), so their outputs can be compared directly.
models = [
    "text-curie-001",
    "text-davinci-003",
]

In [104]:

def get_samples(path="manual_scene_separation_data.txt"):
    """
    Read the file with the manually separated scenes and return the samples.

    The file content is split on the "###" marker. The text before the first
    marker and the text after the last marker are discarded; every remaining
    chunk has all occurrences of "IN:" removed and surrounding whitespace
    stripped.

    Parameters
    ----------
    path : str
        File to read. Defaults to the data file used by this notebook.

    Returns
    -------
    list of str
        The cleaned samples in file order. Empty when the file contains fewer
        than two "###" markers.
    """
    with open(path, "r") as f:
        data = f.read()
    # [1:-1] drops whatever precedes the first "###" and follows the last one.
    chunks = data.split("###")[1:-1]
    return [chunk.replace("IN:", "").strip() for chunk in chunks]


In [None]:
# Get a random dream description from the Excel file (dream corpus).
# NOTE(review): `separate` is imported from gpt_call and is not visible here;
# the description above is taken from the original comment — confirm what it returns.
dream = separate()

In [126]:
# Instruction that opens every prompt.
command = "Give short visual descriptions of the scenes in the following:"
samples = get_samples()
# Number of manually separated examples to pass to the model (0 = none).
n = 0
# Concatenate the first n samples, each one followed by a line separator.
# max(n, 0) keeps a negative n equivalent to "no examples".
examples = "".join(sample + os.linesep for sample in samples[:max(n, 0)])


In [127]:
# Build the prompt. When examples are passed they go under an "Examples:" header;
# otherwise the prompt is just the command followed by the dream.
if examples != "":
    # Put every part on its own line. The backslash continuations used before
    # sat inside the string literal, so "Examples:", the examples and the dream
    # were concatenated without any separator between them.
    prompt = os.linesep.join([command, "Examples:", examples.strip(), f"{dream}"])
else:
    prompt = f"{command}{os.linesep}{dream}"

In [128]:
def calc_price(model, prompt):
    """
    Return the price of `prompt` for `model`.

    The text is tokenized with the tiktoken encoding registered for the model,
    and the token count is multiplied by the model's per-token price in PRICES.
    A model that is missing from PRICES raises KeyError.
    """
    token_count = len(tiktoken.encoding_for_model(model).encode(prompt))
    return PRICES[model] * token_count

In [129]:
def write_to_file(prompt, generated_text, model, price,
                  path="compare_out.txt", dream_text=None, n_examples=None):
    """
    Append one comparison record (model, price, prompt, output) to a text file.

    The prompt is logged without its examples section: only the part before
    "Examples:" is written, followed by the dream.

    Parameters
    ----------
    prompt : str
        The full prompt that was sent to the model.
    generated_text : str
        The text returned by the model.
    model : str
        Name of the model that produced the output.
    price : float
        Price of the call; written rounded to 5 decimals.
    path : str
        File to append to. Defaults to "compare_out.txt".
    dream_text : optional
        The dream to log. Defaults to the notebook-level variable `dream`.
    n_examples : optional
        Number of examples to report. Defaults to the notebook-level variable `n`.
    """
    # Fall back to the notebook globals when no explicit values are given.
    if dream_text is None:
        dream_text = dream
    if n_examples is None:
        n_examples = n
    dream_str = f"{dream_text}"

    # The prompt normally ends with the dream. Strip it before taking the part
    # in front of "Examples:", otherwise a prompt without examples would already
    # contain the dream and the dream would be written twice.
    prompt_body = prompt
    if dream_str and prompt_body.endswith(dream_str):
        prompt_body = prompt_body[:-len(dream_str)]
    prompt_head = prompt_body.split("Examples:")[0]

    record = [
        f'#Model:{model}, price:{round(price,5)}$ with {n_examples} examples',
        f'#Prompt:\n{prompt_head}{dream_str}',
        f'#Output: \n{generated_text}',
        '########################',
    ]
    # Text mode already translates "\n" to the platform line ending; writing
    # os.linesep here would produce "\r\r\n" on Windows.
    with open(path, "a+") as f:
        f.write("\n".join(record) + "\n")
    

In [130]:
# Send the same prompt to every model in `models` and log each result.
for m in models:
    # Price of the prompt tokens for this model.
    prompt_price = calc_price(m, prompt)
    completions = openai.Completion.create(
        engine=m,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None, # no stop sequence is set
        temperature=0.45, # not too high
    )
    # n=1 above is the API keyword argument (one completion requested), not the
    # notebook variable `n`; take the text of that single choice.
    generated_text = completions.choices[0].text
    # Price of the generated tokens, using the same per-token rate as the prompt.
    gen_price = calc_price(m, generated_text)
    # Log model, total price (prompt + output), prompt and output.
    write_to_file(prompt, generated_text, m, prompt_price+gen_price)

    

In [131]:
# print(generated_text)
# write_to_file(prompt, generated_text, m, prompt_price+gen_price)
