# Get GPT-4o completions for Edwardian prompts

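We select passages from Edwardian books and ask an OpenAI chat model to continue each one in the same style. (Note: although the title and the output filename say GPT-4o, the request code below sets `model = "gpt-4-turbo"`.)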

In [36]:
from glob import glob
import pandas as pd
import textwrap
import random

import backoff
import openai

In [17]:
# Read the tab-separated table of Edwardian text segments, then show its
# dimensions as the cell output.
edward = pd.read_table('edwardian_segments.tsv')
edward.shape

(11930, 3)

In [None]:
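# These commands were run only once, the first time this notebook was
# executed, to randomly divide the dataset into two roughly equal halves;
# only the rows flagged use_as_prompt == 1 are used for generation.
# Re-running them would draw a new random split and overwrite the file.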

# edward['use_as_prompt'] = [random.randint(0, 1) for _ in range(len(edward))]
# edward.to_csv('edwardian_segments.tsv', sep='\t', index=False)

In [39]:
# Restrict to the rows flagged for use as prompts, then draw 1000 of them
# at random. No seed is set, so the sample differs from run to run.
prompt_pool = edward.loc[edward['use_as_prompt'] == 1].copy()
prompts = prompt_pool.sample(n=1000)

In [7]:
def print_wrapped_text(text, width=70):
    """Print ``text`` wrapped to at most ``width`` columns.

    The text is split on newline characters and every resulting line is
    wrapped and printed on its own, so the line breaks already present in
    ``text`` are kept and an empty line is printed as a blank line.

    Args:
        text: The string to print.
        width: Maximum line length passed to ``textwrap``; defaults to 70.

    Returns:
        None. The wrapped text is written to standard output.
    """
    for line in text.split('\n'):
        # Use the caller's width for every line, including the lines of
        # multi-line input, rather than a fixed 70 columns.
        print(textwrap.fill(line, width=width))
    
# Credentials are read from a local text file: the first line is used as
# the organization and the second line as the API key.
with open('credentials.txt', encoding='utf-8') as f:
    organization, api_key = (f.readline().strip() for _ in range(2))

# Module-level client; the API-calling function below reads this global.
client = openai.OpenAI(api_key=api_key, organization=organization)

### The function that actually calls the API

We wrap this call with the `backoff` decorator so that rate-limit errors are retried with exponential backoff instead of failing immediately.

In [8]:
@backoff.on_exception(
    backoff.expo,               # exponentially growing waits between retries
    openai.RateLimitError,      # only rate-limit errors trigger a retry
    max_time=60,                # maximum time, in seconds, to keep retrying
    giveup=lambda e: False,     # never abandon retries early on a given error
)
def completions_with_backoff(**kwargs):
    """Call the chat-completions endpoint, retrying on rate-limit errors.

    All keyword arguments are forwarded unchanged to
    ``client.chat.completions.create`` on the module-level ``client``.

    Any ``openai.APIError`` is printed and then re-raised; when it is an
    ``openai.RateLimitError`` the decorator above retries the call.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    try:
        response = client.chat.completions.create(**kwargs)
    except openai.APIError as e:
        print(f"Error: {e}")
        # Propagate so the backoff decorator (or the caller) can react.
        raise
    return response

In [40]:
def submit_prompt(system_prompt, edwardian_prompt, date, temperature,
                  model="gpt-4-turbo"):
    """Send one system + user message pair to the chat model.

    Args:
        system_prompt: Text sent as the ``system`` message. It is used
            exactly as given, so any publication date the model should see
            must already be part of this string.
        edwardian_prompt: Passage sent as the ``user`` message.
        date: Accepted for compatibility with existing callers but not
            used by this function.
        temperature: Sampling temperature forwarded to the API call.
        model: Model name forwarded to the API call. Defaults to
            "gpt-4-turbo", the value this notebook has always sent.

    Returns:
        The completion object returned by ``completions_with_backoff``.

    Raises:
        Exception: Whatever ``completions_with_backoff`` raised. The error
            is printed and then re-raised, so callers see the real failure
            rather than an ``UnboundLocalError`` from a missing result.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": edwardian_prompt},
    ]

    try:
        return completions_with_backoff(
            model=model,
            messages=messages,
            max_tokens=220,
            temperature=temperature,
        )
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        # There is no completion to return, so surface the original error.
        raise

In [38]:
# NOTE(review): `date` is read from the kernel's global namespace, so it
# must already exist when this cell runs, and the prompt keeps whatever
# year it held at that moment — confirm this is the intended value.
system_prompt = (
    "Your task is to complete passages from early twentieth-century books. "
    "You will be given a passage from a book published in "
    + str(date)  # convert date to string
    + ". Continue this passage in the same style, for at least 200 words. "
    "Only provide this continuation; do not make any framing remarks, "
    "like 'here is the continuation:'."
)

In [42]:
# Preview the first rows of the sampled prompts table.
prompts.head()

Unnamed: 0,source,date,segment,use_as_prompt
9492,mdp.39015016100219,1913,"The definition is an arbitrary one, and not ve...",1
2223,mdp.39015024521703,1913,"Nietzsche himself, in one of his moments of in...",1
6928,mdp.39015063623899,1912,The rapid movement gave to Charles II's force ...,1
1299,mdp.39015059431950,1908,"After the degenerate has served his sentence, ...",1
4605,mdp.39015075026594,1912,"Re sure to send to the Ansco Company, Birmingh...",1


In [43]:
# Generate one continuation per sampled prompt, writing a partial results
# file at intervals and echoing one sample exchange after each checkpoint.

MIN_WORDS = 128   # shortest accepted continuation, in whitespace-split words (token approximation)
MAX_TOPUPS = 5    # extra requests allowed per prompt before a short continuation is accepted


def build_system_prompt(date):
    """Return the system prompt naming ``date`` as the book's publication year."""
    return (
        "Your task is to complete passages from early twentieth-century books. "
        "You will be given a passage from a book published in "
        + str(date)
        + ". Continue this passage in the same style, for at least 200 words. "
        "Only provide this continuation; do not make any framing remarks, "
        "like 'here is the continuation:'."
    )


def clean_continuation(text):
    """Replace newlines and tabs with spaces so the text stays in one TSV field."""
    return text.replace('\n', ' ').replace('\t', ' ').replace('  ', ' ')


def generate_continuation(system_prompt, edwardian_prompt, date, temperature):
    """Request a continuation and extend it until it reaches MIN_WORDS words.

    If the first response is too short, the passage plus the text so far is
    resubmitted and the new response appended. At most MAX_TOPUPS extra
    requests are made, so a model that keeps returning very short text
    cannot keep this loop running indefinitely.
    """
    completion = submit_prompt(system_prompt, edwardian_prompt, date, temperature)
    continuation = clean_continuation(completion.choices[0].message.content)

    topups = 0
    while len(continuation.split()) < MIN_WORDS and topups < MAX_TOPUPS:
        topups += 1
        extended_prompt = edwardian_prompt + ' ' + continuation
        completion = submit_prompt(system_prompt, extended_prompt, date, temperature)
        addition = clean_continuation(completion.choices[0].message.content)
        continuation = continuation + ' ' + addition

    return continuation


continuations = []    # successful continuations, in the order they were produced
completed_rows = []   # positional row numbers in `prompts`, parallel to `continuations`
printnext = False
ctr = 0

for ctr, (idx, row) in enumerate(prompts.iterrows(), start=1):
    edwardian_prompt = row['segment']
    date = row['date']
    temperature = round(random.uniform(0.5, 0.7), 3)

    # Build the prompt from this row's own date; a single prompt string
    # built once would send the same publication year with every passage.
    row_system_prompt = build_system_prompt(date)

    if printnext:
        print_wrapped_text(row_system_prompt)
        print()
        print_wrapped_text(edwardian_prompt)
        print()

    try:
        continuation = generate_continuation(
            row_system_prompt, edwardian_prompt, date, temperature
        )
    except Exception as e:
        # Skip this prompt (first request or a top-up failed) and move on.
        print(f"Error: {e}")
        continue

    continuations.append(continuation)
    completed_rows.append(ctr - 1)

    if printnext:
        print_wrapped_text(continuation)
        print('-------------------\n')

    if ctr % 200 == 5:
        print(f"Completed {ctr} prompts\n")
        printnext = True
        # Select exactly the rows that produced a continuation, so the new
        # column always lines up even when some prompts were skipped.
        sofar = prompts.iloc[completed_rows].copy()
        sofar['continuation'] = continuations
        sofar.to_csv('new_GPT4o_continuations.tsv', sep='\t', index=False)
    else:
        printnext = False

# Skipped prompts keep an empty value (None) instead of breaking the
# assignment with a length mismatch.
all_continuations = [None] * len(prompts)
for position, text in zip(completed_rows, continuations):
    all_continuations[position] = text
prompts['continuation'] = all_continuations


Completed 5 prompts

Your task is to complete passages from early twentieth-century books.
You will be given a passage from a book published in 1911. Continue
this passage in the same style, for at least 200 words. Only provide
this continuation; do not make any framing remarks, like 'here is the
continuation:'.

147 " Yes, I'Ll swear. Furthermore, there is not a flask in the
crowd." Once, twice, thrice, the fox repeated the gesture with cold
insolence. He was about to raise his paw for the fourth time, but the
club to a man turned and fled. They rode off as if the legions of
Lucifer were in pursuit; they rode as John Gilpin and Tam O'Shanter
rode, and the only word spoken was to affirm that there was not a
flask in the crowd. The pace was not slackened till the lights of the
Neville house had begun to wink at them in the darkness. The Neville
house; that was where they had seen the devilish thing first, sitting
on the wood - pile contemplating the scene, and they began to tell one
ano

In [45]:
# Check the table's dimensions after the generation cell has added the
# 'continuation' column.
prompts.shape

(1000, 5)

In [46]:
# Write the full table, including the 'continuation' column, as a
# tab-separated file without the index. This is the same path the
# generation loop uses for its partial checkpoints, so it replaces them.
prompts.to_csv('new_GPT4o_continuations.tsv', sep='\t', index=False)