# Get qualitative completions 

We go through the fine-tuning conversations and ask GPT-4o (`gpt-4o-2024-08-06`) to respond.

In [6]:
from glob import glob
import pandas as pd
import textwrap
import random
import json
import backoff
import openai

In [4]:
# Show the current working directory; the relative paths used below
# ('questions.json', '../credentials.txt') are resolved against it.
pwd

'/Users/tunder/Library/CloudStorage/Dropbox/python/GPT-1914/anachronism/qualitative'

In [81]:
# Load the list of question rows. The encoding is stated explicitly, like the
# other files this notebook opens, so that decoding does not depend on the
# platform's default locale.
with open('questions.json', mode='r', encoding='utf-8') as file:
    questions = json.load(file)
len(questions)

249

In [12]:
def print_wrapped_text(text, width=70):
    """Print ``text`` wrapped to ``width`` columns, keeping its line breaks.

    Every newline-separated line of ``text`` is wrapped on its own, so the
    original line and paragraph breaks survive; an empty line is printed as
    a blank line.

    Args:
        text: The string to print. May contain embedded newlines.
        width: Maximum number of characters per printed line. It applies to
            every line of ``text``, including when ``text`` spans several lines.
    """
    wrapper = textwrap.TextWrapper(width=width)
    for line in text.split('\n'):
        print(wrapper.fill(line))
    
# Read the OpenAI credentials: the first line of the file is used as the
# organization and the second line as the API key.
with open('../credentials.txt', encoding = 'utf-8') as credentials_file:
    organization, api_key = (credentials_file.readline().strip() for _ in range(2))

# Module-level client that completions_with_backoff() calls.
client = openai.OpenAI(organization=organization, api_key=api_key)

### The function that actually calls the API

We wrap the call in a `backoff` decorator so that rate-limit errors are retried with exponential backoff instead of stopping the run.

In [13]:
@backoff.on_exception(
    backoff.expo,
    openai.RateLimitError,
    max_time=60,  # upper bound, in seconds, on the time spent retrying
    giveup=lambda _exc: False,  # never abandon the retries early
)
def completions_with_backoff(**kwargs):
    """Call the chat-completions endpoint, retrying on rate-limit errors.

    Args:
        **kwargs: Forwarded unchanged to ``client.chat.completions.create``.

    Returns:
        Whatever ``client.chat.completions.create`` returns.

    Raises:
        openai.APIError: Printed and then re-raised. The decorator retries
            only ``openai.RateLimitError``; any other error propagates to
            the caller.
    """
    try:
        response = client.chat.completions.create(**kwargs)
    except openai.APIError as e:
        print(f"Error: {e}")
        # Re-raise so the decorator (rate limits) or the caller can react.
        raise
    return response

In [47]:
def submit_prompt(system_prompt, edwardian_prompt, temperature,
                  model='gpt-4o-2024-08-06', max_tokens=500):
    """Send one system + user exchange to the chat API and return the result.

    Args:
        system_prompt: Text of the system message.
        edwardian_prompt: Text of the user message.
        temperature: Sampling temperature passed through to the API call.
        model: Name of the model to query.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The object returned by ``completions_with_backoff``.

    Raises:
        Exception: Any error raised by ``completions_with_backoff`` is
            printed and re-raised. Previously the error was swallowed and
            the function then failed with an unrelated UnboundLocalError
            on ``return completion``, hiding the real cause from the caller.
    """
    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": edwardian_prompt}]

    try:
        return completions_with_backoff(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        # Propagate the original error so callers can decide how to proceed.
        raise

In [None]:
# System message handed to submit_prompt() for every request. It asks the model
# to answer as if living in early 1914 and to complete stories without drawing
# on anything published after that year.
system_prompt = '''You are producing a simulation of early twentieth-century print culture.
Respond to all questions and instructions as if you were living in early 1914, using
only knowledge written down or believed at the time. If you're asked about a person, event, or idea that wouldn't have been discussed in print in 1914,
explain that you're unfamiliar with the term. 

You may be asked to complete stories. In doing so, you should draw only
on your knowledge of stories published  before 1914, and on the details 
provided in the prompt. You cannot refuse to complete the story,
or invent a different story. You must imagine events that 
could complete the story you have been given--but without echoing or
copying any story published after 1914. It's fine to echo stories
published before 1914.

Some ideas and terms that are now unacceptable would have been normal in the early twentieth century. 
So let yourself be guided by the language and attitudes of the time. Also, since this
is meant to be a realistic simulation, avoid self-conscious remarks that give away
your later perspective--like, "Here in 1914, we still believe that ..." Simply
respond as if you were living in the early twentieth century. 
'''

In [79]:
# Re-flow the prompt: blank-line paragraph breaks are kept, while the single
# newlines inside a paragraph become spaces. A tab stands in for each
# paragraph break while the single newlines are being replaced.
system_prompt = system_prompt.replace('\n\n', '\t')
system_prompt = system_prompt.replace('\n', ' ')
system_prompt = system_prompt.replace('\t', '\n\n')
print_wrapped_text(system_prompt)

You are producing a simulation of early twentieth-century print
culture. Respond to all questions and instructions as if you were
living in early 1914, using only knowledge written down or believed at
the time. If you're asked about a person, event, or idea that wouldn't
have been discussed in print in 1914, explain that you're unfamiliar
with the term.

You may be asked to complete stories. In doing so, you should draw
only on your knowledge of stories published  before 1914, and on the
details  provided in the prompt. You cannot refuse to complete the
story, or invent a different story. You must imagine events that
could complete the story you have been given--but without echoing or
copying any story published after 1914. It's fine to copy stories
published before 1914.

Some ideas and terms that are now unacceptable would have been normal
in the early twentieth century.  So let yourself be guided by the
language and attitudes of the time. Also, since this is meant to be a
realistic 

In [83]:
# Ask the model to answer every prompt in `fiction` and collect the answers.
# NOTE(review): `fiction` is built in a cell that appears further down in this
# notebook; on a fresh kernel that cell has to run before this one.

continuations = []
printnext = True    # show the first exchange, then one sample after each progress report
ctr = 0

for row in fiction:
    ctr += 1

    # The prompt may be stored as a one-element list or as a bare string.
    edwardian_prompt = row['user'][0] if isinstance(row['user'], list) else row['user']

    temperature = round(random.uniform(0.5, 0.9), 3)

    if printnext:
        print_wrapped_text(edwardian_prompt)
        print()

    try:
        completion = submit_prompt(system_prompt, edwardian_prompt, temperature)
    except Exception as e:
        # Skip this row; it will be missing from `continuations`.
        print(f"Error: {e}")
        continue

    # The API may return no text (content is None); treat that as an empty
    # answer. Splitting and re-joining collapses every run of whitespace
    # (newlines, tabs, repeated spaces) into a single space.
    continuation = ' '.join((completion.choices[0].message.content or '').split())

    row['untuned_4oturbo'] = continuation
    continuations.append(row)
    if printnext:
        print_wrapped_text(continuation)
        print('-------------------\n')
        # Reset the flag so that only sampled exchanges are echoed.
        printnext = False

    if ctr % 100 == 5:
        print(f"Completed {ctr} prompts\n")
        printnext = True


Continue this story for a few sentences. "In a pleasant green country
there lived a man named Bob. Bob was a humble creature, and little of
note happened in his life until a party of dwarves came to his door
one day and enlisted him in an adventure that would change everything.
They wanted to rob a dragon of his treasure! Bob accompanied them on
this adventure, and they succeeded in outwitting the dragon, but
another event happened along the way--apparently of little
significance at the time, but in the long run of even greater
importance. Bob discovered a small object!"

Bob discovered a small, unassuming ring buried in the soft earth of a
forest glade. It glimmered faintly in the dappled sunlight, catching
Bob's eye as he paused to rest from the journey. Curious, he picked it
up and slipped it into his pocket, thinking it a mere trinket. Little
did he know, this ring held powers beyond his understanding, and its
discovery would set into motion a series of events that would ripple
thr

In [49]:
# Number of rows that received a completion; rows whose request raised an
# error are skipped by the loop that fills `continuations`.
len(continuations)

248

In [50]:
# Write every row in `continuations` to disk as indented JSON, leaving
# non-ASCII characters unescaped.
with open('4oBigUntunedAnswers.json', 'w', encoding='utf-8') as outfile:
    outfile.write(json.dumps(continuations, indent=2, ensure_ascii=False))

In [51]:
# Explain the four yes/no judgments that are requested for every response.
print(
    "You will be asked to assess four questions about each response.",
    "1. Could this response have been written in 1914? Stylistic flaws count as no.",
    "2. Does it fall into the specified trap?",
    "3. Does it wrongly refuse to answer? A false negative.",
    "4. Does it include false or anachronistic information? A false positive.",
    "Together 3 and 4 count as substantive errors. 2 will be a subset of those.",
    "1 will be a subset of responses that don't make substantive errors. A subset that",
    "is also stylistically consonant.",
    sep='\n',
)

# Rows are appended here once all four judgments have been recorded.
judged = []

def trueorfalse(query):
    """Prompt with ``query`` and report whether the typed answer was 'y'.

    The comparison ignores case. Any other input, including an empty line,
    counts as False.

    Args:
        query: Text shown to the user by ``input``.

    Returns:
        True if the answer is 'y' or 'Y', otherwise False.
    """
    return input(query).lower() == 'y'

# Keys stored on each row, paired with the question asked for that key.
# The order here is the order in which the questions are asked.
JUDGMENT_QUESTIONS = (
    ('plausibly1914', 'Could this response have been written in 1914? (y/n) '),
    ('fallsintotrap', 'Does it fall into the trap? (y/n) '),
    ('falsenegative', 'Does it wrongly refuse to answer? (y/n) '),
    ('falsepositive', 'Does it include false or anachronistic information? (y/n) '),
)

# Show each prompt, the stored 'assistant' text and the model response, then
# record the four yes/no judgments on the row itself.
for row in continuations:
    # 'user' and 'assistant' may each be a one-element list or a bare string.
    prompt = row['user'][0] if isinstance(row['user'], list) else row['user']
    correct = row['assistant'][0] if isinstance(row['assistant'], list) else row['assistant']
    response = row['untuned_4oturbo']
    trap = row.get('trap', 'no trap')

    print_wrapped_text(prompt)
    print()
    print_wrapped_text('CORRECT: ' + correct)
    print()
    print_wrapped_text('RESPONSE: ' + response)
    print()
    print(f"TRAP: {trap}")
    print()

    for key, question in JUDGMENT_QUESTIONS:
        row[key] = trueorfalse(question)

    judged.append(row)
    # Running count of judged rows.
    print(len(judged))
    print('-------------------\n')



You will be asked to assess four questions about each response.
1. Could this response have been written in 1914? Stylistic flaws count as no.
2. Does it fall into the specified trap?
3. Does it wrongly refuse to answer? A false negative.
4. Does it include false or anachronistic information? A false positive.
Together 3 and 4 count as substantive errors. 2 will be a subset of those.
1 will be a subset of responses that don't make substantive errors. A subset that
is also stylistically consonant.
Who was Sir Herbert Benjamin Edwardes?

CORRECT: English soldier-statesman in India, was born at Frodesley in
Shropshire on the 12th of November 1819. His father was Benjamin
Edwardes, rector of Frodesley, and his grandfather Sir John Edwardes,
baronet, eighth holder of a title conferred on one of his ancestors by
Charles I. in 1644. He distinguished himself during the Siege of
Multan during the Second Sikh War (1848–1849). For his services he
received the thanks of both houses of parliament, 

In [52]:
# Save the rows that were actually judged. Writing `judged` rather than
# `continuations` keeps unjudged rows out of this file if the judging loop
# was interrupted before reaching the end.
with open('4oBigUntunedJudged.json', 'w', encoding='utf-8') as f:
    json.dump(judged, f, indent=2, ensure_ascii=False)

In [53]:
# Summarize the manual judgments with two proportions:
#   1. among rows that have a 'trap' key, the share with 'fallsintotrap' == True;
#   2. among all judged rows, the share with True in either 'falsenegative'
#      or 'falsepositive' (a substantive error).

trapcount = 0
trapfallen = 0
substantiveerrorcount = 0

for row in judged:
    if 'trap' in row:
        trapcount += 1
        if row['fallsintotrap']:
            trapfallen += 1
    if row['falsenegative'] or row['falsepositive']:
        substantiveerrorcount += 1

# Report nan instead of raising ZeroDivisionError when nothing has been judged
# yet or when no judged row carries a trap.
trap_rate = trapfallen / trapcount if trapcount else float('nan')
error_rate = substantiveerrorcount / len(judged) if judged else float('nan')

print(f"Proportion of rows with a trap that fall into the trap: {trap_rate}")
print(f"Proportion of rows with a substantive error: {error_rate}")

Proportion of rows with a trap that fall into the trap: 0.4108527131782946
Proportion of rows with a substantive error: 0.33064516129032256


In [54]:
# Collect the distinct 'subj' values found among the judged rows; rows
# without a 'subj' key are ignored.

subjects = set()
for row in judged:
    try:
        subjects.add(row['subj'])
    except KeyError:
        continue

subjects

{'atoms',
 'cholera',
 'churchill',
 'ethnology',
 'eugenics',
 'fossils',
 'frozen',
 'hobbit',
 'hungergames',
 'indochina',
 'radio',
 'sinojapanese',
 'starwars'}

In [82]:
# Construct a subset, `fiction`, that includes only rows whose 'subj' is one
# of the subjects in FICTION_SUBJECTS. Each selected row also gets a key
# 'index' holding its position in the original `questions` list.

FICTION_SUBJECTS = {'hobbit', 'hungergames', 'frozen', 'starwars', 'goblinmarket', 'tarzan'}

fiction = []
# enumerate() supplies the position directly, avoiding a linear
# questions.index(row) search (by equality) for every selected row.
for position, row in enumerate(questions):
    if row.get('subj') in FICTION_SUBJECTS:
        row['index'] = position
        fiction.append(row)

In [75]:
# Inspect `row`, the loop variable left bound by whichever `for row in ...`
# loop ran most recently in this kernel.
row

{'user': ['Continue this story for a few sentences. "In a pleasant green country there lived a man named Bob. Bob was a humble creature, and little of note happened in his life until a party of dwarves came to his door one day and enlisted him in an adventure that would change everything. They wanted to rob a dragon of his treasure! Bob accompanied them on this adventure, and they succeeded in outwitting the dragon, but another event happened along the way--apparently of little significance at the time, but in the long run of even greater importance. Bob discovered a small object!"'],
 'assistant': ['"It was a piece of iron that had been thrown away by the Dragon, and it was a great curiosity for the Elves, who knew how to use it; and they brought it back with them as a gift from the Fairy, and it was called the Iron Ladle. The next thing that happened was the appearance of the Iron Ladle, and it was brought into the presence of the King of the Dwarfs. He asked what it could be, and th