In [1]:
import openai
import pandas as pd
# Read the OpenAI API key from a local file kept outside the notebook, so the
# secret itself never appears in a cell. Trailing whitespace/newline is dropped.
with open('../gpt_key.txt', 'r') as key_file:
    raw_key = key_file.read()
openai.api_key = raw_key.rstrip()

### Prompt bank
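Here we define the different ways of prompting GPT-3 about medical text. Each entry maps a result-column name to the descriptor that is substituted into the True/False question.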

In [2]:
# Keys become the names of the result columns added to `data`; values are the
# descriptors inserted into the True/False question sent to the model.
prompt_bank = {"Prompt 'something'": "something"}

### Import Medical Data CSV as Dataframe

In [3]:
# Load the tab-separated dataset; later cells read its 'text' column.
data = pd.read_csv(
    '...',
    sep='\t',
)

### Temperature
Temperature is used here in the range 0.0–1.0: 0.0 adds no randomness, while 1.0 allows GPT-3 to be very "creative." With temperature=0.0 the model gives essentially the same result each time. For most natural language generation tasks, people set the temperature to about 0.7. We set it to 0.3 (which works well for eliciting plain binary answers), and may also try 0.5 and 0.7 for comparison (which will probably require more parsing of the responses).

In [4]:
# Sampling temperature passed to every completion request in this notebook.
temperature = 0.3

### Interact with GPT3 (binary true or false)

In [None]:
def a_prompt(phrase, prompt_type):
    """Build the True/False question sent to the model.

    Args:
        phrase: The text being classified; it is quoted inside the question.
        prompt_type: The descriptor the phrase is tested against.

    Returns:
        A two-line prompt: the question, then an indented "Answer:" cue.
    """
    question = f"True or False, the phrase '{phrase}' is {prompt_type}?"
    # The four leading spaces before "Answer:" are part of the prompt text.
    answer_cue = "\n    Answer:"
    return question + answer_cue

def parse_boolean_text(t):
  """Convert a free-text model answer into a numeric label.

  The text is lower-cased, split on any whitespace, and each token is stripped
  of surrounding punctuation, so replies such as "\\n\\nTrue", "True." or
  "No," are recognised.

  Args:
    t: Raw completion text returned by the model.

  Returns:
    1 if the answer contains "true" or "yes", 0 if it contains "false" or
    "no" (or, failing those, the negation "not"), and -1 if nothing
    recognisable is found. When several keywords occur, priority is
    true > false > yes > no.
  """
  # split() with no argument also splits on newlines/tabs, which matters
  # because completions frequently begin with blank lines.
  tokens = [tok.strip(".,;:!?\"'()[]") for tok in t.lower().split()]

  for val in ('true', 'false', 'yes', 'no'):
    if val in tokens:
      return int(val in ('true', 'yes'))

  # Special case: sometimes GPT-3 answers with a negation ("it is not ...")
  # instead of an explicit false/no. Treat it as a negative answer.
  if 'not' in tokens:
    return 0

  return -1

def score_row(row, prompt_type):
    """Ask the model whether a row's text matches `prompt_type` and parse the reply.

    Args:
        row: A record exposing a 'text' entry (a DataFrame row in this notebook).
        prompt_type: The descriptor substituted into the True/False question.

    Returns:
        The value produced by `parse_boolean_text` for the first completion choice.
    """
    question = a_prompt(row['text'], prompt_type)
    completion = openai.Completion.create(
        engine="text-davinci-001",
        prompt=question,
        temperature=temperature,  # module-level setting defined earlier in the notebook
    )
    first_choice = completion["choices"][0]
    return parse_boolean_text(first_choice["text"])

# Score every row once per prompt; each prompt's results land in a column
# named after its key in `prompt_bank`. This issues one API call per row per prompt.
for key, prompt_type in prompt_bank.items():
  # Extra keyword arguments given to DataFrame.apply are forwarded to score_row.
  data[key] = data.apply(score_row, axis=1, prompt_type=prompt_type)

### Manual inspection

In [None]:
# Pick a single example (the row at position 7) to inspect by hand.
sample_row = data.iloc[7]
p = sample_row['text']
print(p)

In [None]:
# Run the selected phrase through every prompt and show, for each one, the raw
# completion text followed by the label the parser assigns to it.
phrase = p
print("Phrase: " + phrase + "\n")
for p_t, prompt in prompt_bank.items():
  response = openai.Completion.create(
    engine="text-davinci-001",
    prompt=a_prompt(phrase, prompt),
    temperature=temperature,
  )
  answer_text = response["choices"][0]["text"]
  print("For prompt: " + prompt)
  print(answer_text)
  print(parse_boolean_text(answer_text))
  print()