In [24]:
import openai
import pandas as pd
with open('gpt_key.txt', 'r') as file:
    openai.api_key = file.read().rstrip()

### Temperature
So, temperature ranges from 0.0-1.0, where 0.0 has no randomness, and 1.0 allows GPT-3 to be very "creative." Temperature=0.0 will give the same exact result each time. For most natural language generation tasks, people set temperature at ~0.7. We can set it at 0.3 (which is good at just giving binary answers), and maybe also try values of 0.5, and 0.7 for comparison (which will probably involve us parsing some responses).

In [25]:
temperature = 0.3

### Interact with GPT3 (binary true or false)

In [26]:
df = pd.read_csv('medmcqa_train.csv')
subject_rows = {}
for idx, row in df.iterrows():
    subject_rows[row['subject']] = row

In [27]:
df_small = df.sample(n=4)
df = df_small

In [41]:
verbose = True

def zero_shot_empty(question):
    return f"""
    Question: '{question}'

    Answer: among A through D, the answer is """

def zero_shot_CoT(question):
    return f"""
    Question: '{question}'
    Answer: Let's think step by step
    Therefore, among A through D, the answer is """

def zero_shot_empty_plus_grounding(question, context):
    return f"""
    Context: '{context}'
    Question: '{question}'
    
    Answer: among A through D, the answer is """

def one_shot(question, shot_question, shot_explanation, shot_answer):
    return f"""
    Question: '{shot_question}'
    Answer: Let's think step by step. '{shot_explanation}'
    Therefore, among A through D, the answer is '{shot_answer}'

    Question: '{question}'
    Answer: Let's think step by step
    Therefore, among A through D, the answer is """

def correct_letter(correct_answer, text_answer):
    text_answer = text_answer.lower().split(' ')
    if correct_answer == 'A':
        if 'a' in text_answer:
            return 1
    if correct_answer == 'B':
        if 'b' in text_answer:
            return 1
    if correct_answer == 'C':
        if 'c' in text_answer:
            return 1
    if correct_answer == 'D':
        if 'd' in text_answer:
            return 1
    return 0

def score_row(row):
    zero_shot_empty_answer = openai.Completion.create(
                engine = "text-davinci-001",
                prompt = zero_shot_empty(row['question']),
                temperature = temperature)

    zero_shot_CoT_answer = openai.Completion.create(
                engine = "text-davinci-001",
                prompt = zero_shot_CoT(row['question']),
                temperature = temperature)

    # zero_shot_empty_plus_grounding_answer = openai.Completion.create(
    #             engine = "text-davinci-001",
    #             prompt = zero_shot_empty_plus_grounding(row['question'], row['question']),
    #             temperature = temperature)

    example_row = subject_rows[row['subject']]

    one_shot_answer = openai.Completion.create(
                engine = "text-davinci-001",
                prompt = one_shot(row['question'], 
                                  example_row['question'],
                                  example_row['explanation'],
                                  example_row['answer_letter']),
                temperature = temperature)

    if verbose:
        print('Correct answer:' + row['answer_letter'])
        print()
        # print(zero_shot_empty(row['question']))
        print(zero_shot_empty_answer["choices"][0]["text"])
        print()
        # print(zero_shot_CoT(row['question']))
        print(zero_shot_CoT_answer["choices"][0]["text"])
        print()
        # print(one_shot(row['question'], 
        #                 example_row['question'],
        #                 example_row['explanation'],
        #                 example_row['answer_letter']))
        print(one_shot_answer["choices"][0]["text"])
        print()
        print()

    return {
        'zero_shot_empty': correct_letter(row['answer_letter'], zero_shot_empty_answer["choices"][0]["text"]),
        'zero_shot_CoT': correct_letter(row['answer_letter'], zero_shot_CoT_answer["choices"][0]["text"]),
        'one_shot': correct_letter(row['answer_letter'], one_shot_answer["choices"][0]["text"])
    }

r = df.apply(lambda row: score_row(row), axis=1)

Correct answer:B

 D) Proteinuria > 3.5 gm per 1.73 m2

 D: Proteinuria > 3.5 gm per 1.73 m2



'B'


Correct answer:A

 A) Doxycycline 300mg OD


    (D) Cotrimoxazole

 'B'




KeyboardInterrupt: 

In [38]:
for k, v in r.items():
    print(v)

{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 1, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}


### Manual inspection

In [None]:
p = data.iloc[7]['text']
print(p)

In [None]:
phrase = p
print("Phrase: " + phrase)
print()
for p_t, prompt in prompt_bank.items():
  response = openai.Completion.create(
              engine="text-davinci-001",
              prompt=a_prompt(phrase, prompt),
              temperature=temperature,
            )
  print("For prompt: " + prompt)
  print(response["choices"][0]["text"])
  print(parse_boolean_text(response["choices"][0]["text"]))
  print()

In [None]:
def parse_boolean_text(t): # (response["choices"][0]["text"])
  t = t.lower().split(' ')
  for val in ['true', 'false', 'yes', 'no', 'true.', 'false.', 'yes.', 'no.']:
    if val in t:
      return int(val == 'true') or int(val == 'yes')
  
  # Special case: sometimes GPT3 returns a negation.
  # this might be a bit of a hacky fix...
  if 'not' in t:
    return 0

  return -1