In [1]:
import openai
import pandas as pd
with open('gpt_key.txt', 'r') as file:
    openai.api_key = file.read().rstrip()

### Temperature
So, temperature ranges from 0.0-1.0, where 0.0 has no randomness, and 1.0 allows GPT-3 to be very "creative." Temperature=0.0 will give the same exact result each time. For most natural language generation tasks, people set temperature at ~0.7. We can set it at 0.3 (which is good at just giving binary answers), and maybe also try values of 0.5, and 0.7 for comparison (which will probably involve us parsing some responses).

In [2]:
temperature = 0.3

### Interact with GPT3 (binary true or false)

In [3]:
df = pd.read_csv('medmcqa_train.csv')
subject_rows = {}
for idx, row in df.iterrows():
    subject_rows[row['subject']] = row

In [4]:
df_small = df.sample(n=4)
df = df_small

In [5]:
verbose = True

def suffix():
    return """ is therefore the answer, among A through D."""


def zero_shot_empty(question):
    return f"""
    Question: '{question}'
    Answer:
    """

def zero_shot_CoT(question):
    return f"""
    Question: '{question}'
    Answer: Let's think step by step """

def zero_shot_empty_plus_grounding(question, context):
    return f"""
    Context: '{context}'
    Question: '{question}'
    Answer:
    """

def one_shot(question, shot_question, shot_explanation, shot_answer):
    return f"""
    Question: '{shot_question}'
    Answer: Let's think step by step. '{shot_explanation}'
    '{shot_answer}' is therefore the answer, among A through D.

    Question: '{question}'
    Answer: Let's think step by step
    """

def correct_letter(correct_answer, text_answer):
    text_answer = text_answer.lower().split(' ')
    if correct_answer == 'A':
        if 'a' in text_answer:
            return 1
    if correct_answer == 'B':
        if 'b' in text_answer:
            return 1
    if correct_answer == 'C':
        if 'c' in text_answer:
            return 1
    if correct_answer == 'D':
        if 'd' in text_answer:
            return 1
    return 0

def score_row(row):
    zero_shot_empty_answer = openai.Completion.create(
                engine = "text-davinci-003", #text-davinci-003 davinci-instruct-beta
                prompt = zero_shot_empty(row['question']),
                suffix = suffix(),
                temperature = temperature)

    zero_shot_CoT_answer = openai.Completion.create(
                engine = "text-davinci-003", #text-davinci-003
                prompt = zero_shot_CoT(row['question']),
                suffix = suffix(),
                temperature = temperature)

    # zero_shot_empty_plus_grounding_answer = openai.Completion.create(
    #             engine = "text-davinci-001",
    #             prompt = zero_shot_empty_plus_grounding(row['question'], row['question']),
    #             temperature = temperature)

    example_row = subject_rows[row['subject']]

    one_shot_answer = openai.Completion.create(
                engine = "text-davinci-003",
                prompt = one_shot(row['question'], 
                                  example_row['question'],
                                  example_row['explanation'],
                                  example_row['answer_letter']),
                suffix = suffix(),
                temperature = temperature)

    if verbose:
        print('Correct answer:' + row['answer_letter'])
        print()
        print(zero_shot_empty_answer["choices"][0]["text"])
        print()
        print(zero_shot_CoT_answer["choices"][0]["text"])
        print()
        print(one_shot_answer["choices"][0]["text"])
        print()
        print()

    return {
        'zero_shot_empty': correct_letter(row['answer_letter'], zero_shot_empty_answer["choices"][0]["text"]),
        'zero_shot_CoT': correct_letter(row['answer_letter'], zero_shot_CoT_answer["choices"][0]["text"]),
        'one_shot': correct_letter(row['answer_letter'], one_shot_answer["choices"][0]["text"])
    }

r = df.apply(lambda row: score_row(row), axis=1)

Correct answer:D

(A) Vitamin D

Cholesterol is not a precursor for the synthesis

about this question. Cholesterol is a type of lipid, or fat. It

Ans. is 'D' i.e., Lipocortin *


Correct answer:C

C. Mandibular nerve

The jugular foramen is a hole

to answer this question. The jugular foramen is an opening in the skull

'Answer is D i.e., Internal jugular vein.o Jug


Correct answer:D

B. Water is available at a depth of more than 15 metres.
This

through the options. 
Option A is a problem village, as it is

'D' is the correct answer. "Problem village" include all of the


Correct answer:C

(C) Intensity of stimulus and sensation felt

Weber-F

about this question. Weber Fechner law is a psychophysical law that states

Answer: 'C' is therefore the answer, among A through D. Weber




In [6]:
for k, v in r.items():
    print(v)

{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}
{'zero_shot_empty': 0, 'zero_shot_CoT': 0, 'one_shot': 0}


In [15]:
one_shot_answer = openai.Completion.create(
                engine = "text-davinci-003",
                prompt = one_shot(row['question'], 
                                  df.iloc[2]['question'],
                                  df.iloc[2]['explanation'],
                                  df.iloc[2]['answer_letter']),
                suffix = suffix(),
                temperature = temperature)

In [16]:
one_shot_answer['choices'][0].text

'Gluten hypersensitivity is a condition in which the body reacts to gluten, a'