In [38]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [111]:
from medplexity.benchmarks.medmcqa.medmcqa_loader import MedMCQALoader
from medplexity.benchmarks.medmcqa.medmcqa_dataset_builder import MedMCQADatasetBuilder
from medplexity.benchmarks.medmcqa.medmcqa_prompt_template import MedMCQAPromptTemplate


In [None]:
# Set your OpenAI API key.
# Prefer exporting OPENAI_API_KEY in the environment that launches Jupyter so the
# secret is never saved inside the notebook. When the variable is unset this
# falls back to the empty placeholder string.
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [119]:
# Instantiate the MedMCQA loader.
# NOTE(review): `loader` is not referenced by any later cell visible here — confirm it is still needed.
loader = MedMCQALoader()

In [120]:
# Build the dataset from the "validation" argument (presumably the split name — confirm against the builder).
dataset = MedMCQADatasetBuilder().build_dataset("validation")

In [121]:
# Take the first item the dataset yields; the builtin iter() is the idiomatic
# way to obtain an iterator instead of calling the dunder method directly.
example_data_point = next(iter(dataset))

In [122]:
# Display the data point assigned to `example_data_point` as the cell output.
example_data_point

MedMCQADataPoint(input=MedMCQAInput(question='Which of the following is not true for myelinated nerve fibers:', options=['Impulse through myelinated fibers is slower than non-myelinated fibers', 'Membrane currents are generated at nodes of Ranvier', 'Saltatory conduction of impulses is seen', 'Local anesthesia is effective only when the nerve is not covered by myelin sheath']), expected_output=0, metadata=MedMCQAOutputMetadata(explanation=None, subject_name='Physiology'))

In [123]:
from medplexity.benchmarks.medmcqa.medmcqa_dataset_builder import MedMCQAInput

def input_adapter(medmcqa_input: MedMCQAInput):
    """Render a MedMCQA input through ``MedMCQAPromptTemplate``.

    A new template instance is created on every call, and the input's
    ``question`` and ``options`` are passed to its ``format`` method.

    Args:
        medmcqa_input: The benchmark input providing ``question`` and ``options``.

    Returns:
        Whatever ``MedMCQAPromptTemplate.format`` returns for these fields.
    """
    template = MedMCQAPromptTemplate()
    rendered = template.format(
        question=medmcqa_input.question, options=medmcqa_input.options
    )
    return rendered

In [124]:
from medplexity.benchmarks.medmcqa.medmcqa_prompt_template import AnswerWithExplanation

def output_adapter(output_json: str) -> AnswerWithExplanation:
    """Parse the model's raw JSON response.

    Args:
        output_json: JSON string handed to
            ``MedMCQAPromptTemplate.parser.model_validate_json``.

    Returns:
        The object produced by that validation call.
    """
    return MedMCQAPromptTemplate.parser.model_validate_json(output_json)

In [125]:
from medplexity.llms.openai_caller import OpenAI
from medplexity.chains.evaluation_adapter_chain import EvaluationAdapterChain

# Assemble the chain: an OpenAI caller configured with the API key, plus the
# input/output adapter functions.
chain = EvaluationAdapterChain(
    llm=OpenAI(api_token=OPENAI_API_KEY),
    input_adapter=input_adapter,
    output_adapter=output_adapter,
)

In [126]:
def comparator(expected_output: int, predicted_output: AnswerWithExplanation):
    """Check whether the model's answer letter refers to the expected option.

    Args:
        expected_output: Zero-based index of the correct option as stored in
            the dataset (the displayed example data point has
            ``expected_output=0`` for its first option).
        predicted_output: Parsed model response whose ``answer`` attribute is
            one of ``"(A)"``, ``"(B)"``, ``"(C)"`` or ``"(D)"``.

    Returns:
        ``True`` when the predicted letter maps to ``expected_output``,
        ``False`` otherwise.

    Raises:
        KeyError: If ``predicted_output.answer`` is not one of the four labels.
    """
    # Labels map to zero-based indices so that "(A)" lines up with an
    # expected_output of 0, matching the dataset's indexing.
    letter_to_idx = {"(A)": 0, "(B)": 1, "(C)": 2, "(D)": 3}
    # Echo the parsed response so each evaluated item can be inspected in the output.
    print(predicted_output)
    predicted_idx = letter_to_idx[predicted_output.answer]

    return expected_output == predicted_idx

In [127]:
from medplexity.evaluators.sequential_evaluator import SequentialEvaluator

# Pair the chain with the comparator function in a SequentialEvaluator.
evaluator = SequentialEvaluator(chain=chain, comparator=comparator)

In [128]:
# Display the `input` attribute of the first data point.
dataset[0].input

MedMCQAInput(question='Which of the following is not true for myelinated nerve fibers:', options=['Impulse through myelinated fibers is slower than non-myelinated fibers', 'Membrane currents are generated at nodes of Ranvier', 'Saltatory conduction of impulses is seen', 'Local anesthesia is effective only when the nerve is not covered by myelin sheath'])

In [129]:
# Run the evaluator on the slice dataset[0:2]; the progress bar in the output counts 2 items.
evaluation = evaluator.evaluate(dataset[0:2])

 50%|█████     | 1/2 [00:05<00:05,  5.12s/it]

answer='(A)' explanation="Let’s solve this step-by-step, referring to authoritative sources as needed. Myelinated nerve fibers conduct impulses faster than non-myelinated fibers, so the statement 'Impulse through myelinated fibers is slower than non-myelinated fibers' is not true. The other statements are all true for myelinated nerve fibers. Therefore, the correct answer is (A) Impulse through myelinated fibers is slower than non-myelinated fibers."


100%|██████████| 2/2 [00:09<00:00,  4.91s/it]

answer='(B)' explanation='Let’s solve this step-by-step, referring to authoritative sources as needed. The statement that glucose concentration in the capillaries is the same as that in glomerular filtrate is not true. Glucose is reabsorbed in the proximal tubules and therefore its concentration in the capillaries is higher than in the glomerular filtrate. Therefore, option (B) is the correct answer.'





In [130]:
# Unpack the two values returned by partition_by_correctness().
# Presumably these are the correctly and incorrectly answered items — confirm against the evaluation API.
correct, incorrect = evaluation.partition_by_correctness()