In [4]:
# Load the combined (literature + synthetic) dataset
import json
import pandas as pd

dataset_path = "../../datasets/combined/combined_jama.json"

# Decode explicitly as UTF-8 instead of relying on the platform's locale encoding,
# so non-ASCII characters in the cases are read identically on every machine
with open(dataset_path, "r", encoding="utf-8") as f:
    combined = json.load(f)

# Tabular view of the same records
dataset = pd.DataFrame(combined)

In [5]:
# Randomly pick 30 cases from the combined dataset; these are used for the
# diagnostic reasoning analysis and the comparison against human clinicians
import random
random.seed(1000)  # Fixed seed so the same 30 cases are drawn on every run

# Case IDs that must not be drawn again (clinicians already saw them in the previous evaluation round)
excluded_case_ids = [
    48, 81, 90, 142, 166, 168, 176, 177, 44, 43,
    113, 114, 131, 132, 152, 156, 21, 27, 50, 89,
    110, 123, 153, 175, 1001, 1002, 1010, 1011, 1014, 1019,
]

# Keep every case whose ID is not on the exclusion list, preserving dataset order
filtered_combined = []
for case in combined:
    if case['case_id'] in excluded_case_ids:
        continue
    filtered_combined.append(case)

# Sample without replacement from the remaining cases
reasoning_subset = random.sample(filtered_combined, k=30)

In [6]:
# Write the sampled cases to an Excel workbook; Excel is used so that Unicode
# characters such as quotation marks are preserved
reasoning_output_path = "../../datasets/reasoning/jama_reasoning_subset.xlsx"

# Convert the sampled records into a DataFrame (later cells use it in this form)
reasoning_subset = pd.DataFrame(reasoning_subset)

# index=False keeps the row index out of the spreadsheet
reasoning_subset.to_excel(reasoning_output_path, index=False)
print(f"Exported reasoning subset to {reasoning_output_path}.")

Exported reasoning subset to ../../datasets/reasoning/jama_reasoning_subset.xlsx.


In [7]:
# Define system instructions and user prompt
# Both files are decoded explicitly as UTF-8 rather than with the platform's locale
# encoding, so the prompt text sent to the model is the same on every machine
with open("../prompts/system_prompt.txt", encoding="utf-8") as f:
    system_prompt = f.read()

with open("../prompts/user_prompt.txt", encoding="utf-8") as f:
    user_prompt = f.read()

In [8]:
from google import genai
from google.genai import types
from dotenv import load_dotenv

# Load environment variables (including the API key) from the .env file FIRST:
# the client is constructed without an explicit key, so the key has to be present
# in the environment before genai.Client() is called
load_dotenv()

# Initialize the GenAI client
google_client = genai.Client()

True

In [36]:
# Function to generate a diagnosis and extract diagnostic reasoning from the thinking blocks
def generate_diagnostic_reasoning(client, model: str, system_prompt: str, user_prompt: str, vignette: str, temperature: float) -> tuple:
    """Ask the model for a diagnosis and return its thought summary and its answer.

    Args:
        client: API client exposing `models.generate_content(...)`.
        model: Model identifier; only names starting with "gemini" are supported.
        system_prompt: Text passed as the system instruction.
        user_prompt: Text placed in front of the vignette in the request contents.
        vignette: Case description, wrapped in <vignette> tags after the user prompt.
        temperature: Sampling temperature passed to the generation config.

    Returns:
        A `(reasoning, answer)` tuple. `reasoning` is the concatenated text of all
        parts flagged as thoughts, `answer` the concatenated text of all remaining
        non-empty parts. Either element is None when the response contains no
        part of that kind.

    Raises:
        ValueError: If `model` does not start with "gemini".
    """
    # Fail with a clear message instead of an UnboundLocalError at the return statement
    if not model.startswith("gemini"):
        raise ValueError(f"Unsupported model '{model}': only Gemini models are supported.")

    # Make API call and create response object
    response = client.models.generate_content(
        model=model,
        contents=user_prompt + "\n<vignette>\n" + vignette + "\n</vignette>",  # User prompt with inserted vignette
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(
                thinking_level="high",  # Use thinking_level for Gemini 3, not thinking_budget since it may result in subpar performance
                include_thoughts=True  # Include thought summaries in parts/thought within `response` parameters
            ),
            system_instruction=system_prompt,  # System prompt
            temperature=temperature  # Model temperature
        ),
    )

    # Collect every text part so that nothing is lost when the response holds
    # several thought parts or several answer parts
    thought_chunks = []
    answer_chunks = []
    for part in response.parts or []:  # `or []` tolerates a response without parts
        if not part.text:
            continue
        if part.thought:
            thought_chunks.append(part.text)  # Thought summary
        else:
            answer_chunks.append(part.text)  # Differential diagnosis list

    reasoning = "".join(thought_chunks) if thought_chunks else None
    answer = "".join(answer_chunks) if answer_chunks else None
    return reasoning, answer

In [37]:
# Iterate through the reasoning samples and generate diagnostic reasoning, saving the results
from tqdm import tqdm

model = "gemini-3-pro-preview"
n_cases = reasoning_subset.shape[0]

# `total` is given explicitly because iterrows() is a generator without a length
progress = tqdm(reasoning_subset.iterrows(), total=n_cases)
for position, (i, row) in enumerate(progress, start=1):
    # Set the description inside the loop: it needs the current row, which does not
    # exist before the first iteration, and it has to change from case to case
    progress.set_description(f"Generating diagnostic reasoning trace {position} out of {n_cases}, case {row['case_id']}")

    # Generate diagnostic reasoning
    reasoning, answer = generate_diagnostic_reasoning(google_client,
                                                      model,
                                                      system_prompt,
                                                      user_prompt,
                                                      row["vignette"],
                                                      1,  # Google advises keeping temperature at 1 for Gemini 3 to avoid messing with reasoning behavior
                                                    )

    # Save the results to the DataFrame (written per case, so completed cases survive an interruption)
    reasoning_subset.loc[i, "model_thoughts"] = reasoning
    reasoning_subset.loc[i, "model_diagnosis"] = answer
    print("Completed case ID:", row["case_id"])

Generating diagnostic reasoning trace 1 out of 30, case 181: 1it [00:52, 52.56s/it]

Completed case ID: 181


Generating diagnostic reasoning trace 1 out of 30, case 181: 1it [01:04, 64.32s/it]


KeyboardInterrupt: 

In [None]:
# Persist the annotated reasoning subset as JSON, one object per case;
# the model name is part of the file name
results_path = f"../../results/evaluate_diagnostic_reasoning/reasoning_samples_{model}.json"

reasoning_subset.to_json(
    path_or_buf=results_path,
    orient="records",
    indent=2,
)
print(f"Reasoning samples for {model} saved to {results_path}.")