#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Key-Feature Item Generator via Ollama (gemma3:4b)
-------------------------------------------------

Requirements:
    pip install requests

Assumes:
    - Ollama is running locally (default: http://localhost:11434)
    - Model "gemma3:4b" is available in Ollama:
        ollama pull gemma3:4b
"""

import json
import textwrap
from typing import Optional, Dict, Any, List

import requests


# ============================================================
# 1. SYSTEM PROMPT FOR KEY-FEATURE MODELING
# ============================================================

# System-role message sent with every chat request. It fixes the model's
# persona (key-feature item writer), the item-writing rules, and the
# JSON-only output contract, including the "warnings" field for safety notes.
# NOTE: every line of the literal starts at column 0, so textwrap.dedent()
# has no common indentation to strip; the text keeps its leading and trailing
# newline.
SYSTEM_PROMPT = textwrap.dedent("""
You are an assessment expert and medical educator specializing in KEY-FEATURE (KF) test items for high-stakes exams.

Definition and purpose:
- Key-feature items present a concise clinical scenario.
- They focus on a small number of critical decisions that strongly affect patient outcomes.
- They emphasize clinical reasoning and prioritization, not simple recall.
- Multiple questions (decisions) can be attached to a single scenario.

General rules:
- Always target the specified examinee level and exam context.
- Use clear, concise, realistic clinical language.
- Avoid ambiguous wording, double negatives, and trivial “test-wise” clues.
- Ensure exactly one BEST answer per question when using single-best-answer MCQ.
- Avoid real patient identifiers and unnecessary demographic details.
- Avoid stereotypes, biased content, or stigmatizing language.
- Base medical content on widely accepted, contemporary guidelines.

Your tasks:
- Generate new key-feature items from a provided key-feature statement and blueprint constraints.
- Model parallel/variant items that share the same key feature but differ in surface features (e.g., age, setting, comorbidities).
- Provide brief rationales for correct and incorrect options.
- Provide metadata that is useful to psychometricians and SMEs (organ system, cognitive level, difficulty, etc.).

Output rules:
- Follow the requested JSON schema EXACTLY.
- Return ONLY valid JSON (no extra comments, no Markdown, no explanations outside the JSON).
- If the user’s instructions conflict with patient safety or accepted practice, follow the safest reasonable standard and record a note in the "warnings" field in the JSON.
""")


# ============================================================
# 2. USER PROMPT BUILDER
# ============================================================

def _json_string_literal(value: Any) -> str:
    """Render *value* as a double-quoted, JSON-escaped string literal."""
    # ensure_ascii=False keeps non-ASCII clinical text readable in the prompt;
    # str() preserves the old behaviour of quoting whatever was passed in.
    return json.dumps(str(value), ensure_ascii=False)


def build_kf_user_prompt(
    exam_name: str,
    target_level: str,
    clinical_setting: str,
    key_feature_statement: str,
    primary_decision_type: str,
    organ_system: str,
    typical_presentation: str,
    cognitive_level: str,
    intended_difficulty: str,
    max_scenario_words: int = 150,
    response_format: str = "single_best_answer",  # or "short_answer"
    num_options: int = 5,
    num_scenarios: int = 2,
    questions_per_scenario: str = "2-3",
    pitfalls: Optional[str] = None,
    item_purpose: str = "pretest"
) -> str:
    """
    Build the user prompt text for key-feature item modeling.

    The prompt has two parts joined by a blank line: a plain-text task and
    blueprint section, followed by an OUTPUT FORMAT section containing the
    JSON skeleton the model is asked to fill in.

    Args:
        exam_name: Exam the items are written for.
        target_level: Examinee level to target.
        clinical_setting: Clinical setting of the scenarios.
        key_feature_statement: The key feature every item must test.
        primary_decision_type: Kind of decision tested (also used as a
            blueprint tag and as "key_decision_type" in the skeleton).
        organ_system: Organ system / content area.
        typical_presentation: Typical presentation to base scenarios on.
        cognitive_level: Intended cognitive level.
        intended_difficulty: Intended difficulty.
        max_scenario_words: Upper bound on scenario length, in words.
        response_format: "single_best_answer" or "short_answer".
        num_options: Number of options per MCQ.
        num_scenarios: Number of distinct scenarios requested.
        questions_per_scenario: Questions per scenario, e.g. "2-3".
        pitfalls: Optional enemy options / common errors; None or a blank
            string is rendered as "none".
        item_purpose: Value for the "item_purpose" metadata field.

    Returns:
        The complete user prompt as a single string.
    """

    # A blank string would otherwise leave an empty bullet in the prompt.
    if pitfalls is None or not pitfalls.strip():
        pitfalls = "none"

    # Values embedded inside the JSON skeleton must be JSON-escaped; otherwise
    # a quote, backslash or newline in a caller-supplied value would make the
    # "exact JSON structure" example itself invalid JSON. For ordinary text
    # the rendered result is identical to plain "..." interpolation.
    exam_name_json = _json_string_literal(exam_name)
    target_level_json = _json_string_literal(target_level)
    organ_system_json = _json_string_literal(organ_system)
    clinical_setting_json = _json_string_literal(clinical_setting)
    cognitive_level_json = _json_string_literal(cognitive_level)
    intended_difficulty_json = _json_string_literal(intended_difficulty)
    decision_type_json = _json_string_literal(primary_decision_type)
    response_format_json = _json_string_literal(response_format)
    item_purpose_json = _json_string_literal(item_purpose)

    # Core instructions and blueprint (free text, so values are used verbatim)
    core = f"""
TASK
You will generate key-feature (KF) items for a high-stakes exam, following the schema and constraints below.

EXAM CONTEXT
- Exam name: {exam_name}
- Target examinee level: {target_level}
- Clinical setting: {clinical_setting}

KEY FEATURE FOCUS
- Key-feature statement (1–2 sentences):
  {key_feature_statement}
- Primary decision type:
  {primary_decision_type}
- Common pitfalls or enemy options to consider (optional):
  {pitfalls}

BLUEPRINT CONSTRAINTS
- Organ system / content area: {organ_system}
- Typical presentation: {typical_presentation}
- Cognitive level: {cognitive_level}
- Intended difficulty: {intended_difficulty}
- Maximum scenario length: {max_scenario_words} words
- Response format: {response_format}
- Number of options per MCQ (if applicable): {num_options}

ITEM MODELING REQUIREMENTS
- Number of distinct clinical scenarios (cases): {num_scenarios}
- For each scenario, number of questions (decisions): {questions_per_scenario}
- Scenarios should:
  - Share the same underlying key feature.
  - Differ meaningfully in surface features (age, comorbidities, setting, etc.).
- Avoid reusing identical wording across items except for technical terms.

ADDITIONAL RULES
- Use realistic but concise clinical details.
- Avoid excessive lab lists; include only findings that matter for the key decision.
- Ensure each question has exactly one best answer (for MCQ).
- Distractors should be plausible and represent common errors or misconceptions, not obviously wrong options.
""".strip()

    # JSON schema / output format specification.
    # Doubled braces are literal braces in the f-string.
    json_schema = f"""
OUTPUT FORMAT
Return the result as VALID JSON ONLY, with NO additional text or comments.

Use this exact JSON structure:

{{
  "items": [
    {{
      "item_id": "KF_<short_descriptor>_<index>",
      "scenario": {{
        "title": "Short scenario title",
        "text": "Full clinical scenario text (<= {max_scenario_words} words)."
      }},
      "questions": [
        {{
          "question_id": "Q1",
          "lead_in": "Question that directly tests the key decision.",
          "response_format": {response_format_json},
          "options": [
            {{
              "label": "A",
              "text": "Option text.",
              "is_key": true,
              "rationale": "Why this is the best answer based on the key feature and current guidelines."
            }},
            {{
              "label": "B",
              "text": "Option text.",
              "is_key": false,
              "rationale": "Why this is incorrect (a common pitfall or misconception)."
            }}
            // Add C, D, E as needed when response_format = "single_best_answer"
          ],
          "short_answer_key": null
          // If response_format = "short_answer", set options = [] and use:
          // "short_answer_key": "Short model answer or key points."
        }}
        // Add Q2, Q3, etc., if multiple questions per scenario
      ],
      "key_feature_statement": "Copy or paraphrase the key feature this item targets.",
      "key_decision_type": {decision_type_json},
      "metadata": {{
        "exam_name": {exam_name_json},
        "target_level": {target_level_json},
        "organ_system": {organ_system_json},
        "clinical_setting": {clinical_setting_json},
        "cognitive_level": {cognitive_level_json},
        "intended_difficulty": {intended_difficulty_json},
        "blueprint_tags": [
          "core complaint or symptom",
          "core disease entity",
          {decision_type_json}
        ],
        "item_purpose": {item_purpose_json},
        "notes_for_reviewers": "Short note for SMEs or psychometricians.",
        "warnings": "Describe any safety, guideline, or bias concerns. Use empty string if none."
      }}
    }}
  ]
}}

CONSTRAINTS
- Do NOT include any text outside the JSON.
- The JSON must be syntactically valid (no trailing commas, matching quotes/brackets).
- Ensure every MCQ has exactly one option with "is_key": true (when using MCQs).
- Ensure content is medically safe and aligned with contemporary guidelines for the specified context.
""".strip()

    return core + "\n\n" + json_schema


# ============================================================
# 3. OLLAMA CALL WRAPPER
# ============================================================

def call_ollama_chat(
    system_prompt: str,
    user_prompt: str,
    model: str = "gemma3:4b",
    base_url: str = "http://localhost:11434",
    timeout: int = 600,
) -> str:
    """
    Call Ollama /api/chat with a system + user prompt.
    Returns the raw assistant content string.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        model: Ollama model name placed in the request payload.
        base_url: Base URL of the Ollama server; a trailing slash is tolerated.
        timeout: Timeout passed to requests.post.

    Returns:
        The value found at data["message"]["content"] in the JSON response.

    Raises:
        ConnectionError: If requests reports a connection error.
        RuntimeError: If the server answers with an HTTP error status, the
            body is not JSON, or the JSON lacks message.content.
        requests.exceptions.RequestException: Other transport failures are
            not translated and propagate unchanged.
    """
    # Strip trailing slashes so "http://host:11434/" does not yield "//api/chat".
    url = f"{base_url.rstrip('/')}/api/chat"
    payload = {
        "model": model,
        "stream": False,  # ask for one complete response instead of chunks
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    }

    try:
        resp = requests.post(url, json=payload, timeout=timeout)
        resp.raise_for_status()
    except requests.exceptions.ConnectionError as err:
        raise ConnectionError(
            f"Cannot connect to Ollama at {base_url}. "
            "Please ensure Ollama is running: 'ollama serve'"
        ) from err
    except requests.exceptions.HTTPError as err:
        raise RuntimeError(
            f"Ollama API error: {err}. "
            f"Check if model '{model}' is installed: 'ollama list'"
        ) from err

    # A non-JSON body (e.g. an HTML error page from a proxy) is reported the
    # same way as any other malformed response.
    try:
        data = resp.json()
    except ValueError as err:
        raise RuntimeError(
            f"Unexpected Ollama response format: {resp.text[:500]}"
        ) from err

    # Expected Ollama chat response shape:
    # { "message": { "role": "assistant", "content": "..." }, ... }
    # TypeError covers a null/non-dict "message" or a non-dict top level.
    try:
        content = data["message"]["content"]
    except (KeyError, TypeError) as err:
        raise RuntimeError(f"Unexpected Ollama response format: {data}") from err  # noqa: TRY003

    return content


def _loads_model_json(raw: str) -> Any:
    """Parse model output as JSON, tolerating text around the JSON object."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Models often wrap the payload in Markdown fences (```json ... ```)
        # or add a sentence before/after it despite instructions. Retry on
        # the outermost {...} span; if there is none, keep the original error.
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(raw[start:end + 1])


def generate_kf_items(
    exam_name: str,
    target_level: str,
    clinical_setting: str,
    key_feature_statement: str,
    primary_decision_type: str,
    organ_system: str,
    typical_presentation: str,
    cognitive_level: str,
    intended_difficulty: str,
    max_scenario_words: int = 150,
    response_format: str = "single_best_answer",
    num_options: int = 5,
    num_scenarios: int = 2,
    questions_per_scenario: str = "2-3",
    pitfalls: Optional[str] = None,
    item_purpose: str = "pretest",
    model: str = "gemma3:4b",
    base_url: str = "http://localhost:11434",
) -> Dict[str, Any]:
    """
    High-level wrapper:
    - builds user prompt
    - calls Ollama
    - parses JSON into Python dict

    The blueprint arguments (exam_name through item_purpose) are forwarded
    unchanged to build_kf_user_prompt; model and base_url are forwarded to
    call_ollama_chat together with SYSTEM_PROMPT.

    Returns:
        The decoded JSON produced by the model.

    Raises:
        json.JSONDecodeError: If the model output cannot be parsed as JSON,
            even after retrying on the outermost {...} span. The raw output
            is printed first to help debug the prompt.
        Exception: Anything raised by call_ollama_chat propagates unchanged.
    """
    user_prompt = build_kf_user_prompt(
        exam_name=exam_name,
        target_level=target_level,
        clinical_setting=clinical_setting,
        key_feature_statement=key_feature_statement,
        primary_decision_type=primary_decision_type,
        organ_system=organ_system,
        typical_presentation=typical_presentation,
        cognitive_level=cognitive_level,
        intended_difficulty=intended_difficulty,
        max_scenario_words=max_scenario_words,
        response_format=response_format,
        num_options=num_options,
        num_scenarios=num_scenarios,
        questions_per_scenario=questions_per_scenario,
        pitfalls=pitfalls,
        item_purpose=item_purpose,
    )

    raw = call_ollama_chat(
        system_prompt=SYSTEM_PROMPT,
        user_prompt=user_prompt,
        model=model,
        base_url=base_url,
    )

    # Try to parse JSON (strict first, then the lenient fallback)
    try:
        parsed = _loads_model_json(raw)
    except json.JSONDecodeError:
        # If decoding fails, show the raw text so you can debug the prompt
        print("⚠️ Failed to parse JSON from model output. Raw output:\n")
        print(raw)
        # Bare raise keeps the original traceback intact.
        raise

    return parsed


# ============================================================
# 4. EXAMPLE USAGE
# ============================================================

if __name__ == "__main__":
    # Demo run: one blueprint for an Internal Medicine exam whose key feature
    # is the initial management of STEMI in the emergency department.
    exam_name = "Internal Medicine Certification"
    target_level = "Final-year medical student"
    clinical_setting = "Emergency department"
    key_feature_statement = (
        "Early recognition and immediate management of "
        "ST-elevation myocardial infarction "
        "in a patient presenting with acute chest pain."
    )
    primary_decision_type = "initial management"
    organ_system = "cardiovascular"
    typical_presentation = (
        "Middle-aged adult with acute-onset chest pain, "
        "cardiovascular risk factors, "
        "and typical ECG changes."
    )
    cognitive_level = "application"
    intended_difficulty = "moderate"

    # Gather every keyword argument in one mapping so the call site stays short.
    generation_kwargs = {
        "exam_name": exam_name,
        "target_level": target_level,
        "clinical_setting": clinical_setting,
        "key_feature_statement": key_feature_statement,
        "primary_decision_type": primary_decision_type,
        "organ_system": organ_system,
        "typical_presentation": typical_presentation,
        "cognitive_level": cognitive_level,
        "intended_difficulty": intended_difficulty,
        "max_scenario_words": 150,
        "response_format": "single_best_answer",
        "num_options": 5,
        "num_scenarios": 2,
        "questions_per_scenario": "2-3",
        "pitfalls": "Misclassifying ACS as non-cardiac pain; delaying reperfusion therapy.",
        "item_purpose": "pretest",
        "model": "gemma3:4b",
        "base_url": "http://localhost:11434",
    }

    try:
        result = generate_kf_items(**generation_kwargs)
        # Dump the generated items as indented JSON, keeping non-ASCII text as-is.
        print(json.dumps(result, indent=2, ensure_ascii=False))
    except Exception as exc:
        # Demo boundary: report the failure instead of showing a traceback.
        print(f"Error during generation: {exc}")