# 🏥 GPT-Powered Triage Scenario Generator

Generate gold-standard synthetic triage training data for the Manchester Triage System using GPT-5, falling back to the best available OpenAI chat model.

---

In [None]:
%pip install -q openai tqdm

In [None]:
import openai
import json
import random
from tqdm import tqdm
from google.colab import userdata

client = openai.OpenAI(api_key=userdata.get('OPENAI_API_KEY'))

# Try GPT-5.2, fallback to available models.
# Candidates are listed in order of preference; the first one that answers a
# tiny probe request becomes MODEL for the rest of the notebook.
MODELS = ['gpt-5.2', 'gpt-5', 'gpt-4.5-turbo', 'gpt-4o', 'gpt-4-turbo']
MODEL = None

for m in MODELS:
    try:
        client.chat.completions.create(model=m, messages=[{"role": "user", "content": "test"}], max_tokens=5)
    except Exception as err:
        # The probe is best-effort: report why this candidate was rejected
        # (unknown model, bad API key, unsupported parameter, ...) and try the
        # next one. Unlike a bare `except:`, this lets KeyboardInterrupt and
        # SystemExit propagate so the cell can still be interrupted.
        print(f"   {m} unavailable: {str(err)[:80]}")
        continue
    MODEL = m
    print(f"‚úÖ Using: {m}")
    break

if not MODEL:
    print("‚ùå No model available")

In [None]:
# Gold-standard scenario generation prompt
# Sent as the "system" message of every request made by generate_scenarios().
# It fixes the persona, lists the fields each scenario must contain
# (chief_complaint, vitals, history) and asks for a bare JSON array so the
# reply can be passed to json.loads().
SYSTEM_PROMPT = """You are an expert A&E Triage Nurse and clinical educator with 20 years of experience.
You are creating realistic training scenarios for the Manchester Triage System.

For each scenario, you MUST provide:
1. chief_complaint: What the patient or relative actually SAYS (in their own words, not clinical terminology)
2. vitals: Physiologically consistent values matching the presentation
   - hr (heart rate bpm)
   - bp_sys, bp_dia (blood pressure mmHg)
   - spo2 (oxygen saturation %)
   - rr (respiratory rate /min)
   - temp (temperature ¬∞C)
   - avpu (A=Alert, V=Voice, P=Pain, U=Unresponsive)
3. history: Brief clinical history (age, gender, relevant PMH, time course)

CRITICAL RULES:
- Vitals MUST be physiologically consistent (e.g., shock = low BP + high HR)
- Chief complaint should use patient language, not medical jargon
- Include diverse demographics (age, gender, ethnicity where relevant)
- Cover atypical presentations (elderly MI without chest pain, etc.)

Return ONLY valid JSON array, no markdown."""

# User prompts keyed by Manchester Triage System category number (1-5).
# Each prompt asks for 10 scenarios and names the discriminators and example
# presentations for that category. generate_scenarios() looks a prompt up by
# category and appends a per-batch note to it before sending.
CATEGORY_PROMPTS = {
    # Category 1: Immediate / Red
    1: """Generate 10 Category 1 (IMMEDIATE/Red) scenarios.
MTS Discriminators: Airway compromise, Inadequate breathing, Shock, Unresponsive, Currently fitting

Include diverse presentations:
- Cardiac emergencies (STEMI, arrest, arrhythmias)
- Respiratory (airway obstruction, severe asthma, anaphylaxis)
- Neurological (stroke, status epilepticus, meningitis)
- Trauma (major bleeding, tension pneumothorax)
- Sepsis/shock
- Pediatric emergencies
- Obstetric emergencies""",
    
    # Category 2: Very urgent / Orange
    2: """Generate 10 Category 2 (VERY URGENT/Orange) scenarios.
MTS Discriminators: Severe pain, Altered consciousness, Very hot adult/child, Significant mechanism

Include:
- Chest pain (possible ACS)
- Severe headache (possible SAH)
- Focal neurological deficit
- High fever with red flags
- Significant trauma without shock
- Acute abdomen""",
    
    # Category 3: Urgent / Yellow
    3: """Generate 10 Category 3 (URGENT/Yellow) scenarios.
MTS Discriminators: Moderate pain, Hot adult/child, Persistent vomiting, Pleuritic pain

Include:
- COPD exacerbation
- Cellulitis
- Renal colic
- Fractures
- Moderate asthma
- Acute confusion (elderly)""",
    
    # Category 4: Standard / Green
    4: """Generate 10 Category 4 (STANDARD/Green) scenarios.
MTS Discriminators: Recent mild pain, Warm, Recent problem

Include:
- Minor injuries
- Viral illnesses
- Stable chronic conditions
- Minor lacerations
- Sprains and strains""",
    
    # Category 5: Non-urgent / Blue
    5: """Generate 10 Category 5 (NON-URGENT/Blue) scenarios.
MTS Discriminators: Recent mild problem

Include:
- Prescription requests
- Chronic stable issues
- Minor ailments suitable for GP
- Social admissions
- Minor skin conditions"""
}

print("‚úÖ Prompts configured")

In [None]:
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    if not text.startswith('```'):
        return text
    # Keep what sits between the opening and closing fence, then drop an
    # optional "json" language tag left over from the opening fence.
    fenced = text.split('```')[1]
    if fenced.startswith('json'):
        fenced = fenced[4:]
    return fenced


def _extract_scenarios(payload):
    """Return the scenario dicts contained in a decoded JSON payload.

    The prompt asks for a bare JSON array, but the reply may also be a single
    scenario object or an object wrapping the array (e.g. {"scenarios": [...]}).
    Those shapes are unwrapped; array items that are not objects are dropped,
    because downstream code calls ``.get`` on every scenario.

    Raises:
        ValueError: if no list of scenarios can be found in *payload*.
    """
    if isinstance(payload, dict):
        if 'chief_complaint' in payload:
            # A single scenario object rather than an array of them.
            payload = [payload]
        else:
            nested = [value for value in payload.values() if isinstance(value, list)]
            if len(nested) == 1:
                payload = nested[0]
    if not isinstance(payload, list):
        raise ValueError(f"expected a JSON array, got {type(payload).__name__}")
    return [item for item in payload if isinstance(item, dict)]


def generate_scenarios(category, n_batches=5):
    """Generate scenarios for a category using GPT.

    Sends ``n_batches`` chat-completion requests, each combining SYSTEM_PROMPT
    with ``CATEGORY_PROMPTS[category]`` plus a batch note, and parses every
    reply as JSON.

    Args:
        category: Key into CATEGORY_PROMPTS (1-5).
        n_batches: Number of requests to send.

    Returns:
        A list with the scenario dicts from every batch that succeeded. A
        failed batch (API error, unknown category, unparseable reply) is
        reported on stdout and contributes nothing.
    """
    all_scenarios = []

    for batch in range(n_batches):
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": CATEGORY_PROMPTS[category] + f"\n\nBatch {batch+1} - ensure unique scenarios."}
                ],
                temperature=0.8,
                max_tokens=4000
            )

            # The message content may be None (the API declares it nullable);
            # treat that as an empty reply so the failure surfaces as a JSON
            # error instead of an AttributeError on None.
            content = (response.choices[0].message.content or '').strip()
            # Clean up markdown if present, then keep only scenario objects so
            # a JSON object reply cannot leak its keys into the result.
            scenarios = _extract_scenarios(json.loads(_strip_code_fence(content)))
            all_scenarios.extend(scenarios)
            print(f"  Batch {batch+1}: {len(scenarios)} scenarios")

        except Exception as e:
            # Deliberately best-effort: one bad batch must not abort the run.
            print(f"  Batch {batch+1} error: {str(e)[:50]}")

    return all_scenarios

print("‚úÖ Generator ready")

In [None]:
# Generate scenarios for all categories
print("üè• Generating Gold-Standard Triage Scenarios...\n")

# Number of requests per category; Category 1 is deliberately over-sampled.
batches_per_category = {1: 10, 2: 5, 3: 5, 4: 5, 5: 3}
all_scenarios = {}

# The dict preserves insertion order, so categories run 1 through 5.
for cat, batch_count in batches_per_category.items():
    print(f"\nüìã Category {cat}:")
    scenarios = generate_scenarios(cat, n_batches=batch_count)
    all_scenarios[cat] = scenarios
    print(f"   Total: {len(all_scenarios[cat])} scenarios")

print("\n‚úÖ Generation complete!")

In [None]:
# Format for training
def _vital(vitals, key, default):
    """Return ``vitals[key]`` as a float, tolerating loosely typed input.

    A missing key or a ``None`` value yields *default*. Numbers and numeric
    strings are converted directly; a number followed by a unit ("94%",
    "38.4 C") is also accepted.

    Raises:
        ValueError: if the value is present but no single number can be read
            from it (e.g. "120/80" or "fast"), so bad data is never replaced
            by a made-up reading.
    """
    import re  # local import: only needed for unit-suffixed strings

    value = vitals.get(key)
    if value is None:
        return float(default)
    try:
        return float(value)
    except (TypeError, ValueError):
        pass
    match = re.match(r'\s*(-?\d+(?:\.\d+)?)\D*$', str(value))
    if match is None:
        raise ValueError(f"vital {key!r} is not numeric: {value!r}")
    return float(match.group(1))


def format_observation(s, waiting_room=12, available_beds=4):
    """Render a scenario dict as the triage observation text used as model input.

    Args:
        s: Scenario dict with optional keys 'chief_complaint', 'vitals'
            (dict with hr, bp_sys, bp_dia, spo2, rr, temp, avpu) and 'history'.
        waiting_room: Number of waiting patients shown in the footer.
        available_beds: Number of free beds shown in the footer.

    Returns:
        The formatted multi-line observation string. Missing vitals fall back
        to 0 (37.0 for temperature, 'A' for AVPU); missing text fields to
        'Unknown'.

    Raises:
        ValueError: if a vital is present but cannot be read as a number.
    """
    # 'vitals' may be absent or null in the generated JSON; both mean "none given".
    vitals = s.get('vitals') or {}
    hr = _vital(vitals, 'hr', 0)
    bp_sys = _vital(vitals, 'bp_sys', 0)
    bp_dia = _vital(vitals, 'bp_dia', 0)
    spo2 = _vital(vitals, 'spo2', 0)
    rr = _vital(vitals, 'rr', 0)
    temp = _vital(vitals, 'temp', 37.0)
    return f"""PATIENT PRESENTING TO A&E TRIAGE

Chief Complaint: "{s.get('chief_complaint', 'Unknown')}"

Vitals:
- HR: {hr:.0f} bpm
- BP: {bp_sys:.0f}/{bp_dia:.0f} mmHg
- SpO2: {spo2:.0f}%
- RR: {rr:.0f} /min
- Temp: {temp:.1f}C
- AVPU: {vitals.get('avpu', 'A')}

History: {s.get('history', 'Unknown')}

WAITING ROOM: {waiting_room} patients | AVAILABLE BEDS: {available_beds}

What is your triage decision?"""

def get_response(cat):
    """Build the fixed target answer for a scenario of MTS category *cat*.

    Args:
        cat: Triage category number, 1 (Immediate) through 5 (Non-urgent).

    Returns:
        The "TRIAGE DECISION" text naming the category, the intervention and
        a one-sentence clinical reasoning.

    Raises:
        KeyError: if *cat* is not one of the five known categories.
    """
    by_category = {
        1: {
            "name": "Immediate (Red)",
            "action": "send_to_resus",
            "reason": "Life-threatening presentation requiring immediate resuscitation.",
        },
        2: {
            "name": "Very Urgent (Orange)",
            "action": "send_to_majors",
            "reason": "Time-critical condition. Requires senior review within 10 minutes.",
        },
        3: {
            "name": "Urgent (Yellow)",
            "action": "send_to_majors",
            "reason": "Urgent presentation requiring assessment within 60 minutes.",
        },
        4: {
            "name": "Standard (Green)",
            "action": "send_to_minors",
            "reason": "Stable presentation suitable for minor injuries/illness stream.",
        },
        5: {
            "name": "Non-urgent (Blue)",
            "action": "refer_to_gp",
            "reason": "Non-urgent presentation. Redirect to primary care.",
        },
    }
    decision = by_category[cat]
    lines = [
        "TRIAGE DECISION:",
        "",
        f"Category: {cat} - {decision['name']}",
        f"Intervention: {decision['action']}",
        "",
        f"Clinical Reasoning: {decision['reason']}",
    ]
    return "\n".join(lines)

# Create training dataset
# One instruction/input/output example per generated scenario, labelled with
# the category it was generated for.
training_data = []
skipped = 0
for cat, scenarios in all_scenarios.items():
    for s in scenarios:
        try:
            example = {
                "instruction": "You are an expert A&E Triage Nurse using the Manchester Triage System. Assess the following patient and provide your triage decision with clinical reasoning.",
                "input": format_observation(s),
                "output": get_response(cat),
                "category": cat
            }
        except (AttributeError, KeyError, TypeError, ValueError) as err:
            # A malformed scenario (not a dict, non-numeric vitals, ...) is
            # still skipped, but visibly, so shrinking output can be diagnosed.
            skipped += 1
            print(f"   Skipped malformed Category {cat} scenario: {str(err)[:80]}")
            continue
        training_data.append(example)

print(f"\nüìä Training examples created: {len(training_data)}")
for cat in range(1, 6):
    count = sum(1 for x in training_data if x['category'] == cat)
    print(f"   Category {cat}: {count}")
if skipped:
    print(f"   Skipped (malformed): {skipped}")

In [None]:
# Preview a sample
print("üìã Sample Generated Scenario (Category 1):\n")
# Generation is best-effort, so there may be no Category 1 example at all;
# next() without a default would raise StopIteration in that case.
sample = next((x for x in training_data if x['category'] == 1), None)
if sample is None:
    print("(no Category 1 examples were generated)")
else:
    print(sample['input'][:500])

In [None]:
# Save new scenarios
# Raw scenarios grouped by category, as one indented JSON document.
with open('gpt_generated_scenarios.json', 'w') as f:
    f.write(json.dumps(all_scenarios, indent=2))
print("‚úÖ Saved: gpt_generated_scenarios.json")

# Save as JSONL for training
# One JSON object per line, in the order the examples were built.
with open('gpt_train.jsonl', 'w') as f:
    f.writelines(json.dumps(example) + '\n' for example in training_data)
print(f"‚úÖ Saved: gpt_train.jsonl ({len(training_data)} examples)")

In [None]:
# Merge with existing training data
import os

existing_path = '../data/train.jsonl'
if os.path.exists(existing_path):
    # Read inside a context manager so the file handle is closed, and skip
    # blank lines (e.g. a trailing newline at end of file), which json.loads
    # would reject.
    with open(existing_path) as f:
        existing = [json.loads(line) for line in f if line.strip()]
    merged = existing + training_data
    
    with open('../data/train_expanded.jsonl', 'w') as f:
        for example in merged:
            f.write(json.dumps(example) + '\n')
    
    print(f"\nüìä MERGED DATASET:")
    print(f"   Original: {len(existing)}")
    print(f"   Generated: {len(training_data)}")
    print(f"   TOTAL: {len(merged)}")
    print(f"\n‚úÖ Saved: ../data/train_expanded.jsonl")
else:
    print("‚ö†Ô∏è No existing training data found")

In [None]:
# Download files
from google.colab import files

# Offer both generated artefacts for download: the training JSONL first,
# then the raw scenarios.
for filename in ('gpt_train.jsonl', 'gpt_generated_scenarios.json'):
    files.download(filename)