In [None]:
import pandas as pd
import numpy as np
from tableone import TableOne
import warnings
warnings.filterwarnings('ignore')

# Load and preprocess data (matching table1 and table2 approach)
import os

# Data locations. The directory defaults to the original OneDrive folder but can
# be redirected with the ANALGESIA_DATA_DIR environment variable, so the notebook
# can be re-run on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'ANALGESIA_DATA_DIR',
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/data',
)
data_path = os.path.join(DATA_DIR, 'Masterarbeit Analgesie_24.07.2025.xlsx')
medic_data_path = os.path.join(DATA_DIR, 'Liste Notärzte-1.xlsx')

In [None]:
# Read the mission export and the physician list from Excel
data_df = pd.read_excel(data_path)
medic_df = pd.read_excel(medic_data_path)

# Keep one row per physician and expose the sex column as `physician_sex`
medic_df = (
    medic_df
    .drop_duplicates('Mitglieder mit Einsatzfunktion')
    .rename(columns={'Sex m/w': 'physician_sex'})
)

# Left-join the physician data onto the missions, keep one row per event
# number, then keep only patients whose on-scene VAS is above 3
data_df = (
    data_df
    .merge(
        medic_df,
        how='left',
        left_on='Mitglieder mit Einsatzfunktion',
        right_on='Mitglieder mit Einsatzfunktion',
    )
    .drop_duplicates(subset=["SNZ Ereignis Nr. "])
    .loc[lambda missions: missions["VAS_on_scene"] > 3]
)

# Cohort size, split at 16 years of age
print(f"Total patients after filtering VAS > 3 at scene: {len(data_df)}")
print(f"Adult patients: {(data_df['Alter '] >= 16).sum()}")
print(f"Pediatric patients: {(data_df['Alter '] < 16).sum()}")

In [None]:
def process_medication_data(df):
    """
    Parse the free-text 'Alle Medikamente' column into per-drug dose totals
    and binary "was given" indicators.

    Each cell is searched for Ketamin / Esketamin / Morphin (doses written as
    ``<number>mg``) and Fentanyl (``<number>mcg``); every dose found for a drug
    within one cell is summed. Matching is case-insensitive, and the ketamine
    pattern skips occurrences preceded by "Es" so esketamine is not counted twice.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified. If it has no 'Alle Medikamente'
        column, all doses are 0.0 and all indicators False.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these columns added (or overwritten):
        ``ketamine_dose``, ``esketamine_dose``, ``morphine_dose`` (mg),
        ``fentanyl_dose`` (mcg), ``any_opiate_dose``, ``any_opiate_given``,
        ``any_ketamine_given``.
    """
    import re

    # Output column -> extraction pattern, in the column order of the result.
    # NOTE(review): only '.' is accepted as decimal separator; a cell written
    # as "2,5mg" would be read as 5 mg — confirm the export never uses commas.
    dose_patterns = {
        'ketamine_dose': r'(?<!Es)ketamin[^;]*?(\d+(?:\.\d+)?)mg',
        'esketamine_dose': r'Esketamin[^;]*?(\d+(?:\.\d+)?)mg',
        'morphine_dose': r'Morphin[^;]*?(\d+(?:\.\d+)?)mg',
        'fentanyl_dose': r'Fentanyl[^;]*?(\d+(?:\.\d+)?)mcg',
    }

    def total_dose(cell, pattern):
        """Sum every dose `pattern` finds in one medication cell (0.0 if none)."""
        # Missing and empty cells carry no medication information.
        if pd.isna(cell) or not cell:
            return 0.0
        # The capture group only matches digits with an optional fraction, so
        # float() cannot fail here and no exception handling is needed.
        return float(sum(float(hit) for hit in pattern.findall(str(cell))))

    df_copy = df.copy()

    # Float columns from the start: doses may be fractional, and accumulating
    # floats into an integer column is rejected by newer pandas versions.
    for column in dose_patterns:
        df_copy[column] = 0.0

    if 'Alle Medikamente' in df_copy.columns:
        med_text = df_copy['Alle Medikamente']
        for column, regex in dose_patterns.items():
            compiled = re.compile(regex, re.IGNORECASE)
            df_copy[column] = med_text.map(
                lambda cell: total_dose(cell, compiled)
            ).astype(float)

    # Create combined medication variables
    # NOTE(review): this adds morphine (mg) to fentanyl (mcg), so the sum mixes
    # units; kept for compatibility, but it is not an equivalent-dose measure.
    df_copy['any_opiate_dose'] = df_copy['morphine_dose'] + df_copy['fentanyl_dose']
    df_copy['any_opiate_given'] = (df_copy['morphine_dose'] > 0) | (df_copy['fentanyl_dose'] > 0)
    df_copy['any_ketamine_given'] = (df_copy['ketamine_dose'] > 0) | (df_copy['esketamine_dose'] > 0)

    return df_copy

# Derive the dose and indicator columns for the filtered cohort
data_df = process_medication_data(data_df)

# Headcount and share of the cohort for each analgesic class
print("Medication summary:")
for drug_label, given in (
    ("opiates", data_df['any_opiate_given']),
    ("ketamine", data_df['any_ketamine_given']),
    ("any analgesic", data_df['any_opiate_given'] | data_df['any_ketamine_given']),
):
    print(f"Patients receiving {drug_label}: {given.sum()} ({given.mean():.1%})")

In [None]:
# Debug: inspect the column names so later cells can reference the right ones
print("Available columns in dataset:")
all_cols = data_df.columns.tolist()
print(f"Total columns: {len(all_cols)}")

# Any column mentioning GCS, regardless of case
print("\nLooking for GCS-related columns:")
gcs_cols = [col for col in all_cols if 'gcs' in col.lower()]
print(gcs_cols)

# Other columns the analysis relies on, matched case-insensitively;
# patterns without any hit are skipped silently
print("\nLooking for other key columns:")
key_patterns = ['Einsatzart', 'Geschlecht', 'physician', 'Berg', 'Tag', 'Nacht', 'Einteilung']
for pattern in key_patterns:
    needle = pattern.lower()
    matching_cols = [col for col in all_cols if needle in col.lower()]
    if not matching_cols:
        continue
    print(f"{pattern}: {matching_cols}")

print("\nFirst few column names:")
print(all_cols[:20])

In [None]:
# Create Table 3 comparison groups
# Group 1: Patients receiving analgesia during transport (any opiate OR any ketamine)
# Group 2: Patients with persistent untreated pain (no analgesic AND VAS > 3 at admission)
# Patients without analgesia whose VAS_on_arrival is <= 3 or missing belong to
# neither group: the comparison `> 3` is False for them, so they stay 'exclude'.

# Define comparison groups
data_df['received_analgesia'] = data_df['any_opiate_given'] | data_df['any_ketamine_given']
data_df['no_analgesia'] = ~data_df['received_analgesia']  
data_df['persistent_untreated_pain'] = data_df['no_analgesia'] & (data_df['VAS_on_arrival'] > 3)

# Create the main comparison variable for Table 3
# The two flags are mutually exclusive by construction (the second requires
# no_analgesia), so the order of the two .loc assignments does not matter.
data_df['table3_group'] = 'exclude'  # Initialize all as exclude
data_df.loc[data_df['received_analgesia'], 'table3_group'] = 'received_analgesia'
data_df.loc[data_df['persistent_untreated_pain'], 'table3_group'] = 'persistent_untreated_pain'

# Filter to only include the two comparison groups
table3_df = data_df[data_df['table3_group'] != 'exclude'].copy()

print("Table 3 Groups Summary:")
print(f"Patients receiving analgesia during transport: {(table3_df['table3_group'] == 'received_analgesia').sum()}")
print(f"Patients with persistent untreated pain: {(table3_df['table3_group'] == 'persistent_untreated_pain').sum()}")
print(f"Total patients included in Table 3: {len(table3_df)}")

# Create adult and pediatric subsets
# NOTE(review): a row with a missing 'Alter ' satisfies neither comparison and
# therefore lands in neither subset — confirm age is always recorded.
adult_df = table3_df[table3_df['Alter '] >= 16].copy()
pediatric_df = table3_df[table3_df['Alter '] < 16].copy()

print(f"\nAdult patients:")
print(f"  Received analgesia: {(adult_df['table3_group'] == 'received_analgesia').sum()}")
print(f"  Persistent untreated pain: {(adult_df['table3_group'] == 'persistent_untreated_pain').sum()}")

print(f"\nPediatric patients:")
print(f"  Received analgesia: {(pediatric_df['table3_group'] == 'received_analgesia').sum()}")
print(f"  Persistent untreated pain: {(pediatric_df['table3_group'] == 'persistent_untreated_pain').sum()}")

In [None]:
# Debug: Check medication data more carefully
# Exploratory cell: it only prints and does not change data_df.
print("Debugging medication data:")
print(f"Total patients in data_df: {len(data_df)}")

# Check medication columns exist
# Candidate per-drug dose column names; each one absent from data_df is
# reported as NOT FOUND.
med_cols = ['Morphin (mg)', 'Fentanyl (μg)', 'Ketamin (mg)', 'Esketamin (mg)']
for col in med_cols:
    if col in data_df.columns:
        non_null = data_df[col].notna().sum()
        print(f"{col}: {non_null} non-null values")
        if non_null > 0:
            print(f"  Sample values: {data_df[col].dropna().head()}")
    else:
        print(f"{col}: NOT FOUND")

# Check our processed variables
print(f"\nProcessed medication variables:")
print(f"any_opiate_given: {data_df['any_opiate_given'].sum()} patients")
print(f"any_ketamine_given: {data_df['any_ketamine_given'].sum()} patients")
print(f"Either opiate or ketamine: {(data_df['any_opiate_given'] | data_df['any_ketamine_given']).sum()} patients")

# Check actual medication columns in dataset
# Case-insensitive substring search over the column names; this also matches
# the derived columns created earlier if their names contain one of the terms.
print(f"\nLooking for medication-related columns:")
med_related = [col for col in data_df.columns if any(med in col.lower() for med in ['morphin', 'fentanyl', 'ketamin', 'analg', 'schmerz', 'pain'])]
print(f"Found {len(med_related)} medication-related columns:")
for col in med_related[:10]:  # Show first 10
    print(f"  {col}")
if len(med_related) > 10:
    print(f"  ... and {len(med_related) - 10} more")

In [None]:
# Search for actual medication column names
# Exploratory cell: it only prints and does not change data_df.
print("Searching for medication columns:")
all_cols = data_df.columns.tolist()

# Search for various medication terms
# Short terms such as 'mg' match any column name containing those letters, so
# the result is a superset of the truly medication-related columns.
med_terms = ['morphin', 'fentanyl', 'ketamin', 'analges', 'schmerz', 'pain', 'medikament', 'mg', 'μg', 'µg']
found_cols = []

for term in med_terms:
    matching = [col for col in all_cols if term.lower() in col.lower()]
    if matching:
        found_cols.extend(matching)
        print(f"\n'{term}' related columns:")
        for col in matching:
            print(f"  {col}")

# Remove duplicates and show unique medication-related columns
# (a column matching several terms was appended once per term above)
unique_med_cols = list(set(found_cols))
print(f"\nAll unique medication-related columns ({len(unique_med_cols)}):")
for col in sorted(unique_med_cols):
    print(f"  {col}")

# Check for dose/amount columns specifically
print(f"\nColumns containing dose/amount indicators:")
dose_cols = [col for col in all_cols if any(indicator in col.lower() for indicator in ['mg)', 'μg)', 'µg)', 'ml)', 'dose', 'dosis'])]
for col in dose_cols:
    print(f"  {col}")

In [None]:
# Check the actual medication columns
# Exploratory cell: prints counts and sample values of the free-text
# medication columns; it does not change data_df.
print("Checking medication data columns:")

# Check 'Alle Medikamente' column
if 'Alle Medikamente' in data_df.columns:
    print(f"\n'Alle Medikamente' column:")
    non_null = data_df['Alle Medikamente'].notna().sum()
    print(f"  Non-null values: {non_null}")
    if non_null > 0:
        print(f"  Sample values:")
        samples = data_df['Alle Medikamente'].dropna().head(10)
        for i, val in enumerate(samples):
            print(f"    {i+1}: {val}")

# Check 'Alle Medikamente detailliert' column
if 'Alle Medikamente detailliert' in data_df.columns:
    print(f"\n'Alle Medikamente detailliert' column:")
    non_null = data_df['Alle Medikamente detailliert'].notna().sum()
    print(f"  Non-null values: {non_null}")
    if non_null > 0:
        print(f"  Sample values:")
        samples = data_df['Alle Medikamente detailliert'].dropna().head(5)
        for i, val in enumerate(samples):
            print(f"    {i+1}: {val}")

# Let's also check if there are specific medication columns we missed
print(f"\nChecking for specific medication mentions:")
# Look for columns that might contain specific medications
# The search is a case-insensitive substring match on column names, so
# 'Ketamin' also reports every column matched by 'Esketamin'.
for med in ['Morphin', 'Fentanyl', 'Ketamin', 'Esketamin']:
    matching_cols = [col for col in data_df.columns if med.lower() in col.lower()]
    if matching_cols:
        print(f"{med}: {matching_cols}")
        for col in matching_cols:
            non_null = data_df[col].notna().sum()
            if non_null > 0:
                print(f"  {col}: {non_null} non-null values")
                print(f"    Sample: {data_df[col].dropna().head(3).tolist()}")
    else:
        print(f"{med}: No direct columns found")

In [None]:
# Test regex patterns on sample medication strings
# Exploratory cell: runs the same four patterns used by the medication
# processing functions on a handful of real strings and prints the captures.
import re

# Get some sample medication strings
sample_meds = data_df['Alle Medikamente'].dropna().head(5).tolist()

print("Testing regex patterns on sample medication strings:")
for i, med_string in enumerate(sample_meds):
    print(f"\nSample {i+1}: {med_string}")
    
    # Test each regex pattern
    # Each pattern captures the number directly before the unit, within the
    # same ';'-delimited entry as the drug name.
    morphine_matches = re.findall(r'Morphin[^;]*?(\d+(?:\.\d+)?)mg', med_string, re.IGNORECASE)
    fentanyl_matches = re.findall(r'Fentanyl[^;]*?(\d+(?:\.\d+)?)mcg', med_string, re.IGNORECASE)
    ketamine_matches = re.findall(r'(?<!Es)ketamin[^;]*?(\d+(?:\.\d+)?)mg', med_string, re.IGNORECASE)
    esketamine_matches = re.findall(r'Esketamin[^;]*?(\d+(?:\.\d+)?)mg', med_string, re.IGNORECASE)
    
    print(f"  Morphine: {morphine_matches}")
    print(f"  Fentanyl: {fentanyl_matches}")
    print(f"  Ketamine: {ketamine_matches}")
    print(f"  Esketamine: {esketamine_matches}")

# Let's also test simpler patterns
print(f"\n\nTesting simpler patterns:")
# Indexing [0] assumes at least one non-null medication string exists;
# an empty sample_meds raises IndexError here.
sample_med = sample_meds[0]
print(f"Testing on: {sample_med}")

# Test if 'Fentanyl' appears at all
# Unlike the patterns above, these checks are case-sensitive, only capture
# whole numbers, and `.*?` may cross ';' into a following entry.
if 'Fentanyl' in sample_med:
    print("✓ 'Fentanyl' found in string")
    # Try simpler pattern
    simple_fentanyl = re.findall(r'Fentanyl.*?(\d+)mcg', sample_med)
    print(f"  Simple Fentanyl pattern: {simple_fentanyl}")
else:
    print("✗ 'Fentanyl' not found")

if 'Ketamin' in sample_med:
    print("✓ 'Ketamin' found in string")
    simple_ketamine = re.findall(r'Ketamin.*?(\d+)mg', sample_med)
    print(f"  Simple Ketamine pattern: {simple_ketamine}")
else:
    print("✗ 'Ketamin' not found")

In [None]:
# Check current medication processing results
# Exploratory cell: prints the derived medication columns of data_df as they
# currently stand; it does not change data_df.
print("Checking current medication variables in data_df:")
print(f"Total patients: {len(data_df)}")
print(f"Patients with opiates: {data_df['any_opiate_given'].sum()}")
print(f"Patients with ketamine: {data_df['any_ketamine_given'].sum()}")
print(f"Patients with any analgesic: {(data_df['any_opiate_given'] | data_df['any_ketamine_given']).sum()}")

# Check dose totals
# Cohort-wide sums per drug column
print(f"\nDose totals:")
print(f"Total morphine dose: {data_df['morphine_dose'].sum()}")
print(f"Total fentanyl dose: {data_df['fentanyl_dose'].sum()}")
print(f"Total ketamine dose: {data_df['ketamine_dose'].sum()}")
print(f"Total esketamine dose: {data_df['esketamine_dose'].sum()}")

# Check a few specific patients who should have medications
# The [:100] slice assumes the medication cell is a string — TODO confirm the
# column never holds a non-string value.
med_patients = data_df[data_df['Alle Medikamente'].notna()].head(5)
print(f"\nChecking specific patients with medications:")
for i, (idx, row) in enumerate(med_patients.iterrows()):
    print(f"\nPatient {i+1}:")
    print(f"  Medications: {row['Alle Medikamente'][:100]}...")
    print(f"  Morphine dose: {row['morphine_dose']}")
    print(f"  Fentanyl dose: {row['fentanyl_dose']}")
    print(f"  Ketamine dose: {row['ketamine_dose']}")
    print(f"  Esketamine dose: {row['esketamine_dose']}")
    print(f"  Any opiate: {row['any_opiate_given']}")
    print(f"  Any ketamine: {row['any_ketamine_given']}")

In [None]:
# Test medication processing function manually on a small sample
# Exploratory cell: re-runs process_medication_data on three rows that have a
# medication string and prints the input next to the derived columns.
print("Testing medication processing function on a small sample:")

# Get a small sample with medications
sample_df = data_df[data_df['Alle Medikamente'].notna()].head(3).copy()
print(f"Sample size: {len(sample_df)}")

# Test the function
# sample_df already carries the derived columns from the earlier full run;
# the function overwrites them in its returned copy.
processed_sample = process_medication_data(sample_df)

print("\nBefore processing:")
for i, (idx, row) in enumerate(sample_df.iterrows()):
    print(f"Patient {i+1}: {row['Alle Medikamente'][:80]}...")

print("\nAfter processing:")
for i, (idx, row) in enumerate(processed_sample.iterrows()):
    print(f"Patient {i+1}:")
    print(f"  Medications: {row['Alle Medikamente'][:80]}...")
    print(f"  Morphine: {row['morphine_dose']}, Fentanyl: {row['fentanyl_dose']}")
    print(f"  Ketamine: {row['ketamine_dose']}, Esketamine: {row['esketamine_dose']}")
    print(f"  Any opiate: {row['any_opiate_given']}, Any ketamine: {row['any_ketamine_given']}")

# Check if the issue is that we need to reassign the processed data
print(f"\nTotal analgesic patients in processed sample: {(processed_sample['any_opiate_given'] | processed_sample['any_ketamine_given']).sum()}")

In [None]:
# Debug the medication processing function step by step
# Exploratory cell: checks the patterns on a fixed example string, then
# replays the fentanyl extraction by hand on the first real medication row.
import re

# Take the first sample medication string
# (hard-coded example, so the expected captures can be verified by eye)
test_string = "Ondansetron (Ondansetron-Teva®) 4mg; Ketamin 20mg; Ketamin 30mg; Ketamin 20mg; Fentanyl 50mcg"
print(f"Testing on: {test_string}")

# Test each regex pattern individually
print("\nTesting individual regex patterns:")

# Fentanyl pattern
fentanyl_pattern = r'Fentanyl[^;]*?(\d+(?:\.\d+)?)mcg'
fentanyl_matches = re.findall(fentanyl_pattern, test_string, re.IGNORECASE)
print(f"Fentanyl pattern: {fentanyl_pattern}")
print(f"Fentanyl matches: {fentanyl_matches}")

# Ketamine pattern
# The negative lookbehind rejects "ketamin" when it directly follows "Es".
ketamine_pattern = r'(?<!Es)ketamin[^;]*?(\d+(?:\.\d+)?)mg'
ketamine_matches = re.findall(ketamine_pattern, test_string, re.IGNORECASE)
print(f"Ketamine pattern: {ketamine_pattern}")
print(f"Ketamine matches: {ketamine_matches}")

# Now let's manually process one row to see what happens
# .iloc[0] assumes at least one row has a medication string; otherwise it
# raises IndexError.
print(f"\nManual processing simulation:")
sample_row = data_df[data_df['Alle Medikamente'].notna()].iloc[0]
medications = sample_row['Alle Medikamente']
print(f"Medication string: {medications}")

if pd.notna(medications) and medications:
    medications = str(medications)
    print(f"Converted to string: {type(medications)}")
    
    # Test fentanyl extraction
    fentanyl_matches = re.findall(r'Fentanyl[^;]*?(\d+(?:\.\d+)?)mcg', medications, re.IGNORECASE)
    print(f"Fentanyl matches: {fentanyl_matches}")
    
    # Accumulate the captured doses, reporting any value float() rejects
    total_fentanyl = 0
    for dose_str in fentanyl_matches:
        try:
            dose = float(dose_str)
            total_fentanyl += dose
            print(f"  Added fentanyl dose: {dose}")
        except Exception as e:
            print(f"  Error processing dose {dose_str}: {e}")
    
    print(f"Total fentanyl dose: {total_fentanyl}")
else:
    print("No medication data found")

In [None]:
# Create a simpler medication processing function and test it
def process_medications_simple(df):
    """
    Debugging variant of the medication parser.

    Returns a copy of ``df`` with summed per-drug doses parsed from
    'Alle Medikamente' (``morphine_dose``, ``fentanyl_dose``,
    ``ketamine_dose``, ``esketamine_dose``) and the two boolean indicators
    ``any_opiate_given`` / ``any_ketamine_given``. Prints the number of rows
    it processes.
    """
    import re

    # (output column, pattern) pairs, listed in the column order of the result
    extractors = (
        ('morphine_dose', r'Morphin[^;]*?(\d+(?:\.\d+)?)mg'),
        ('fentanyl_dose', r'Fentanyl[^;]*?(\d+(?:\.\d+)?)mcg'),
        ('ketamine_dose', r'(?<!Es)ketamin[^;]*?(\d+(?:\.\d+)?)mg'),
        ('esketamine_dose', r'Esketamin[^;]*?(\d+(?:\.\d+)?)mg'),
    )

    result = df.copy()

    # Start every dose at 0.0 so rows without medication text keep that value
    for column, _ in extractors:
        result[column] = 0.0

    print(f"Processing {len(result)} rows...")

    for label, record in result.iterrows():
        raw = record.get('Alle Medikamente', '')
        # Skip missing or empty medication cells
        if pd.isna(raw) or not raw:
            continue

        text = str(raw)
        for column, pattern in extractors:
            hits = re.findall(pattern, text, re.IGNORECASE)
            result.at[label, column] = sum(float(hit) for hit in hits)

    # A drug class counts as given when any of its doses is positive
    opiate_given = (result['morphine_dose'] > 0) | (result['fentanyl_dose'] > 0)
    ketamine_given = (result['ketamine_dose'] > 0) | (result['esketamine_dose'] > 0)
    result['any_opiate_given'] = opiate_given
    result['any_ketamine_given'] = ketamine_given

    return result

# Test on a small sample
# Exploratory cell: runs process_medications_simple on the first two rows that
# have a medication string and prints the derived values.
print("Testing simple medication processing:")
small_sample = data_df[data_df['Alle Medikamente'].notna()].head(2).copy()
processed = process_medications_simple(small_sample)

print(f"\nResults:")
for i, (idx, row) in enumerate(processed.iterrows()):
    print(f"Patient {i+1}:")
    # The [:60] slice assumes the medication cell is a string — TODO confirm
    print(f"  Medications: {row['Alle Medikamente'][:60]}...")
    print(f"  Fentanyl: {row['fentanyl_dose']}, Ketamine: {row['ketamine_dose']}")
    print(f"  Esketamine: {row['esketamine_dose']}, Morphine: {row['morphine_dose']}")
    print(f"  Any opiate: {row['any_opiate_given']}, Any ketamine: {row['any_ketamine_given']}")

In [None]:
# Apply the working medication processing to the full dataset
# NOTE(review): table3_df, adult_df and pediatric_df were copied out of
# data_df in an earlier cell. Reassigning data_df here does not refresh them,
# so on a top-to-bottom run the tables below still use the medication flags
# computed by process_medication_data, not the ones computed here.
print("Applying medication processing to full dataset...")
data_df = process_medications_simple(data_df)

print(f"\nMedication processing results:")
print(f"Total patients: {len(data_df)}")
print(f"Patients with any opiate: {data_df['any_opiate_given'].sum()} ({data_df['any_opiate_given'].mean():.1%})")
print(f"Patients with any ketamine: {data_df['any_ketamine_given'].sum()} ({data_df['any_ketamine_given'].mean():.1%})")
print(f"Patients with any analgesic: {(data_df['any_opiate_given'] | data_df['any_ketamine_given']).sum()} ({(data_df['any_opiate_given'] | data_df['any_ketamine_given']).mean():.1%})")

# Cohort-wide dose sums; units follow the regex units (mg, fentanyl in mcg)
print(f"\nDose totals:")
print(f"Total morphine dose: {data_df['morphine_dose'].sum():.1f} mg")
print(f"Total fentanyl dose: {data_df['fentanyl_dose'].sum():.1f} mcg")
print(f"Total ketamine dose: {data_df['ketamine_dose'].sum():.1f} mg")
print(f"Total esketamine dose: {data_df['esketamine_dose'].sum():.1f} mg")

In [None]:
def create_table_variables(df):
    """
    Create all variables needed for Table 3, matching Table 1 structure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Einsatzart', 'Geschlecht', 'physician_sex', 'Bergungen',
        'Tag oder Nacht', 'Einteilung (reduziert)', 'VAS_on_scene' and
        'VAS_on_arrival'. A GCS column is optional and looked up under
        several candidate names. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added columns ``primary_mission``, ``GCS``,
        ``male_patient``, ``male_physician``, ``winch_extraction``,
        ``night_mission``, ``trauma`` and ``VAS_reduction``.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    df_copy = df.copy()

    # Primary mission (binary: Primary = 1, Secondary = 0)
    df_copy['primary_mission'] = (df_copy['Einsatzart'] == 'Primär').astype(int)

    # GCS categories (as categorical variable) - the first candidate column
    # name present in the frame is used
    gcs_candidates = ('GCS vor Ort', 'GCS', 'GCS_vor_Ort', 'gcs_vor_ort')
    gcs_col = next((col for col in gcs_candidates if col in df_copy.columns), None)

    if gcs_col:
        gcs_values = df_copy[gcs_col]
        # astype(str) alone turns a missing GCS into the literal string 'nan',
        # which would then be tabulated as a GCS category of its own.
        # Keep missing values missing instead.
        df_copy['GCS'] = gcs_values.astype(str).where(gcs_values.notna())
    else:
        print("Warning: No GCS column found. Available columns:")
        print([col for col in df_copy.columns if 'gcs' in col.lower() or 'GCS' in col])
        df_copy['GCS'] = 'Unknown'

    # Patient demographics
    # NOTE(review): a missing sex compares unequal to the label and is
    # therefore coded 0, the same as the other sex — confirm this is intended
    # (physician_sex comes from a left merge and may be missing).
    df_copy['male_patient'] = (df_copy['Geschlecht'] == 'Männlich').astype(int)
    df_copy['male_physician'] = (df_copy['physician_sex'] == 'm').astype(int)

    # Mission characteristics (a missing 'Bergungen' counts as no winch: na=False)
    df_copy['winch_extraction'] = df_copy['Bergungen'].str.contains('Winde', na=False).astype(int)
    df_copy['night_mission'] = (df_copy['Tag oder Nacht'] == 'Nacht').astype(int)

    # Trauma vs non-trauma (a missing classification counts as non-trauma: na=False)
    df_copy['trauma'] = df_copy['Einteilung (reduziert)'].str.contains('Unfall', na=False).astype(int)

    # VAS scores: the scene and arrival columns are used as they are; only the
    # reduction (scene minus arrival, positive = pain decreased) is derived
    df_copy['VAS_reduction'] = df_copy['VAS_on_scene'] - df_copy['VAS_on_arrival']

    return df_copy

# Derive the Table 3 variables for the pooled cohort and for both age strata
table3_df, adult_df, pediatric_df = [
    create_table_variables(cohort)
    for cohort in (table3_df, adult_df, pediatric_df)
]

print("Variable creation completed for Table 3 analysis")

## Table 3A: Adult Population (≥16 years)
### Comparison of Patients Receiving Analgesia vs. Persistent Untreated Pain

Similar to Table 1 structure but comparing:
- **Group 1**: Patients receiving analgesia during transport (any opiate OR any ketamine)
- **Group 2**: Patients with persistent untreated pain (no analgesic AND VAS > 3 at admission)

In [None]:
# Create Table 3A: Adult Population
# adult_df holds the Table 3 rows with 'Alter ' >= 16, grouped by
# 'table3_group' (received_analgesia vs persistent_untreated_pain).
print("=== TABLE 3A: ADULT POPULATION (≥16 years) ===")
print(f"Total adult patients: {len(adult_df)}")
print(f"Received analgesia: {(adult_df['table3_group'] == 'received_analgesia').sum()}")
print(f"Persistent untreated pain: {(adult_df['table3_group'] == 'persistent_untreated_pain').sum()}")

# Variables for Table 3 (matching Table 1 structure)
# NOTE(review): by construction the persistent_untreated_pain group has
# any_opiate_given and any_ketamine_given False for every row and
# VAS_on_arrival > 3, so group differences on those rows partly restate the
# group definition rather than an independent finding.
columns_adult = [
    'primary_mission',     # Primary mission type
    'GCS',                # Glasgow Coma Scale  
    'Alter ',             # Age
    'male_patient',       # Patient sex
    'male_physician',     # Physician sex
    'winch_extraction',   # Winch extraction
    'night_mission',      # Night mission
    'trauma',             # Trauma vs non-trauma
    'VAS_on_scene',       # VAS at scene
    'VAS_on_arrival',     # VAS at arrival
    'VAS_reduction',      # VAS reduction
    'any_opiate_given',   # Any opiate given
    'any_ketamine_given'  # Any ketamine given
]

# Categorical variables
# (every listed column except age and the three VAS columns)
categorical_adult = [
    'primary_mission', 'GCS', 'male_patient', 'male_physician', 
    'winch_extraction', 'night_mission', 'trauma', 
    'any_opiate_given', 'any_ketamine_given'
]

# Groupby variable
groupby_adult = 'table3_group'

# Generate TableOne for adult population
adult_table3 = TableOne(
    adult_df, 
    columns=columns_adult,
    categorical=categorical_adult,
    groupby=groupby_adult,
    pval=True,
    missing=False
)

print("\n" + "="*80)
print("TABLE 3A: ADULTS - Analgesia vs Persistent Untreated Pain")
print("="*80)
print(adult_table3)

## Table 3B: Pediatric Population (<16 years)
### Comparison of Patients Receiving Analgesia vs. Persistent Untreated Pain

Same comparison as adult table but for pediatric patients:
- **Group 1**: Patients receiving analgesia during transport (any opiate OR any ketamine)
- **Group 2**: Patients with persistent untreated pain (no analgesic AND VAS > 3 at admission)

In [None]:
# Create Table 3B: Pediatric Population
# pediatric_df holds the Table 3 rows with 'Alter ' < 16; the table layout is
# copied from the adult table, so this cell needs columns_adult and
# categorical_adult from the Table 3A cell.
print("=== TABLE 3B: PEDIATRIC POPULATION (<16 years) ===")
print(f"Total pediatric patients: {len(pediatric_df)}")
print(f"Received analgesia: {(pediatric_df['table3_group'] == 'received_analgesia').sum()}")
print(f"Persistent untreated pain: {(pediatric_df['table3_group'] == 'persistent_untreated_pain').sum()}")

# Same variables as adult table
# (.copy() so later edits to one list do not affect the other)
columns_pediatric = columns_adult.copy()
categorical_pediatric = categorical_adult.copy()
groupby_pediatric = 'table3_group'

# Generate TableOne for pediatric population
pediatric_table3 = TableOne(
    pediatric_df, 
    columns=columns_pediatric,
    categorical=categorical_pediatric,
    groupby=groupby_pediatric,
    pval=True,
    missing=False
)

print("\n" + "="*80)
print("TABLE 3B: PEDIATRIC - Analgesia vs Persistent Untreated Pain")
print("="*80)
print(pediatric_table3)

## Summary and Data Validation

In [None]:
# Summary statistics and validation
# Prints the group definitions, group sizes and a consistency check; it does
# not change any of the data frames.
print("="*80)
print("TABLE 3 SUMMARY: PERSISTENT UNTREATED PAIN ANALYSIS")
print("="*80)

print("\nDefinitions:")
print("• Received Analgesia: Patients who received any opiate OR any ketamine during transport")
print("• Persistent Untreated Pain: Patients who received NO analgesic AND had VAS > 3 at admission")
print("• Initial Inclusion: All patients with VAS > 3 at scene")

# NOTE(review): table3_df contains only the two comparison groups (rows
# labelled 'exclude' were dropped when it was built), so "Total patients" here
# is the Table 3 subset, not everyone with VAS > 3 at scene.
# The percentages divide by len(table3_df) and raise ZeroDivisionError if
# that frame is empty.
print(f"\nOverall Population (VAS > 3 at scene):")
print(f"Total patients: {len(table3_df)}")
print(f"  Adults (≥16 years): {len(adult_df)} ({len(adult_df)/len(table3_df):.1%})")
print(f"  Pediatric (<16 years): {len(pediatric_df)} ({len(pediatric_df)/len(table3_df):.1%})")

# Group sizes; each percentage is the share within its own frame
print(f"\nComparison Groups:")
print(f"Received Analgesia: {(table3_df['table3_group'] == 'received_analgesia').sum()} ({(table3_df['table3_group'] == 'received_analgesia').mean():.1%})")
print(f"  Adults: {(adult_df['table3_group'] == 'received_analgesia').sum()} ({(adult_df['table3_group'] == 'received_analgesia').mean():.1%})")
print(f"  Pediatric: {(pediatric_df['table3_group'] == 'received_analgesia').sum()} ({(pediatric_df['table3_group'] == 'received_analgesia').mean():.1%})")

print(f"\nPersistent Untreated Pain: {(table3_df['table3_group'] == 'persistent_untreated_pain').sum()} ({(table3_df['table3_group'] == 'persistent_untreated_pain').mean():.1%})")
print(f"  Adults: {(adult_df['table3_group'] == 'persistent_untreated_pain').sum()} ({(adult_df['table3_group'] == 'persistent_untreated_pain').mean():.1%})")
print(f"  Pediatric: {(pediatric_df['table3_group'] == 'persistent_untreated_pain').sum()} ({(pediatric_df['table3_group'] == 'persistent_untreated_pain').mean():.1%})")

# Medication flags as carried by table3_df
print(f"\nMedication Distribution:")
print(f"Any Opiate Given: {table3_df['any_opiate_given'].sum()} ({table3_df['any_opiate_given'].mean():.1%})")
print(f"Any Ketamine Given: {table3_df['any_ketamine_given'].sum()} ({table3_df['any_ketamine_given'].mean():.1%})")
print(f"Any Analgesic Given: {(table3_df['any_opiate_given'] | table3_df['any_ketamine_given']).sum()} ({(table3_df['any_opiate_given'] | table3_df['any_ketamine_given']).mean():.1%})")

print(f"\nVAS Score Validation:")
print(f"VAS at scene (mean ± SD): {table3_df['VAS_on_scene'].mean():.1f} ± {table3_df['VAS_on_scene'].std():.1f}")
print(f"VAS at arrival (mean ± SD): {table3_df['VAS_on_arrival'].mean():.1f} ± {table3_df['VAS_on_arrival'].std():.1f}")
print(f"VAS reduction (mean ± SD): {table3_df['VAS_reduction'].mean():.1f} ± {table3_df['VAS_reduction'].std():.1f}")

# Check for potential issues
# Every row in the persistent_untreated_pain group should have an arrival VAS
# above 3, because that condition is part of the group definition.
persistent_pain_vas_check = table3_df[table3_df['table3_group'] == 'persistent_untreated_pain']['VAS_on_arrival']
print(f"\nValidation - Persistent Untreated Pain group:")
print(f"All have VAS > 3 at arrival: {(persistent_pain_vas_check > 3).all()}")
print(f"VAS at arrival range: {persistent_pain_vas_check.min():.1f} - {persistent_pain_vas_check.max():.1f}")

print("\n" + "="*80)
print("TABLE 3 ANALYSIS COMPLETE")
print("="*80)

Table 3. Persisting Untreated Pain

Persisting untreated pain = no analgesia during HEMS transport and NRS > 3 at hospital admission.