# Meta data

In [None]:
import pandas as pd
import re
import numpy as np
from operator import itemgetter

In [None]:
# Input workbooks. All three live under one data directory, so only `data_root`
# has to be edited when the data is moved to another machine.
data_root = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data'
rega_dir = data_root + '/rega_data'

# Mission-level table (loaded as `data_df` further down).
data_path = rega_dir + '/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
# Physician list (loaded as `medic_df`).
medic_data_path = rega_dir + '/rega_physician_list_09102025.xlsx'
# Per-physician register extraction (loaded as `meta_medic_df`).
meta_medic_data_path = data_root + '/medreg_extraction/joined_final_complete_extractions_20251008_221735.xlsx'

In [None]:
# Cohort switches consumed by the filtering cells below.
restrict_to_trauma, restrict_to_primary = (
    True,  # keep only rows whose 'Einteilung (reduziert)' equals 'Unfall'
    True,  # keep only rows whose 'Einsatzart' equals 'Primär'
)

In [None]:
# Load the three workbooks in a fixed order: missions, physician list,
# physician register extraction.
data_df, medic_df, meta_medic_df = (
    pd.read_excel(workbook)
    for workbook in (data_path, medic_data_path, meta_medic_data_path)
)

In [None]:
# Enrich each mission with information about the physician on duty.
#
# 1. Derive `full_name` by removing the role suffix from the crew-member label.
#    regex=False is required: the suffix contains parentheses, and pandas
#    versions whose default is regex=True would read them as a capture group,
#    so the literal "(...)" would never match and the names would stay
#    unchanged (making the merge on `full_name` silently match nothing).
medic_df['full_name'] = medic_df['Mitglieder mit Einsatzfunktion'].str.replace(
    ' (Flugarzt/Flugärztin)', '', regex=False
)

# 2. One row per crew-member label, so the final merge cannot multiply missions.
medic_df = medic_df.drop_duplicates(subset=['Mitglieder mit Einsatzfunktion'])

# 3. Attach the register extraction. It is reduced to one row per name first
#    (first occurrence kept); otherwise a repeated name would duplicate the
#    physician row and, through the next merge, every mission of that physician.
medic_df = medic_df.merge(
    meta_medic_df.drop_duplicates(subset=['full_name']),
    how='left',
    on='full_name',
)
medic_df = medic_df.rename(columns={'Sex m/w': 'physician_sex'})

# 4. Left merge keeps every mission, including those without a matching physician.
data_df = data_df.merge(medic_df, how='left', on='Mitglieder mit Einsatzfunktion')

In [None]:
# Report how many distinct mission numbers occur more than once, then keep
# only one row per mission number (drop_duplicates keeps the first by default).
event_col = "SNZ Ereignis Nr. "
repeated_rows = data_df[event_col].duplicated()
duplicates = data_df.loc[repeated_rows, event_col]
print(f'Duplicates found: {duplicates.nunique()}')

data_df = data_df.drop_duplicates(subset=[event_col])

In [None]:
# Inclusion criterion: VAS on scene above 3.
# The filter at the end keeps only rows with VAS_on_scene > 3, which removes
# both low scores (<= 3) and rows where the score is missing. Both groups are
# reported so the printed exclusions add up to the rows actually removed.
low_vas_mask = data_df["VAS_on_scene"] <= 3
missing_vas_mask = data_df["VAS_on_scene"].isna()

n_vas_under4 = int(low_vas_mask.sum())
print(f'Excluded {n_vas_under4} patients with VAS <= 3')

# adult patients with vas <= 3
n_adult_vas_under4 = int((low_vas_mask & (data_df["Alter "] >= 16)).sum())
print(f'Excluded {n_adult_vas_under4} adult patients with VAS <= 3')

# pediatric patients with vas <= 3
n_pediatric_vas_under4 = int((low_vas_mask & (data_df["Alter "] < 16)).sum())
print(f'Excluded {n_pediatric_vas_under4} pediatric patients with VAS <= 3')

# patients without a recorded on-scene score (also removed by the filter below)
n_vas_missing = int(missing_vas_mask.sum())
print(f'Excluded {n_vas_missing} patients with missing VAS_on_scene')

data_df = data_df[data_df["VAS_on_scene"] > 3]

In [None]:
if restrict_to_trauma:
    # Keep only missions categorised as 'Unfall'; report what is removed,
    # overall and split at the age cut-off of 16 used throughout the notebook.
    non_trauma_mask = data_df['Einteilung (reduziert)'] != 'Unfall'
    adult_mask = data_df["Alter "] >= 16
    child_mask = data_df["Alter "] < 16

    n_non_trauma = len(data_df[non_trauma_mask])
    print(f'Excluded {n_non_trauma} non-trauma patients')

    n_adult_non_trauma = len(data_df[non_trauma_mask & adult_mask])
    print(f'Excluded {n_adult_non_trauma} adult non-trauma patients')

    n_pediatric_non_trauma = len(data_df[non_trauma_mask & child_mask])
    print(f'Excluded {n_pediatric_non_trauma} pediatric non-trauma patients')

    data_df = data_df[data_df['Einteilung (reduziert)'] == 'Unfall']

In [None]:
if restrict_to_primary:
    # Keep only missions whose type is 'Primär'; report what is removed,
    # overall and split at the age cut-off of 16 used throughout the notebook.
    secondary_mask = data_df['Einsatzart'] != 'Primär'
    adult_mask = data_df["Alter "] >= 16
    child_mask = data_df["Alter "] < 16

    n_secondary = len(data_df[secondary_mask])
    print(f'Excluded {n_secondary} secondary transport patients')

    n_adult_secondary = len(data_df[secondary_mask & adult_mask])
    print(f'Excluded {n_adult_secondary} adult secondary transport patients')

    n_pediatric_secondary = len(data_df[secondary_mask & child_mask])
    print(f'Excluded {n_pediatric_secondary} pediatric secondary transport patients')

    data_df = data_df[data_df['Einsatzart'] == 'Primär']


In [None]:
# Split the cohort at age 16: adults are >= 16, pediatric patients are < 16.
# NOTE(review): rows with a missing age satisfy neither comparison and so end
# up in neither frame — confirm this is intended.
patient_age = data_df["Alter "]
adult_df = data_df.loc[patient_age >= 16]
pediatric_df = data_df.loc[patient_age < 16]

In [None]:
# Analgesia outcome in adults, judged on the VAS recorded on arrival:
#   insufficient -> VAS_on_arrival > 3
#   sufficient   -> VAS_on_arrival <= 3
# Patients without a VAS_on_arrival value are left out of the denominator.
# Proportions are reported with a 95% percentile-bootstrap interval
# (1000 resamples with replacement, fixed seed for reproducibility).
rng = np.random.default_rng(42)
vas_arrival = adult_df['VAS_on_arrival'].dropna()
insufficient_mask = vas_arrival.gt(3)
sufficient_mask = vas_arrival.le(3)

n_adult = vas_arrival.shape[0]
n_insufficient = int(insufficient_mask.sum())
n_sufficient = int(sufficient_mask.sum())

# Observed proportions; undefined (NaN) when no adult has a recorded value.
if n_adult:
    p_insufficient = n_insufficient / n_adult
    p_sufficient = n_sufficient / n_adult
else:
    p_insufficient = np.nan
    p_sufficient = np.nan

n_boot = 1000
boot_insufficient = np.empty(n_boot)
boot_sufficient = np.empty(n_boot)

# The observed scores do not change between resamples, so convert them once.
observed_scores = vas_arrival.to_numpy()
for draw in range(n_boot):
    # One resample per iteration; both proportions come from the same draw.
    sample = rng.choice(observed_scores, size=n_adult, replace=True)
    boot_insufficient[draw] = np.mean(sample > 3)
    boot_sufficient[draw] = np.mean(sample <= 3)

# Percentile interval: 2.5th and 97.5th percentiles of the bootstrap distribution.
ci_low, ci_high = 2.5, 97.5
ins_ci, suf_ci = (
    np.percentile(boot_values, [ci_low, ci_high])
    for boot_values in (boot_insufficient, boot_sufficient)
)

print(f"Adult patients with VAS_on_arrival available: {n_adult}")
print(f"Insufficient analgesia: {n_insufficient}/{n_adult} ({p_insufficient*100:.1f}%) [{ins_ci[0]*100:.1f}%, {ins_ci[1]*100:.1f}%]")
print(f"Sufficient analgesia: {n_sufficient}/{n_adult} ({p_sufficient*100:.1f}%) [{suf_ci[0]*100:.1f}%, {suf_ci[1]*100:.1f}%]")

In [None]:
# Preview only the first rows: the frame is patient-level and carries physician
# names, so the full table should not be rendered into the saved notebook output.
adult_df.head()