# table 1
- mission type
- GCS
- NACA
- age
- patient sex
- physician sex
- experience of physician
- age physician
- missions per physician
- difficult terrain
- circulation insufficient
- breathing insufficient
- duration of mission (min)
- night time mission
- trauma
- VAS at scene 
- VAS at hospital admission
- VAS reduction 
- administered analgesic
-- fentanyl / fentanyl dose
-- morphine / morphine dose
-- ketamine / ketamine dose

In [None]:
import pandas as pd
import re
import numpy as np
from operator import itemgetter

In [None]:
# Hard-coded absolute paths of the three input workbooks:
# - data_path: mission-level export, loaded into `data_df`
# - medic_data_path: physician list, loaded into `medic_df`
# - meta_medic_data_path: extraction that is merged onto the physician list via `full_name`
data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
medic_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/data/Liste Notärzte-1.xlsx'
meta_medic_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/analysis/medreg_extraction/final_complete_extractions_20251001_190933.xlsx'

In [None]:
# Cohort switches used by the exclusion cells further down:
# restrict_to_trauma keeps only rows with 'Einteilung (reduziert)' == 'Unfall',
# restrict_to_primary keeps only rows with 'Einsatzart' == 'Primär'.
restrict_to_trauma = True
restrict_to_primary = True

In [None]:
# Read the three Excel workbooks (missions, physician list, physician extraction).
data_df, medic_df, meta_medic_df = (
    pd.read_excel(workbook_path)
    for workbook_path in (data_path, medic_data_path, meta_medic_data_path)
)

In [None]:
# Earliest and latest first-assessment date in the export (parsed once, shown as a tuple).
first_assessment_dates = pd.to_datetime(data_df["Erstbefund (Datum)"], format='%d.%m.%Y')
first_assessment_dates.min(), first_assessment_dates.max()

In [None]:
# Build the physician lookup table and attach it to every mission row.
# The role suffix is stripped so that the name can be matched against
# `full_name` in the extraction workbook.
# regex=False is required: the suffix contains parentheses, which are read as a
# regex group (and therefore never match the literal text) on pandas versions
# where Series.str.replace defaults to regex=True.
medic_df['full_name'] = medic_df['Mitglieder mit Einsatzfunktion'].str.replace(
    ' (Flugarzt/Flugärztin)', '', regex=False
)
# one row per physician entry before the lookup merge
medic_df.drop_duplicates(subset=['Mitglieder mit Einsatzfunktion'], inplace=True)
# NOTE(review): if `meta_medic_df` holds several rows for the same `full_name`,
# this left merge repeats the physician row and the merge below then repeats
# the mission rows - confirm `full_name` is unique in the extraction.
medic_df = medic_df.merge(meta_medic_df, how='left', on='full_name')
medic_df.rename(columns={'Sex m/w': 'physician_sex'}, inplace=True)
data_df = data_df.merge(medic_df, how='left', on='Mitglieder mit Einsatzfunktion')

In [None]:
# Report how many event numbers appear more than once, then keep only the
# first row of every event number.
event_id_col = "SNZ Ereignis Nr. "
repeated_mask = data_df[event_id_col].duplicated()
duplicates = data_df.loc[repeated_mask, event_id_col]
print(f'Duplicates found: {duplicates.nunique()}')
data_df = data_df.drop_duplicates(subset=[event_id_col])

In [None]:
# Exclusion step: only patients with an on-scene VAS above 3 stay in the cohort.
vas_on_scene = data_df["VAS_on_scene"]
patient_age = data_df["Alter "]
low_vas = vas_on_scene <= 3

n_vas_under4 = int(low_vas.sum())
print(f'Excluded {n_vas_under4} patients with VAS <= 3')

# adult patients with vas <= 3
n_adult_vas_under4 = int((low_vas & (patient_age >= 16)).sum())
print(f'Excluded {n_adult_vas_under4} adult patients with VAS <= 3')

# pediatric patients with vas <= 3
n_pediatric_vas_under4 = int((low_vas & (patient_age < 16)).sum())
print(f'Excluded {n_pediatric_vas_under4} pediatric patients with VAS <= 3')

# Rows without a recorded on-scene VAS also fail the `> 3` filter below.
# Report them separately so the printed counts add up to the rows removed.
n_vas_missing = int(vas_on_scene.isna().sum())
print(f'Excluded {n_vas_missing} patients with missing VAS on scene')

data_df = data_df[vas_on_scene > 3]

In [None]:
# Number of rows per value of 'Einteilung (reduziert)' at this point of the pipeline.
data_df['Einteilung (reduziert)'].value_counts()

In [None]:
if restrict_to_trauma:
    # Rows whose reduced category is not 'Unfall' are counted (overall and by
    # age group) and then removed.
    non_trauma_mask = data_df['Einteilung (reduziert)'] != 'Unfall'
    patient_age = data_df["Alter "]

    n_non_trauma = int(non_trauma_mask.sum())
    print(f'Excluded {n_non_trauma} non-trauma patients')

    # adults (age >= 16) among the non-trauma rows
    n_adult_non_trauma = int((non_trauma_mask & (patient_age >= 16)).sum())
    print(f'Excluded {n_adult_non_trauma} adult non-trauma patients')

    # children (age < 16) among the non-trauma rows
    n_pediatric_non_trauma = int((non_trauma_mask & (patient_age < 16)).sum())
    print(f'Excluded {n_pediatric_non_trauma} pediatric non-trauma patients')

    data_df = data_df[data_df['Einteilung (reduziert)'] == 'Unfall']

In [None]:
if restrict_to_primary:
    # Rows whose mission type is not 'Primär' are counted (overall and by age
    # group) and then removed.
    secondary_mask = data_df['Einsatzart'] != 'Primär'
    patient_age = data_df["Alter "]

    n_secondary = int(secondary_mask.sum())
    print(f'Excluded {n_secondary} secondary transport patients')

    # adults (age >= 16) among the non-primary rows
    n_adult_secondary = int((secondary_mask & (patient_age >= 16)).sum())
    print(f'Excluded {n_adult_secondary} adult secondary transport patients')

    # children (age < 16) among the non-primary rows
    n_pediatric_secondary = int((secondary_mask & (patient_age < 16)).sum())
    print(f'Excluded {n_pediatric_secondary} pediatric secondary transport patients')

    data_df = data_df[data_df['Einsatzart'] == 'Primär']


In [None]:
# Remaining cohort sizes as a tuple: (rows with age >= 16, rows with age < 16).
(data_df["Alter "] >= 16).sum(), (data_df["Alter "] < 16).sum()

In [None]:
# Split the cohort at 16 years. Later cells add and overwrite columns on these
# frames (table1, get_multi_label_counts), so take independent copies instead
# of slices of `data_df`; writing into a slice triggers SettingWithCopyWarning
# and leaves it unclear which frame is modified.
adult_df = data_df[data_df["Alter "] >= 16].copy()
pediatric_df = data_df[data_df["Alter "] < 16].copy()

In [None]:
# print the names of all columns in data_df that contain "Geschlecht"
print([col for col in data_df.columns if "Geschlecht" in col])

In [None]:
def get_multi_label_counts(data_df, multi_label_column):
    """
    Count how often each label occurs in a comma-separated multi-label column.

    The value 999 is first replaced by pd.NA in `data_df[multi_label_column]`
    (this writes to the frame that was passed in). Every cell is then converted
    to a string and split on ','; each piece is stripped and tallied, except
    for the strings 'nan' and '<NA>'.

    Returns a dict mapping label -> count, ordered by descending count.
    """
    data_df[multi_label_column] = data_df[multi_label_column].replace(999, pd.NA)

    tally = {}
    for _, record in data_df.iterrows():
        for piece in str(record[multi_label_column]).split(','):
            name = piece.strip()
            # string forms of missing values are not labels
            if name in ('nan', '<NA>'):
                continue
            tally[name] = tally.get(name, 0) + 1

    # most frequent label first
    by_count_desc = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return dict(by_count_desc)

In [None]:
# Label frequencies of the 'Körperregion' column in the adult cohort.
# Note: the call also replaces 999 with pd.NA in adult_df['Körperregion'].
get_multi_label_counts(adult_df, 'Körperregion')

In [None]:
def preprocess_body_region(df):
    """
    Normalise the 'Körperregion' column of `df` in place and return `df`.

    Adds 'Körperregion2' (text before the first comma of 'Körperregion',
    stripped). 'Körperregion' itself is lower-cased, cells that exactly equal
    a known synonym or misspelling are mapped to the canonical label, and
    empty strings become pd.NA.
    """
    # first listed region of every row
    df['Körperregion2'] = df['Körperregion'].str.split(',').str[0].str.strip()

    # whole-cell replacements only; multi-region cells are left as they are
    canonical_labels = {
        'kopf': 'schädel-hirn',
        'kopf/gesicht': 'gesicht',
        'augen': 'gesicht',
        'kopf/hals': 'gesicht',
        'kopf (gehör)': 'gesicht',
        'kopf/gesicht/hals': 'gesicht',
        'rücken': 'wirbelsäule',
        'rump': 'thorax',
        'obere extremiät': 'obere extremität',
        'untere extermität': 'untere extremität',
    }

    # NOTE(review): 'Körperregion2' is derived from 'Körperregion', so it is
    # missing wherever 'Körperregion' is missing and this fillna looks like a
    # no-op - confirm whether 'Körperregion' was meant to be replaced instead.
    region = df['Körperregion'].fillna(df['Körperregion2'])
    region = region.str.lower().replace(canonical_labels)
    df['Körperregion'] = region.replace({'': pd.NA})

    return df

In [None]:
def get_categorical_str(df, column_name, category, total):
    """
    Format how many rows of `df` have `column_name` equal to `category`.

    Parameters:
        df: DataFrame to count in.
        column_name: column compared against `category`.
        category: value a row must equal to be counted.
        total: denominator of the percentage.

    Returns:
        'count (percent)' with one decimal, e.g. '3 (75.0%)'. When `total` is 0
        the percentage is undefined and 'count (n/a)' is returned instead of
        raising ZeroDivisionError (empty subgroups).
    """
    count = df[df[column_name] == category].shape[0]
    if total == 0:
        return f'{count} (n/a)'
    return f'{count} ({count/total:.1%})'

def get_continuous_str(df, column_name, total):
    """
    Summarise a column of `df` as 'median [Q1 - Q3]', one decimal each.

    `total` is not used by this function.
    """
    values = df[column_name]
    center = values.median()
    lower, upper = (values.quantile(q) for q in (0.25, 0.75))
    return f'{center:.1f} [{lower:.1f} - {upper:.1f}]'

def get_multi_label_counts(data_df, multi_label_column):
    """
    Count how often each label occurs in a multi-label column.

    The value 999 is first replaced by pd.NA in `data_df[multi_label_column]`
    (this writes to the frame that was passed in). Every cell is then converted
    to a string and split on '; ' or ', '; each piece is stripped and tallied,
    except for the strings 'nan' and '<NA>'.

    Returns a dict mapping label -> count, ordered by descending count.
    """
    data_df[multi_label_column] = data_df[multi_label_column].replace(999, pd.NA)

    tally = {}
    for _, record in data_df.iterrows():
        for piece in re.split('; |, ', str(record[multi_label_column])):
            name = piece.strip()
            # string forms of missing values are not labels
            if name in ('nan', '<NA>'):
                continue
            tally[name] = tally.get(name, 0) + 1

    # most frequent label first
    by_count_desc = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return dict(by_count_desc)

In [None]:
# Drug-name stems searched (case-sensitively, first match wins) in every entry
# of 'Alle Medikamente', mapped to the dose column that table1 fills.
_ANALGESIC_DOSE_COLUMNS = {
    'Fentanyl': 'fentanyl_dose',
    'Ketamin': 'ketamine_dose',
    'Esketamin': 'esketamine_dose',
    'Morphin': 'morphine_dose',
}


def _count_pct_str(count, total):
    """Format a count as 'n (x.x%)'; the percentage is 'n/a' when total is 0."""
    if total == 0:
        return f'{count} (n/a)'
    return f'{count} ({count/total:.1%})'


def _extract_year(value):
    """Return the stand-alone four-digit number in `value` as a string, or None."""
    match = re.search(r'(?<!\d)\d{4}(?!\d)', str(value))
    return match.group(0) if match else None


def _analgesic_doses(medications, mission_duration_min):
    """Sum the dose per analgesic found in one ';'-separated medication string."""
    doses = dict.fromkeys(_ANALGESIC_DOSE_COLUMNS.values(), 0.0)
    if not isinstance(medications, str):
        # missing or non-text cell: nothing administered
        return doses

    for entry in medications.split(';'):
        if entry.strip() == '':
            continue
        if '7IE' in entry:
            print(f"Skipping dose with 7IE: {entry}")
            continue

        # drop the unit so that only the number (and an optional '/h') remains
        entry = entry.replace('mcg', '').replace('mg', '').strip()
        for stem, column in _ANALGESIC_DOSE_COLUMNS.items():
            if stem not in entry:
                continue
            remainder = entry.split(stem)[-1]
            # spellings ending in "e" ("Ketamine", "Morphine") leave that letter
            # in front of the number
            if remainder.startswith('e'):
                remainder = remainder[1:]
            # only fentanyl entries are interpreted as an hourly rate
            is_hourly_rate = stem == 'Fentanyl' and '/h' in entry
            if is_hourly_rate:
                remainder = remainder.replace('/h', '')
            try:
                amount = float(remainder.strip())
            except ValueError as err:
                raise ValueError(
                    f'Cannot parse dose from medication entry {entry!r}'
                ) from err
            if is_hourly_rate:
                # rate is per hour, mission duration is in minutes
                # NOTE(review): a missing mission duration makes the total NaN,
                # so the patient is not counted as having received fentanyl.
                amount = amount * mission_duration_min / 60
            doses[column] += amount
            break
    return doses


def table1(df):
    """
    Create a summary table (Table 1) of the data.

    Parameters:
        df: DataFrame with one row per patient. Derived columns are added to it
            (event_year, physician_age, physician_licence_year,
            phyisician_experience_years, physician_* flags, mission_duration,
            VAS_reduction, *_dose, *_given, any_ketamine_*), commas in
            'Alle Medikamente' are replaced by semicolons, and
            get_multi_label_counts rewrites 999 in 'Bergungen' and
            'Einteilung (reduziert)'.

    Returns:
        DataFrame with one row per statistic and a single column (label 0)
        holding the formatted value.

    Raises:
        ValueError: if a dose in 'Alle Medikamente' cannot be parsed as a number.
    """
    n_patients = len(df)
    stats = {'n_patients': n_patients}

    # # mission type (primary vs secondary)
    # stats['primary_mission'] = get_categorical_str(df, 'Einsatzart', 'Primär', n_patients)
    # stats['secondary_mission'] = get_categorical_str(df, 'Einsatzart', 'Sekundär', n_patients)

    # df['insufficient_pain_management'] = (df['VAS_on_arrival'] > 3).astype(int)
    # stats['insufficient_pain_management'] = get_categorical_str(df, 'insufficient_pain_management', 1, n_patients)

    # NACA (2, 3, 4, 5)
    for naca_level in [2, 3, 4, 5]:
        stats[f'NACA {naca_level}'] = get_categorical_str(df, 'NACA (nummerisch)', naca_level, n_patients)

    # GCS (13, 14, 15)
    for gcs_value in [13, 14, 15]:
        stats[f'GCS {gcs_value}'] = get_categorical_str(df, 'GCS', gcs_value, n_patients)

    # - age
    stats['age'] = get_continuous_str(df, 'Alter ', n_patients)

    # - patient sex
    stats['patient_sex_male'] = get_categorical_str(df, 'Geschlecht', 'Männlich', n_patients)

    # - physician sex
    stats['physician_sex_male'] = get_categorical_str(df, 'physician_sex', 'm', n_patients)

    # - physician age and experience, both relative to the year of the event
    df['event_year'] = pd.to_datetime(df['Ereignisdatum'], format='%d.%m.%Y').dt.year
    df['physician_age'] = df['event_year'] - df['year_of_birth']
    # licence_date can be d.m.Y or Y. Taking the four-digit year also covers
    # years that were read as floats ("2010.0"), where splitting on '.' would
    # yield "0".
    df['physician_licence_year'] = df['licence_date'].apply(_extract_year)
    # (sic) the misspelled column name is kept because other cells may read it
    df['phyisician_experience_years'] = df['event_year'] - pd.to_numeric(df['physician_licence_year'], errors='coerce')
    stats['physician_age'] = get_continuous_str(df, 'physician_age', n_patients)
    stats['physician_experience_years'] = get_continuous_str(df, 'phyisician_experience_years', n_patients)

    # - physician specialty flags (substring match on the qualification text)
    df['physician_anesthesiologist'] = df['specialist_qualifications'].str.contains('Anaesthesiology', na=False)
    stats['physician_anesthesiologist'] = get_categorical_str(df, 'physician_anesthesiologist', True, n_patients)
    df['physician_intensivist'] = df['specialist_qualifications'].str.contains('Intensive care medicine', na=False)
    stats['physician_intensivist'] = get_categorical_str(df, 'physician_intensivist', True, n_patients)
    df['physician_internist'] = df['specialist_qualifications'].str.contains('General Internal Medicine|General medical practitioner', na=False)
    stats['physician_internist'] = get_categorical_str(df, 'physician_internist', True, n_patients)
    # df['physician_with_pain_certification'] = df['additional_qualifications'].str.contains('Interventional pain management (SSIPM)', na=False)

    # - difficult terrain: extractions whose label mentions the winch ('Winde').
    # A plain sum also works when no such label exists (itemgetter() without
    # arguments raises TypeError).
    extraction_methods = get_multi_label_counts(df, "Bergungen")
    n_whinch_extractions = sum(
        count for label, count in extraction_methods.items() if 'Winde' in label
    )
    stats['n_whinch_extractions'] = _count_pct_str(n_whinch_extractions, n_patients)

    # - circulation insufficient
    # - breathing insufficient
    # - duration of mission in minutes (first assessment until handover)
    handover_time = pd.to_datetime(df['Übergabezeit'], format='%d.%m.%Y %H:%M:%S')
    first_assessment_time = pd.to_datetime(df['Erstbefund'], format='%d.%m.%Y %H:%M:%S')
    df['mission_duration'] = (handover_time - first_assessment_time).dt.total_seconds() / 60
    stats['mission_duration'] = get_continuous_str(df, 'mission_duration', n_patients)

    # - night time mission
    stats['night_time_mission'] = get_categorical_str(df, 'Tag oder Nacht', 'Nacht', n_patients)

    # - trauma (0 instead of KeyError when the subgroup has no 'Unfall' row)
    n_trauma = get_multi_label_counts(df, "Einteilung (reduziert)").get('Unfall', 0)
    stats['trauma'] = _count_pct_str(n_trauma, n_patients)

    # - VAS at scene
    stats['VAS_on_scene'] = get_continuous_str(df, 'VAS_on_scene', n_patients)
    # - VAS at hospital admission
    stats['VAS_at_hospital_admission'] = get_continuous_str(df, 'VAS_on_arrival', n_patients)
    # - VAS reduction
    df['VAS_reduction'] = df['VAS_on_scene'] - df['VAS_on_arrival']
    stats['VAS_reduction'] = get_continuous_str(df, 'VAS_reduction', n_patients)

    # - administered analgesics: total dose per patient and drug
    # replace commas with semicolons for consistency
    df['Alle Medikamente'] = df['Alle Medikamente'].str.replace(',', ';', regex=False)
    dose_rows = [
        _analgesic_doses(medications, duration)
        for medications, duration in zip(df['Alle Medikamente'], df['mission_duration'])
    ]
    for column in _ANALGESIC_DOSE_COLUMNS.values():
        # float columns from the start: doses are not necessarily whole numbers
        df[column] = np.array([row[column] for row in dose_rows], dtype=float)

    # fentanyl given
    df['fentanyl_given'] = df['fentanyl_dose'] > 0
    stats['fentanyl'] = get_categorical_str(df, 'fentanyl_given', True, n_patients)
    stats['fentanyl_dose'] = get_continuous_str(df, 'fentanyl_dose', n_patients)

    # morphine given
    df['morphine_given'] = df['morphine_dose'] > 0
    stats['morphine'] = get_categorical_str(df, 'morphine_given', True, n_patients)
    # stats['morphine_dose'] = get_continuous_str(df, 'morphine_dose', n_patients)

    # ketamine / esketamine given (reported together as "ketamine")
    df['ketamine_given'] = df['ketamine_dose'] > 0
    df['esketamine_given'] = df['esketamine_dose'] > 0
    df['any_ketamine_given'] = df['ketamine_given'] | df['esketamine_given']
    stats['ketamine'] = get_categorical_str(df, 'any_ketamine_given', True, n_patients)
    df['any_ketamine_dose'] = df['ketamine_dose'] + df['esketamine_dose']
    # stats['ketamine_dose'] = get_continuous_str(df, 'any_ketamine_dose', n_patients)

    # one statistic per row, single value column
    return pd.DataFrame([stats]).T

In [None]:
# Subgroups by pain level at hospital arrival: VAS > 3 = insufficient analgesia,
# VAS <= 3 = sufficient analgesia. Rows without a VAS on arrival satisfy neither
# comparison and are in neither subgroup.
# table1() adds columns to the frame it receives, so each subgroup is an
# independent copy rather than a slice of the parent frame.
insufficient_adult_df = adult_df[adult_df['VAS_on_arrival'] > 3].copy()
sufficient_adult_df = adult_df[adult_df['VAS_on_arrival'] <= 3].copy()
insufficient_pediatric_df = pediatric_df[pediatric_df['VAS_on_arrival'] > 3].copy()
sufficient_pediatric_df = pediatric_df[pediatric_df['VAS_on_arrival'] <= 3].copy()

In [None]:
adult_table1 = table1(adult_df)
sufficient_adult_df = table1(sufficient_adult_df)
insufficient_adult_table1 = table1(insufficient_adult_df)

pediatric_table1 = table1(pediatric_df)
sufficient_pediatric_table1 = table1(sufficient_pediatric_df)
insufficient_pediatric_table1 = table1(insufficient_pediatric_df)

adult_table1, pediatric_table1

In [None]:
adult_t1 = pd.concat([adult_table1, sufficient_adult_df, insufficient_adult_table1], axis=1, keys=['Adult', 'Sufficient Adult', 'Insufficient Adult'])
adult_t1

In [None]:
# adult_t1.to_csv('/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/adult_table1.csv')
# pediatric_table1.to_csv('/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/analgesia/pediatric_table1.csv')