# Medication preference per speciality

In [None]:
import pandas as pd
import re
import numpy as np

In [None]:
# All three input workbooks live under the same OneDrive data folder.
_analgesia_data_dir = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data'

# Workbook loaded into data_df (missions / pain scores).
data_path = f'{_analgesia_data_dir}/rega_data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
# Workbook loaded into medic_df (physician list).
medic_data_path = f'{_analgesia_data_dir}/rega_data/rega_physician_list_09102025.xlsx'
# Workbook loaded into meta_medic_df (physician metadata joined on full_name).
meta_medic_data_path = f'{_analgesia_data_dir}/medreg_extraction/joined_final_complete_extractions_20251008_221735.xlsx'

In [None]:
# Cohort switches used by the filtering cells below.
# When True, only rows with 'Einteilung (reduziert)' == 'Unfall' are kept.
restrict_to_trauma = True
# When True, only rows with 'Einsatzart' == 'Primär' are kept.
restrict_to_primary = True

In [None]:
# Read the three Excel workbooks (missions, physician list, physician metadata)
# in that order, using pandas' default sheet and header handling.
data_df, medic_df, meta_medic_df = (
    pd.read_excel(workbook_path)
    for workbook_path in (data_path, medic_data_path, meta_medic_data_path)
)

In [None]:
# Attach physician metadata to every mission.
#
# 1. Strip the role suffix from the roster name to obtain the join key used by
#    the metadata workbook. regex=False is required: the suffix contains
#    parentheses, which pandas < 2.0 (regex=True by default) would interpret as
#    a regex group, leaving the suffix in place and breaking the join below.
medic_df['full_name'] = medic_df['Mitglieder mit Einsatzfunktion'].str.replace(
    ' (Flugarzt/Flugärztin)', '', regex=False
)
# 2. Keep one roster row per physician so the mission merge cannot fan out on
#    the roster side.
medic_df = medic_df.drop_duplicates(subset=['Mitglieder mit Einsatzfunktion'])
# 3. Left joins: physicians without metadata (and missions without a roster
#    match) are kept with NaN in the metadata columns.
# NOTE(review): if meta_medic_df holds several rows per full_name this merge
# multiplies roster rows, and therefore missions - confirm the key is unique.
medic_df = medic_df.merge(meta_medic_df, how='left', on='full_name')
medic_df = medic_df.rename(columns={'Sex m/w': 'physician_sex'})
data_df = data_df.merge(medic_df, how='left', on='Mitglieder mit Einsatzfunktion')

In [None]:
# Report how many mission numbers occur more than once, then keep only the
# first row of each mission number.
event_id_column = "SNZ Ereignis Nr. "
is_repeated_event = data_df[event_id_column].duplicated()
duplicates = data_df.loc[is_repeated_event, event_id_column]
print(f'Duplicates found: {duplicates.nunique()}')
data_df = data_df.drop_duplicates(subset=[event_id_column])

In [None]:
# Exclusion step: on-scene VAS of 3 or less. Counts are reported overall and
# split at an age of 16 before the rows are removed.
vas_on_scene = data_df["VAS_on_scene"]
patient_age = data_df["Alter "]
low_pain = vas_on_scene <= 3

n_vas_under4 = int(low_pain.sum())
print(f'Excluded {n_vas_under4} patients with VAS <= 3')

# adults (age >= 16) with VAS <= 3
n_adult_vas_under4 = int((low_pain & (patient_age >= 16)).sum())
print(f'Excluded {n_adult_vas_under4} adult patients with VAS <= 3')

# children (age < 16) with VAS <= 3
n_pediatric_vas_under4 = int((low_pain & (patient_age < 16)).sum())
print(f'Excluded {n_pediatric_vas_under4} pediatric patients with VAS <= 3')

# Rows with a missing on-scene VAS fail this comparison and are dropped as
# well, although they are not part of the counts printed above.
data_df = data_df[vas_on_scene > 3]

In [None]:
if restrict_to_trauma:
    # Exclusion step: everything not classified as 'Unfall'. Counts are
    # reported overall and split at an age of 16.
    category = data_df['Einteilung (reduziert)']
    patient_age = data_df["Alter "]
    is_non_trauma = category != 'Unfall'

    n_non_trauma = int(is_non_trauma.sum())
    print(f'Excluded {n_non_trauma} non-trauma patients')

    # adults (age >= 16)
    n_adult_non_trauma = int((is_non_trauma & (patient_age >= 16)).sum())
    print(f'Excluded {n_adult_non_trauma} adult non-trauma patients')
    # children (age < 16)
    n_pediatric_non_trauma = int((is_non_trauma & (patient_age < 16)).sum())
    print(f'Excluded {n_pediatric_non_trauma} pediatric non-trauma patients')

    data_df = data_df[category == 'Unfall']

In [None]:
if restrict_to_primary:
    # Exclusion step: every mission whose type is not 'Primär'. Counts are
    # reported overall and split at an age of 16.
    mission_type = data_df['Einsatzart']
    patient_age = data_df["Alter "]
    is_not_primary = mission_type != 'Primär'

    n_secondary = int(is_not_primary.sum())
    print(f'Excluded {n_secondary} secondary transport patients')

    # adults (age >= 16)
    n_adult_secondary = int((is_not_primary & (patient_age >= 16)).sum())
    print(f'Excluded {n_adult_secondary} adult secondary transport patients')
    # children (age < 16)
    n_pediatric_secondary = int((is_not_primary & (patient_age < 16)).sum())
    print(f'Excluded {n_pediatric_secondary} pediatric secondary transport patients')

    data_df = data_df[mission_type == 'Primär']


In [None]:
# Split the cohort at an age of 16; rows with a missing age end up in neither
# frame because both comparisons are False for NaN.
patient_age = data_df["Alter "]
adult_df = data_df.loc[patient_age >= 16]
pediatric_df = data_df.loc[patient_age < 16]

In [None]:
# Keep only adults with a recorded VAS on arrival.
adult_df = adult_df.dropna(subset=['VAS_on_arrival'])

In [None]:
# Number of adults remaining after all exclusions.
adult_df.shape[0]

In [None]:
def _licence_year(value):
    """Return the licence year encoded in ``value`` as a number, or NaN.

    ``licence_date`` can be a bare year or a ``d.m.Y`` string. Numeric cells
    are returned as-is: a whole-number float such as ``2010.0`` must not be
    split on its decimal point, which would yield the year ``0``.
    """
    if pd.isna(value):
        return np.nan
    if isinstance(value, (int, float, np.integer, np.floating)):
        return float(value)
    # Date-like cells (e.g. Excel dates parsed by pandas) expose the year directly.
    year = getattr(value, 'year', None)
    if year is not None:
        return float(year)
    # 'd.m.Y' -> text after the last dot; a bare 'Y' string has no dot and is used whole.
    return pd.to_numeric(str(value).strip().split('.')[-1], errors='coerce')


# adult_df was produced by filtering data_df; take an explicit copy so the
# column assignments below do not trigger SettingWithCopyWarning.
adult_df = adult_df.copy()

adult_df['event_year'] = pd.to_datetime(adult_df['Ereignisdatum'], format='%d.%m.%Y').dt.year
adult_df['physician_age'] = adult_df['event_year'] - adult_df['year_of_birth']

# physician year of final exam (from licence_date which can be either d.m.Y or Y);
# stored as a number, NaN when the value cannot be read
adult_df['physician_licence_year'] = adult_df['licence_date'].apply(_licence_year)
# The misspelled column name is kept for backward compatibility; the correctly
# spelled alias holds the same values.
adult_df['phyisician_experience_years'] = adult_df['event_year'] - adult_df['physician_licence_year']
adult_df['physician_experience_years'] = adult_df['phyisician_experience_years']

# Speciality flags: regex/substring match on the qualification text, missing text -> False.
adult_df['physician_anesthesiologist'] = adult_df['specialist_qualifications'].str.contains('Anaesthesiology', na=False)
adult_df['physician_intensivist'] = adult_df['specialist_qualifications'].str.contains('Intensive care medicine', na=False)
adult_df['physician_internist'] = adult_df['specialist_qualifications'].str.contains('General Internal Medicine|General medical practitioner', na=False)

In [None]:
# Create medication dose variables (matching Table 1 approach)

# (name stem searched in an entry, dose column). Stems are matched
# case-sensitively and in this order; the first stem found decides the column.
_DRUG_DOSE_COLUMNS = (
    ('Fentanyl', 'fentanyl_dose'),
    ('Ketamin', 'ketamine_dose'),
    ('Esketamin', 'esketamine_dose'),
    ('Morphin', 'morphine_dose'),
)


def _parse_medication_doses(medications, mission_duration=None):
    """Sum the doses found in one ';'-separated medication string, per drug.

    Args:
        medications: the medication text of one mission; anything that is not
            a string (e.g. NaN) yields all-zero doses.
        mission_duration: multiplier applied to fentanyl entries given as a
            rate ('/h'). Only needed when such an entry is present.

    Returns:
        dict mapping each dose column name to the summed dose (float).

    Raises:
        ValueError: an entry names a known drug but its dose is not a number.
        KeyError: a '/h' fentanyl entry is present but no duration was given.
    """
    totals = {column: 0.0 for _, column in _DRUG_DOSE_COLUMNS}
    if not isinstance(medications, str):
        return totals

    for raw_entry in medications.split(';'):
        if raw_entry.strip() == '':
            continue
        if '7IE' in raw_entry:
            # Entries containing '7IE' are skipped and reported.
            print(f"Skipping dose with 7IE: {raw_entry}")
            continue

        # remove mcg or mg from dose
        # NOTE(review): units are discarded, so mg and mcg values of the same
        # drug would be summed as if equal - confirm units are consistent.
        entry = raw_entry.replace('mcg', '').replace('mg', '').strip()

        matched = next(((stem, column) for stem, column in _DRUG_DOSE_COLUMNS if stem in entry), None)
        if matched is None:
            continue  # not one of the tracked analgesics
        stem, column = matched

        # Text after the (last) drug name is the dose. English spellings
        # ('Ketamine', 'Morphine', 'Esketamine') leave a stray 'e' behind the
        # German stem, which previously made float() fail.
        dose_text = entry.rpartition(stem)[2]
        if dose_text.startswith('e'):
            dose_text = dose_text[1:]

        # Only fentanyl rates are supported, as before; a '/h' on any other
        # drug still fails the numeric conversion below.
        is_infusion = column == 'fentanyl_dose' and '/h' in entry
        if is_infusion:
            dose_text = dose_text.replace('/h', '')

        try:
            dose = float(dose_text.strip())
        except ValueError as err:
            raise ValueError(f"Cannot parse dose from medication entry {raw_entry!r}") from err

        if is_infusion:
            if mission_duration is None:
                raise KeyError('mission_duration')
            # NOTE(review): rate is per hour - assumes mission_duration is in hours; confirm.
            dose = dose * mission_duration
        totals[column] += dose
    return totals


# adult_df is a filtered slice of data_df; copy it before adding columns.
adult_df = adult_df.copy()

# replace commas with semicolons for consistency
# NOTE(review): a decimal comma inside a dose (e.g. '0,5') would be split here - confirm none exist.
adult_df['Alle Medikamente'] = adult_df['Alle Medikamente'].str.replace(',', ';', regex=False)

# mission_duration is only needed for '/h' entries, so tolerate its absence
# until such an entry is actually met.
if 'mission_duration' in adult_df.columns:
    _mission_durations = adult_df['mission_duration']
else:
    _mission_durations = [None] * len(adult_df)

# Float columns from the start: doses are fractional, and writing floats into
# int-initialised columns is an incompatible-dtype upcast in recent pandas.
_dose_table = pd.DataFrame(
    [
        _parse_medication_doses(medication_text, duration)
        for medication_text, duration in zip(adult_df['Alle Medikamente'], _mission_durations)
    ],
    columns=[column for _, column in _DRUG_DOSE_COLUMNS],
    dtype=float,
)
for _dose_column in _dose_table.columns:
    # Positional assignment: _dose_table rows are in adult_df row order.
    adult_df[_dose_column] = _dose_table[_dose_column].to_numpy()

# Create medication variables
adult_df['fentanyl_given'] = adult_df['fentanyl_dose'] > 0
adult_df['morphine_given'] = adult_df['morphine_dose'] > 0
adult_df['ketamine_given'] = adult_df['ketamine_dose'] > 0
adult_df['esketamine_given'] = adult_df['esketamine_dose'] > 0

# Create combined medication variables (PRIMARY VARIABLES OF INTEREST)
adult_df['any_opiate_dose'] = adult_df['morphine_dose'] + adult_df['fentanyl_dose']
adult_df['any_ketamine_dose'] = adult_df['ketamine_dose'] + adult_df['esketamine_dose']
adult_df['any_opiate_given'] = (adult_df['morphine_dose'] > 0) | (adult_df['fentanyl_dose'] > 0)
adult_df['any_ketamine_given'] = (adult_df['ketamine_dose'] > 0) | (adult_df['esketamine_dose'] > 0)


In [None]:
# Violin plots with physician speciality on the x axis and medication dose on
# the y axis, one panel per medication group (any opiate, any ketamine).
import seaborn as sns
import matplotlib.pyplot as plt

order = ["Anesthesiologist", "Intensivist", "Internist"]
speciality_flags = ["physician_anesthesiologist", "physician_intensivist", "physician_internist"]

# One label per mission; the first matching flag in `order` wins, physicians
# matching none stay NaN. Built as an object array because np.select with
# string choices and default=np.nan either raises (newer NumPy) or produces
# the string 'nan', which the dropna below would not remove.
speciality = np.full(len(adult_df), np.nan, dtype=object)
for flag_column, label in reversed(list(zip(speciality_flags, order))):
    # Lower-priority labels are written first and overwritten by higher ones.
    speciality[adult_df[flag_column].to_numpy(dtype=bool)] = label

plot_df = adult_df.copy()
plot_df["physician_speciality"] = speciality
plot_df = plot_df.dropna(subset=["physician_speciality"])

# Long format: one row per (mission, medication group).
dose_long = plot_df.melt(
    id_vars=["physician_speciality"],
    value_vars=["any_opiate_dose", "any_ketamine_dose"],
    var_name="medication_group",
    value_name="dose",
)
dose_long = dose_long[dose_long["dose"].notna()]
dose_long["medication_group"] = dose_long["medication_group"].replace({
    "any_opiate_dose": "Any opiate",
    "any_ketamine_dose": "Any ketamine",
})

# Zero doses (drug not given) are kept and are part of the plotted distributions.
dose_long_plot = dose_long[dose_long["dose"] >= 0]

# Option to remove extremes above 99th percentile (per medication group)
remove_extremes = True
if remove_extremes:
    p99 = dose_long_plot.groupby("medication_group")["dose"].quantile(0.99)
    # Look up each row's group threshold in one vectorised step.
    group_threshold = dose_long_plot["medication_group"].map(p99)
    dose_long_plot = dose_long_plot[dose_long_plot["dose"] <= group_threshold]

fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True)

opiate_df = dose_long_plot[dose_long_plot["medication_group"] == "Any opiate"]
ketamine_df = dose_long_plot[dose_long_plot["medication_group"] == "Any ketamine"]

# NOTE(review): 'any_opiate_dose' adds morphine and fentanyl doses and
# 'any_ketamine_dose' adds ketamine and esketamine doses; confirm the summed
# values really share the unit shown on each axis label.
panels = [
    (axes[0], opiate_df, "Opiate dose (mcg)"),
    (axes[1], ketamine_df, "Ketamine dose (mg)"),
]
for ax, panel_df, y_label in panels:
    sns.violinplot(
        data=panel_df,
        x="physician_speciality",
        y="dose",
        order=order,
        hue="physician_speciality",
        hue_order=order,  # same colour per speciality in both panels
        ax=ax,
        cut=0,  # do not extend the density beyond the observed range
    )
    ax.set_xlabel("")
    ax.set_ylabel(y_label)
    # hue duplicates the x axis, so the legend carries no extra information.
    if ax.legend_ is not None:
        ax.legend_.remove()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.yaxis.grid(True, linestyle="--", alpha=0.4)

plt.tight_layout()
plt.show()

In [None]:
# fig.savefig('/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/analysis/adult_trauma/medication_preference_per_speciality', dpi=300)