# Physician sex distribution

In [None]:
import pandas as pd
import re
import numpy as np

In [None]:
# All input workbooks live below one shared data directory.
_data_root = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data'

# Mission table (loaded into data_df).
data_path = f'{_data_root}/rega_data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
# Physician list (loaded into medic_df).
medic_data_path = f'{_data_root}/rega_data/rega_physician_list_09102025.xlsx'
# Physician metadata (loaded into meta_medic_df, joined on full_name).
meta_medic_data_path = f'{_data_root}/medreg_extraction/joined_final_complete_extractions_20251008_221735.xlsx'

In [None]:
# Cohort switches read by the filtering cells further down.
# When True, only rows whose 'Einteilung (reduziert)' equals 'Unfall' are kept.
restrict_to_trauma = True
# When True, only rows whose 'Einsatzart' equals 'Primär' are kept.
restrict_to_primary = True

In [None]:
# Load the three Excel workbooks: missions, physician list, physician metadata.
data_df, medic_df, meta_medic_df = (
    pd.read_excel(workbook)
    for workbook in (data_path, medic_data_path, meta_medic_data_path)
)

In [None]:
# Attach physician metadata (sex, birth year, licence, qualifications) to every mission.

# Strip the role suffix so the physician name can be matched against `full_name`
# in the metadata table. regex=False is explicit because the suffix contains
# parentheses: pandas < 2.0 defaulted to regex=True, where the parentheses form
# a capture group, the literal text is never matched and the merge below
# silently finds no partner rows.
medic_df['full_name'] = medic_df['Mitglieder mit Einsatzfunktion'].str.replace(
    ' (Flugarzt/Flugärztin)', '', regex=False
)
# Keep one row per physician so the mission merge cannot multiply rows because
# of repeated entries in the physician list.
medic_df = medic_df.drop_duplicates(subset=['Mitglieder mit Einsatzfunktion'])
medic_df = medic_df.merge(meta_medic_df, how='left', on='full_name')
medic_df = medic_df.rename(columns={'Sex m/w': 'physician_sex'})
# Left join: missions without a matching physician keep NaN in the physician columns.
data_df = data_df.merge(medic_df, how='left', on='Mitglieder mit Einsatzfunktion')

In [None]:
# Report event numbers that occur more than once, then keep the first row of each.
event_numbers = data_df["SNZ Ereignis Nr. "]
duplicates = event_numbers[event_numbers.duplicated()]
print(f'Duplicates found: {duplicates.nunique()}')
data_df = data_df.drop_duplicates(subset=["SNZ Ereignis Nr. "])

In [None]:
# Preview the first rows of the merged, de-duplicated mission table.
data_df.head()

In [None]:
# Overall physician sex distribution: keep the first mission row of every
# physician so that each physician is counted exactly once.
first_row_of_physician = ~data_df['Mitglieder mit Einsatzfunktion'].duplicated()
unique_physician_df = data_df[first_row_of_physician]



In [None]:
# Relative frequency of each physician_sex value among the unique physicians.
unique_physician_df['physician_sex'].value_counts(normalize=True)


In [None]:
# Active physicians by sex, printed as n/N (%); physicians without a known sex
# are left out of both numerator and denominator.
known_sex = unique_physician_df['physician_sex'].dropna()
sex_counts = known_sex.value_counts()
sex_total = sex_counts.sum()
for label, n_physicians in sex_counts.items():
    if sex_total:
        share = 100 * n_physicians / sex_total
    else:
        share = 0
    print(f"{label}: {n_physicians}/{sex_total} ({share:.1f}%)")

In [None]:
# Exclusion step: only patients with an on-scene VAS above 3 stay in the cohort.
vas_on_scene = data_df["VAS_on_scene"]
patient_age = data_df["Alter "]
low_vas = vas_on_scene <= 3

n_vas_under4 = int(low_vas.sum())
print(f'Excluded {n_vas_under4} patients with VAS <= 3')

# adult patients with vas <= 3
n_adult_vas_under4 = int((low_vas & (patient_age >= 16)).sum())
print(f'Excluded {n_adult_vas_under4} adult patients with VAS <= 3')

# pediatric patients with vas <= 3
n_pediatric_vas_under4 = int((low_vas & (patient_age < 16)).sum())
print(f'Excluded {n_pediatric_vas_under4} pediatric patients with VAS <= 3')

# A missing VAS compares False against any threshold, so the `> 3` filter below
# removes those rows as well; report them so the exclusion counts add up.
n_vas_missing = int(vas_on_scene.isna().sum())
print(f'Excluded {n_vas_missing} patients with missing VAS')

data_df = data_df[vas_on_scene > 3]

In [None]:
if restrict_to_trauma:
    # Rows whose category is anything other than 'Unfall' are counted
    # (overall, adult, pediatric) and then removed from the cohort.
    not_trauma = data_df['Einteilung (reduziert)'] != 'Unfall'
    is_adult = data_df["Alter "] >= 16
    is_pediatric = data_df["Alter "] < 16

    n_non_trauma = len(data_df[not_trauma])
    print(f'Excluded {n_non_trauma} non-trauma patients')

    n_adult_non_trauma = len(data_df[not_trauma & is_adult])
    print(f'Excluded {n_adult_non_trauma} adult non-trauma patients')

    n_pediatric_non_trauma = len(data_df[not_trauma & is_pediatric])
    print(f'Excluded {n_pediatric_non_trauma} pediatric non-trauma patients')

    data_df = data_df[data_df['Einteilung (reduziert)'] == 'Unfall']

In [None]:
if restrict_to_primary:
    # Rows whose mission type is anything other than 'Primär' are counted
    # (overall, adult, pediatric) and then removed from the cohort.
    not_primary = data_df['Einsatzart'] != 'Primär'
    is_adult = data_df["Alter "] >= 16
    is_pediatric = data_df["Alter "] < 16

    n_secondary = len(data_df[not_primary])
    print(f'Excluded {n_secondary} secondary transport patients')

    n_adult_secondary = len(data_df[not_primary & is_adult])
    print(f'Excluded {n_adult_secondary} adult secondary transport patients')

    n_pediatric_secondary = len(data_df[not_primary & is_pediatric])
    print(f'Excluded {n_pediatric_secondary} pediatric secondary transport patients')

    data_df = data_df[data_df['Einsatzart'] == 'Primär']


In [None]:
# Split the cohort at 16 years of age: adults are >= 16, pediatric patients < 16.
age_in_years = data_df["Alter "]
adult_df = data_df[age_in_years >= 16]
pediatric_df = data_df[age_in_years < 16]

In [None]:
# Keep only adult missions that have a recorded VAS on arrival.
has_arrival_vas = adult_df['VAS_on_arrival'].notna()
adult_df = adult_df[has_arrival_vas]

In [None]:
# Number of adult missions remaining after the filters applied above.
len(adult_df)

In [None]:
# Physician sex per analysed adult mission, printed as n/N (%); missions
# without a known physician sex are left out of numerator and denominator.
known_mission_sex = adult_df['physician_sex'].dropna()
mission_sex_counts = known_mission_sex.value_counts()
mission_total = mission_sex_counts.sum()
for label, n_missions in mission_sex_counts.items():
    if mission_total:
        share = 100 * n_missions / mission_total
    else:
        share = 0
    print(f"{label}: {n_missions}/{mission_total} ({share:.1f}%)")

In [None]:
# Derive per-mission physician characteristics (age, experience, specialty flags).

# adult_df was produced by boolean indexing on data_df; take an explicit copy
# so the column assignments below do not raise pandas' SettingWithCopyWarning.
adult_df = adult_df.copy()

# Mission year, parsed from the day.month.year event date.
adult_df['event_year'] = pd.to_datetime(adult_df['Ereignisdatum'], format='%d.%m.%Y').dt.year
adult_df['physician_age'] = adult_df['event_year'] - adult_df['year_of_birth']
# physician year of final exam (from licence_date which can be either d.m.Y or Y):
# take the part after the last dot, or the whole value when there is no dot.
adult_df['physician_licence_year'] = adult_df['licence_date'].apply(lambda x: str(x).split('.')[-1] if '.' in str(x) else str(x))
# Values that are not numeric (e.g. a missing licence date) become NaN here.
# NOTE(review): the column name is misspelled ('phyisician'); it is kept as-is
# because code outside this section may read it under that name.
adult_df['phyisician_experience_years'] = adult_df['event_year'] - pd.to_numeric(adult_df['physician_licence_year'], errors='coerce')

# Specialty flags: substring match on the qualifications text; a missing
# qualification counts as False.
adult_df['physician_anesthesiologist'] = adult_df['specialist_qualifications'].str.contains('Anaesthesiology', na=False)
adult_df['physician_intensivist'] = adult_df['specialist_qualifications'].str.contains('Intensive care medicine', na=False)
adult_df['physician_internist'] = adult_df['specialist_qualifications'].str.contains('General Internal Medicine|General medical practitioner', na=False)

In [None]:
# Physician sex among missions flown by an anesthesiologist, printed as n/N (%).
anesth_sex = adult_df.loc[adult_df['physician_anesthesiologist'], 'physician_sex']
anesth_mission_sex_counts = anesth_sex.dropna().value_counts()
mission_total = anesth_mission_sex_counts.sum()
for label, n_missions in anesth_mission_sex_counts.items():
    if mission_total:
        share = 100 * n_missions / mission_total
    else:
        share = 0
    print(f"{label}: {n_missions}/{mission_total} ({share:.1f}%)")

In [None]:
# Physician sex among missions flown by an intensivist, printed as n/N (%).
intensivist_sex = adult_df.loc[adult_df['physician_intensivist'], 'physician_sex']
intensivist_mission_sex_counts = intensivist_sex.dropna().value_counts()
mission_total = intensivist_mission_sex_counts.sum()
for label, n_missions in intensivist_mission_sex_counts.items():
    if mission_total:
        share = 100 * n_missions / mission_total
    else:
        share = 0
    print(f"{label}: {n_missions}/{mission_total} ({share:.1f}%)")

In [None]:
# Physician sex among missions flown by an internist, printed as n/N (%).
internist_sex = adult_df.loc[adult_df['physician_internist'], 'physician_sex']
internist_mission_sex_counts = internist_sex.dropna().value_counts()
mission_total = internist_mission_sex_counts.sum()
for label, n_missions in internist_mission_sex_counts.items():
    if mission_total:
        share = 100 * n_missions / mission_total
    else:
        share = 0
    print(f"{label}: {n_missions}/{mission_total} ({share:.1f}%)")

In [None]:
# Bar chart: percentage on the y axis, one bar per category on the x axis
# (active physicians, all missions, anesthesiologists, intensivists, internists).
# Each bar is split by physician sex (male / female).
import matplotlib.pyplot as plt
from pathlib import Path

def sex_percentages(series):
    """Return the percentage share of each non-missing value in *series*.

    The result is a float Series indexed by the distinct values in sorted
    order. When *series* holds no non-missing values, an empty float Series
    is returned instead.
    """
    tally = series.dropna().value_counts()
    n_known = tally.sum()
    if n_known == 0:
        return pd.Series(dtype=float)
    return tally.div(n_known).mul(100).sort_index()

# One series of physician_sex values per bar of the chart.
categories = {
    "Active physicians": unique_physician_df["physician_sex"],
    "Analysed missions": adult_df["physician_sex"],
}
for bar_label, flag_column in (
    ("Anesthesiologists", "physician_anesthesiologist"),
    ("Intensivists", "physician_intensivist"),
    ("Internists", "physician_internist"),
):
    categories[bar_label] = adult_df.loc[adult_df[flag_column], "physician_sex"]

# Percentage table: one row per category, one column per sex label; a label
# that does not occur in a category counts as 0 %.
pct_by_category = {bar_label: sex_percentages(values) for bar_label, values in categories.items()}
pct_df = pd.DataFrame(pct_by_category).T.fillna(0)

# Map the raw sex codes to Male/Female and put those two columns first.
label_map = {"m": "Male", "w": "Female", "male": "Male", "female": "Female", "M": "Male", "F": "Female"}
pct_df = pct_df.rename(columns=label_map)
leading_cols = [label for label in ("Male", "Female") if label in pct_df.columns]
ordered_cols = leading_cols + [label for label in pct_df.columns if label not in leading_cols]
pct_df = pct_df[ordered_cols]

# Stacked bars, one colour per sex label.
palette = ["#4C78A8", "#F58518", "#54A24B", "#B279A2"]
ax = pct_df.plot.bar(stacked=True, figsize=(9, 5), color=palette[:len(pct_df.columns)])
ax.set_ylabel("Percentage (%)")
ax.set_xlabel("")
ax.set_ylim(0, 100)
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.yaxis.grid(True, linestyle="--", alpha=0.4)
plt.xticks(rotation=0)
# Legend outside the plotting area, to the right of the bars.
ax.legend(title="", bbox_to_anchor=(1.02, 1), loc="upper left", frameon=False)

# Keep a handle on the figure so it can be saved in a later cell.
fig = ax.get_figure()

plt.tight_layout()
plt.show()

In [None]:
# Directory for figures of the adult trauma analysis. It is not read by any
# code in this section; the save call below is commented out and hard-codes
# the same directory.
output_dir = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/analysis/adult_trauma'
# Saving is disabled; uncomment to write the chart as a 300 dpi PNG.
# fig.savefig('/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/analysis/adult_trauma/physician_sex_distribution.png', dpi=300)
