# Table 3. Factors associated with persisting untreated pain (Medical cases, all ages)

In [None]:
import pandas as pd
import numpy as np
import re
import statsmodels.api as sm
import warnings
from sklearn.preprocessing import StandardScaler
from utils.utils import _extract_venous_access_features
warnings.filterwarnings('ignore')

In [None]:
# Input workbooks read with pd.read_excel in the loading cell:
#   data_path            -> mission-level study export (becomes data_df)
#   medic_data_path      -> physician roster (becomes medic_df)
#   meta_medic_data_path -> physician metadata, joined to the roster on 'full_name'
# NOTE(review): these are absolute, machine-specific paths; the notebook only runs
# as-is on a machine with this exact directory layout.
data_path = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data/rega_data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
medic_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data/rega_data/rega_physician_list_09102025.xlsx'
meta_medic_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-UniversitédeGenève/icu_research/prehospital/analgesia/data/medreg_extraction/joined_final_complete_extractions_20251008_221735.xlsx'
# Cohort switches: keep only rows with 'Einsatzart' == 'Primär' ...
restrict_to_primary = True
# ... and only rows with 'Einteilung (reduziert)' == 'Krankheit'.
restrict_to_medical = True

In [None]:
# Load the mission-level study export, the physician roster and the physician metadata.
data_df = pd.read_excel(data_path)
medic_df = pd.read_excel(medic_data_path)
meta_medic_df = pd.read_excel(meta_medic_data_path)

# Derive the join key for the metadata by removing the role suffix from the roster entry.
# regex=False is required: the suffix contains parentheses, and pandas versions whose
# str.replace defaults to regex=True would parse them as a capture group, so the literal
# suffix would never match and the metadata join below would silently find no partners.
medic_df['full_name'] = medic_df['Mitglieder mit Einsatzfunktion'].str.replace(
    ' (Flugarzt/Flugärztin)', '', regex=False
)
# One roster row per physician entry, so the mission merge below cannot fan out on roster duplicates.
medic_df.drop_duplicates(subset=['Mitglieder mit Einsatzfunktion'], inplace=True)
# Left joins throughout: physicians without metadata and missions without a roster match are kept
# (their physician columns are NaN).
medic_df = medic_df.merge(meta_medic_df, how='left', on='full_name')
medic_df.rename(columns={'Sex m/w': 'physician_sex'}, inplace=True)
data_df = data_df.merge(medic_df, how='left', on='Mitglieder mit Einsatzfunktion')

# One row per mission, restricted to patients whose on-scene VAS is above 3.
data_df = data_df.drop_duplicates(subset=['SNZ Ereignis Nr. '])
data_df = data_df[data_df['VAS_on_scene'].gt(3)]

# The outcome needs an arrival VAS; report how many rows are lost, then drop them.
n_missing_arrival = data_df['VAS_on_arrival'].isna().sum()
print(f'Excluded {n_missing_arrival} patients with missing VAS_on_arrival')
data_df = data_df[data_df['VAS_on_arrival'].notna()]

if restrict_to_medical:
    # Rows with a missing category count as non-medical (they are not equal to 'Krankheit').
    n_non_medical = int(data_df['Einteilung (reduziert)'].ne('Krankheit').sum())
    print(f'Excluded {n_non_medical} non-medical patients')
    data_df = data_df[data_df['Einteilung (reduziert)'].eq('Krankheit')]

if restrict_to_primary:
    n_secondary = int(data_df['Einsatzart'].ne('Primär').sum())
    print(f'Excluded {n_secondary} secondary transport patients')
    data_df = data_df[data_df['Einsatzart'].eq('Primär')]

# Provide a 'NACA' column from the numeric variant when the export lacks the plain name.
if 'NACA (nummerisch)' in data_df.columns and 'NACA' not in data_df.columns:
    data_df = data_df.assign(NACA=data_df['NACA (nummerisch)'])

In [None]:
def univariate_logistic_regression(df, outcome_var, predictor_vars):
    """Fit one single-predictor logistic regression per variable.

    Each model regresses ``outcome_var`` on an intercept plus one predictor and
    is summarised as an odds ratio with its confidence interval and p-value.

    Args:
        df: DataFrame holding the outcome and all predictor columns.
        outcome_var: Name of the outcome column.
        predictor_vars: Iterable of predictor column names, fitted one at a time.

    Returns:
        DataFrame with one row per successfully fitted predictor and the columns
        ``Variable``, ``Coefficient``, ``OR``, ``CI_lower``, ``CI_upper``,
        ``P_value``, ``OR_CI`` and ``P_formatted``. Predictors whose fit raises
        are reported on stdout and omitted, so the result may have fewer rows
        than ``predictor_vars`` (and is empty when nothing could be fitted).
    """
    results = []
    for var in predictor_vars:
        X = df[[var]].copy()
        y = df[outcome_var]
        X_with_const = sm.add_constant(X)
        try:
            model = sm.Logit(y, X_with_const).fit(disp=0)
            coef = model.params[var]
            or_value = np.exp(coef)
            # Compute the interval table once instead of once per bound.
            conf_int = model.conf_int()
            ci_lower = np.exp(conf_int.loc[var, 0])
            ci_upper = np.exp(conf_int.loc[var, 1])
            p_value = model.pvalues[var]
            results.append({
                'Variable': var,
                'Coefficient': coef,
                'OR': or_value,
                'CI_lower': ci_lower,
                'CI_upper': ci_upper,
                'P_value': p_value,
                'OR_CI': f"{or_value:.2f} ({ci_lower:.2f}-{ci_upper:.2f})",
                # Test "< 0.001" rather than ">= 0.001": a NaN p-value fails every
                # comparison and must not be reported as "<0.001".
                'P_formatted': "<0.001" if p_value < 0.001 else f"{p_value:.3f}"
            })
        except Exception as e:
            # Deliberate best-effort: one failing predictor must not abort the whole table.
            print(f"Error with variable {var}: {e}")
    return pd.DataFrame(results)

def multivariate_logistic_regression(df, outcome_var, predictor_vars, normalize=False):
    """Fit a single logistic regression with all predictors entered together.

    Args:
        df: DataFrame holding the outcome and all predictor columns.
        outcome_var: Name of the outcome column.
        predictor_vars: List of predictor column names included in the model.
        normalize: When True, every predictor is standardised with
            ``StandardScaler`` before fitting, so coefficients and odds ratios
            refer to the scaled predictors.

    Returns:
        Tuple ``(results, model)``: ``results`` is a DataFrame with one row per
        predictor (``Variable``, ``Coefficient``, ``OR``, ``CI_lower``,
        ``CI_upper``, ``P_value``, ``OR_CI``, ``P_formatted``); ``model`` is the
        fitted statsmodels result object.

    Unlike the univariate helper, exceptions raised while fitting are not
    caught here and propagate to the caller.
    """
    X = df[predictor_vars].copy()
    y = df[outcome_var]
    if normalize:
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        # Rebuild a DataFrame so column names and row index survive the scaling.
        X = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)
    X_with_const = sm.add_constant(X)
    model = sm.Logit(y, X_with_const).fit(disp=0)
    # The interval table does not change between predictors; compute it once.
    conf_int = model.conf_int()
    results = []
    for var in X.columns:
        coef = model.params[var]
        or_value = np.exp(coef)
        ci_lower = np.exp(conf_int.loc[var, 0])
        ci_upper = np.exp(conf_int.loc[var, 1])
        p_value = model.pvalues[var]
        results.append({
            'Variable': var,
            'Coefficient': coef,
            'OR': or_value,
            'CI_lower': ci_lower,
            'CI_upper': ci_upper,
            'P_value': p_value,
            'OR_CI': f"{or_value:.2f} ({ci_lower:.2f}-{ci_upper:.2f})",
            # Test "< 0.001" rather than ">= 0.001": a NaN p-value fails every
            # comparison and must not be reported as "<0.001".
            'P_formatted': "<0.001" if p_value < 0.001 else f"{p_value:.3f}"
        })
    return pd.DataFrame(results), model

def drop_zero_variance(df, vars_list):
    """Return the variables from ``vars_list`` that take more than one distinct value.

    Missing values are ignored when counting distinct values, so a column that
    is constant apart from NaN is treated as having no variance. The names of
    removed variables are printed; input order is preserved in the result.
    """
    kept, dropped = [], []
    for name in vars_list:
        has_variance = df[name].nunique(dropna=True) > 1
        (kept if has_variance else dropped).append(name)
    if dropped:
        print(f'Dropped zero-variance variables: {dropped}')
    return kept

In [None]:
# Work on a copy so the filtered cohort in data_df is left as loaded.
df = data_df.copy()
# 1 when the arrival VAS is still above 3.
df['insufficient_pain_mgmt'] = df['VAS_on_arrival'].gt(3).astype(int)

# Patient and physician demographics. The equality tests turn missing values into 0.
df['age'] = df['Alter ']
df['male_patient'] = df['Geschlecht'].eq('Männlich').astype(int)
df['male_physician'] = df['physician_sex'].eq('m').astype(int)

# Physician age and experience are measured at the year of the mission.
df['event_year'] = pd.to_datetime(df['Ereignisdatum'], format='%d.%m.%Y').dt.year
df['physician_age'] = df['event_year'] - df['year_of_birth']
# Keep the text after the last '.' of the licence date (the whole value when there is no '.').
# NOTE(review): presumably licence_date is a 'dd.mm.yyyy' string; a float year such as 2010.0
# would yield '0' here — confirm against the extraction.
df['physician_licence_year'] = df['licence_date'].apply(lambda raw: str(raw).rsplit('.', 1)[-1])
licence_year = pd.to_numeric(df['physician_licence_year'], errors='coerce')
df['physician_experience_years'] = df['event_year'] - licence_year

# Specialty indicators from the qualification text; a missing qualification counts as 0.
specialty_patterns = {
    'physician_anesthesiologist': 'Anaesthesiology',
    'physician_intensivist': 'Intensive care medicine',
    'physician_internist': 'General Internal Medicine|General medical practitioner',
}
for specialty_column, specialty_pattern in specialty_patterns.items():
    df[specialty_column] = df['specialist_qualifications'].str.contains(specialty_pattern, na=False).astype(int)

# Mission characteristics.
df['primary_mission'] = df['Einsatzart'].eq('Primär').astype(int)
df['night_mission'] = df['Tag oder Nacht'].eq('Nacht').astype(int)
winter_months = ['Oktober', 'November', 'Dezember', 'Januar', 'Februar', 'März']
df['winter_season'] = df['Monat'].isin(winter_months).astype(int)
df['winch_extraction'] = df['Bergungen'].str.contains('Winde', na=False).astype(int)
df['vas_scene'] = df['VAS_on_scene']
# Minutes between the first assessment and the handover.
handover_time = pd.to_datetime(df['Übergabezeit'], format='%d.%m.%Y %H:%M:%S')
first_assessment_time = pd.to_datetime(df['Erstbefund'], format='%d.%m.%Y %H:%M:%S')
df['mission_duration'] = (handover_time - first_assessment_time).dt.total_seconds() / 60

# Venous access features come from a project helper and are appended column-wise.
# NOTE(review): assumes the helper returns a frame indexed like df with a
# 'venous_access_count' column — confirm in utils.utils.
venous_access_features = _extract_venous_access_features(df['Zugänge'])
df = pd.concat([df, venous_access_features], axis=1)
df['no_venous_access'] = df['venous_access_count'].eq(0).astype(int)

# Cumulative analgesic dose per patient, parsed from the free-text medication list.
# The columns start as floats because fractional doses are accumulated into them
# cell by cell below; writing a float into an integer column is deprecated in recent pandas.
df['fentanyl_dose'] = 0.0
df['ketamine_dose'] = 0.0
df['esketamine_dose'] = 0.0
df['morphine_dose'] = 0.0
# Unify the entry separator so every medication entry can be split on ';'.
# NOTE(review): this would also split a dose written with a decimal comma — confirm
# that the export never uses decimal commas.
df['Alle Medikamente'] = df['Alle Medikamente'].str.replace(',', ';', regex=False)
for i, row in df.iterrows():
    if pd.isna(row['Alle Medikamente']) or row['Alle Medikamente'] == 0:
        continue
    for analgetic in row['Alle Medikamente'].split(';'):
        if analgetic.strip() == '':
            continue
        # Entries containing '7IE' are skipped.
        # NOTE(review): reason not visible here; presumably a non-analgesic entry — confirm.
        if '7IE' in analgetic:
            continue
        # Unit labels are removed without conversion, so doses are summed in whatever
        # unit the entry was recorded in.
        analgetic = analgetic.replace('mcg', '').replace('mg', '').strip()
        if 'Fentanyl' in analgetic:
            dose = analgetic.split('Fentanyl')[-1].strip()
            if '/h' in analgetic:
                # Infusion rate per hour: mission_duration is in minutes, so convert it
                # to hours before multiplying (otherwise the dose is inflated 60-fold).
                mission_hours = df.at[i, 'mission_duration'] / 60
                df.at[i, 'fentanyl_dose'] += float(dose.replace('/h', '')) * mission_hours
            else:
                df.at[i, 'fentanyl_dose'] += float(dose)
        elif 'Ketamin' in analgetic:
            # Case-sensitive match: also covers 'Ketamine', but not 'Esketamin' (lower-case k).
            dose = analgetic.split('Ketamin')[-1].strip()
            df.at[i, 'ketamine_dose'] += float(dose)
        elif 'Esketamin' in analgetic:
            dose = analgetic.split('Esketamin')[-1].strip()
            df.at[i, 'esketamine_dose'] += float(dose)
        elif 'Morphin' in analgetic:
            # Also covers 'Morphine'.
            dose = analgetic.split('Morphin')[-1].strip()
            df.at[i, 'morphine_dose'] += float(dose)

# Totals per drug class; a NaN in either component propagates into the sum.
df['any_opiate_dose'] = df['morphine_dose'].add(df['fentanyl_dose'])
df['any_ketamine_dose'] = df['ketamine_dose'].add(df['esketamine_dose'])
df['any_opiate_given'] = df['morphine_dose'].gt(0) | df['fentanyl_dose'].gt(0)
df['any_ketamine_given'] = df['ketamine_dose'].gt(0) | df['esketamine_dose'].gt(0)
# No analgesic means neither an opiate nor a ketamine dose above zero.
df['no_analgesic'] = (~(df['any_opiate_given'] | df['any_ketamine_given'])).astype(int)
# Outcome: arrival VAS still above 3 and no opiate or ketamine recorded.
df['persisting_untreated_pain'] = (
    df['insufficient_pain_mgmt'].eq(1) & df['no_analgesic'].eq(1)
).astype(int)

# Outcome first, then the candidate predictors; names absent from df are skipped.
candidate_vars = [
    'persisting_untreated_pain', 'age', 'NACA', 'male_patient', 'male_physician',
    'physician_age', 'physician_experience_years', 'physician_anesthesiologist',
    'physician_intensivist', 'physician_internist', 'mission_duration', 'primary_mission',
    'night_mission', 'winter_season', 'winch_extraction', 'vas_scene',
    'venous_access_count', 'no_venous_access'
]
model_vars = [name for name in candidate_vars if name in df.columns]
# Complete-case analysis: any row with a missing model variable is dropped.
df_clean = df[model_vars].dropna()
predictor_vars = drop_zero_variance(
    df_clean,
    [name for name in model_vars if name != 'persisting_untreated_pain'],
)

n_included = len(df_clean)
outcome_rate = df_clean['persisting_untreated_pain'].mean()
print(f'Medical patients included: {n_included}')
print(f'Outcome rate (persisting untreated pain): {outcome_rate:.1%}')

In [None]:
# Fit every predictor on its own, then all predictors jointly, on the complete-case cohort.
univariate_results = univariate_logistic_regression(
    df=df_clean,
    outcome_var='persisting_untreated_pain',
    predictor_vars=predictor_vars,
)
multivariate_results, multivariate_model = multivariate_logistic_regression(
    df=df_clean,
    outcome_var='persisting_untreated_pain',
    predictor_vars=predictor_vars,
)

# Cell output: univariate associations, smallest p-value first.
univariate_results.sort_values(by='P_value')

In [None]:
# Cell output: multivariable associations, smallest p-value first.
multivariate_results.sort_values(by='P_value')