In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels
from scipy.stats import wilcoxon

In [None]:
# Location of the filtered master spreadsheet. The default is a machine-specific absolute path;
# set the INTUBATION_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell.
data_path = os.environ.get(
    'INTUBATION_DATA_PATH',
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/intubation_trigger/data/Mastertabelle_filtered.xlsx',
)

In [None]:
# Load the spreadsheet at data_path into a DataFrame
df = pd.read_excel(data_path)

In [None]:
df.head()

In [None]:
# Drop rows whose Comment is exactly 'Reanimation'. Rows with a missing Comment are kept,
# because NaN != 'Reanimation' evaluates to True.
# .copy() makes the result an independent frame, so the columns added to df in later cells
# do not raise SettingWithCopyWarning.
df = df[df.Comment != 'Reanimation'].copy()

In [None]:
df['Einteilung (Hauptdiagnose)'].value_counts()

In [None]:
# Count missing values (NaN) in the 'Einteilung (Hauptdiagnose)' column
df['Einteilung (Hauptdiagnose)'].isna().sum()

In [None]:
df['Diagnose präklinisch'].unique()

In [None]:
df['GCS (Erstbefund)'].value_counts()

In [None]:
# Count missing values (NaN) in the 'GCS (Erstbefund)' column
df['GCS (Erstbefund)'].isna().sum()

In [None]:
# Numeric sex encoding: 'männlich' -> 0, 'weiblich' -> 1. Any other value (including NaN) maps to NaN.
df['Sverweis_Geschlecht_encoded'] = df['Sverweis_Geschlecht'].map({'männlich': 0, 'weiblich': 1})

## Overall GCS distribution: medical vs trauma

In [None]:
# Summary statistics of GCS for 'Krankheit' rows, ignoring missing GCS values
df.loc[df['Einteilung (Hauptdiagnose)'] == 'Krankheit', 'GCS (Erstbefund)'].dropna().describe()

In [None]:
# Summary statistics of GCS for 'Unfall' rows, ignoring missing GCS values
df.loc[df['Einteilung (Hauptdiagnose)'] == 'Unfall', 'GCS (Erstbefund)'].dropna().describe()

Uncorrected

In [None]:
from statsmodels.stats.nonparametric import rank_compare_2indep

In [None]:
# Unadjusted two-sample rank comparison of GCS between the diagnosis classes
# (first sample: 'Krankheit', second sample: 'Unfall'); missing GCS values are dropped.
res = rank_compare_2indep(
    df.loc[df['Einteilung (Hauptdiagnose)'] == 'Krankheit', 'GCS (Erstbefund)'].dropna().values,
    df.loc[df['Einteilung (Hauptdiagnose)'] == 'Unfall', 'GCS (Erstbefund)'].dropna().values,
)

In [None]:
res.pvalue

With correction for age / sex

In [None]:
# Encode Einteilung as 0/1: 'Krankheit' -> 0, 'Unfall' -> 1.
# Values not listed in the mapping (including NaN) become NaN.
df['Einteilung_encoded'] = df['Einteilung (Hauptdiagnose)'].map({'Krankheit': 0, 'Unfall': 1})

In [None]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

# Complete cases for the adjusted model. Chaining .dropna() returns a new frame instead of
# mutating a column slice of df in place, which can trigger SettingWithCopyWarning.
temp_df = df[['GCS (Erstbefund)', 'Einteilung_encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']].dropna()

# Ordered probit model of GCS on diagnosis class, adjusted for sex and age
mod_prob_med = OrderedModel(
    temp_df['GCS (Erstbefund)'],
    temp_df[['Einteilung_encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']],
    distr='probit',
)

In [None]:
res_prob_med = mod_prob_med.fit(maxiter=10000)

In [None]:
res_prob_med.summary()

In [None]:
# Box plot of GCS by main diagnosis class, annotated with the unadjusted p-value
fig = plt.figure(figsize=(10, 6))

# Fix the category order explicitly: the x tick labels further down are assigned by position
# (0 -> 'Non-Trauma', 1 -> 'Trauma'), so relying on the order of appearance in the data could
# silently attach the wrong label to a box.
diagnosis_order = ['Krankheit', 'Unfall']
ax = sns.boxplot(x='Einteilung (Hauptdiagnose)', y='GCS (Erstbefund)', data=df[df['Einteilung (Hauptdiagnose)'] != 'Keine Diagnose'],
                 order=diagnosis_order, hue='Einteilung (Hauptdiagnose)', hue_order=diagnosis_order,
                 showfliers=False, palette='viridis', legend=False)

# Significance bracket between the two boxes, drawn just above the largest observed GCS
x1, x2 = 0, 1
y, h, col = df['GCS (Erstbefund)'].max() + 0.4, 0.3, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h+0.1, f'p={res.pvalue:.3f}', ha='center', va='bottom', color=col)

# Head-room above 15 so the bracket and the p-value text fit inside the axes
ax.set_ylim(3, 17)
plt.ylabel('GCS')

# Do not show y ticks above 15
plt.yticks(range(3, 16, 2))

plt.xticks(rotation=45)
# Replace the raw category names with display labels (positions follow diagnosis_order)
plt.xticks(ticks=[0, 1], labels=['Non-Trauma', 'Trauma'])
plt.xlabel('')

plt.show()

In [None]:
# fig.savefig('/Users/jk1/Downloads/gcs_intubation.png', dpi=600, bbox_inches='tight')

In [None]:
# Summary table: group size and GCS median (IQR) per diagnosis class, plus the unadjusted p-value

medical_df = df.loc[df['Einteilung (Hauptdiagnose)'] == 'Krankheit', 'GCS (Erstbefund)'].dropna()
trauma_df = df.loc[df['Einteilung (Hauptdiagnose)'] == 'Unfall', 'GCS (Erstbefund)'].dropna()

# One "median (Q1-Q3)" string per group, in column order (Medical, Trauma)
iqr_cells = [
    f'{gcs.median()} ({gcs.quantile(0.25)}-{gcs.quantile(0.75)})'
    for gcs in (medical_df, trauma_df)
]

results_df = pd.DataFrame(columns=['Medical', 'Trauma', 'p-value'])
results_df.loc['n'] = [len(medical_df), len(trauma_df), '']
results_df.loc['GCS (median/IQR)'] = [iqr_cells[0], iqr_cells[1], res.pvalue]



In [None]:
results_df

In [None]:
# results_df.to_csv('/Users/jk1/Downloads/gcs_intubation.csv')

### GCS distribution: non-trauma vs trauma (excluding burns) 

In [None]:
# Subset without 'Verbrennung' rows; rows with a missing category are retained (NaN != 'Verbrennung').
no_burns_df = df.loc[df['Kategorie (Hauptdiagnose)'] != 'Verbrennung']
# Compare the shape of the subset with the shape of the full frame
(no_burns_df.shape, df.shape)

Uncorrected

In [None]:
# Unadjusted two-sample rank comparison of GCS on the subset without burns
# (first sample: 'Krankheit', second sample: 'Unfall'); missing GCS values are dropped.
no_burns_res = rank_compare_2indep(
    no_burns_df.loc[no_burns_df['Einteilung (Hauptdiagnose)'] == 'Krankheit', 'GCS (Erstbefund)'].dropna().values,
    no_burns_df.loc[no_burns_df['Einteilung (Hauptdiagnose)'] == 'Unfall', 'GCS (Erstbefund)'].dropna().values,
)

In [None]:
no_burns_res.pvalue

With correction for age / sex

In [None]:
# Complete cases for the adjusted model on the subset without burns. Chaining .dropna() returns
# a new frame instead of mutating a slice in place, which can trigger SettingWithCopyWarning.
no_burns_temp_df = no_burns_df[['GCS (Erstbefund)', 'Einteilung_encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']].dropna()

# Ordered probit model of GCS on diagnosis class, adjusted for sex and age
no_burns_mod_prob_med = OrderedModel(no_burns_temp_df['GCS (Erstbefund)'],
                            no_burns_temp_df[['Einteilung_encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']],
                            distr='probit')
no_burns_res_prob_med = no_burns_mod_prob_med.fit(maxiter=10000)

In [None]:
no_burns_res_prob_med.summary()

In [None]:
# Box plot of GCS by main diagnosis class on the subset without burns, annotated with the unadjusted p-value
fig = plt.figure(figsize=(10, 6))

# Fix the category order explicitly: the x tick labels further down are assigned by position
# (0 -> 'Non-Trauma', 1 -> 'Trauma'), so relying on the order of appearance in the data could
# silently attach the wrong label to a box.
diagnosis_order = ['Krankheit', 'Unfall']
ax = sns.boxplot(x='Einteilung (Hauptdiagnose)', y='GCS (Erstbefund)', data=no_burns_df[no_burns_df['Einteilung (Hauptdiagnose)'] != 'Keine Diagnose'],
                 order=diagnosis_order, hue='Einteilung (Hauptdiagnose)', hue_order=diagnosis_order,
                 showfliers=False, palette='viridis', legend=False)

# Significance bracket between the two boxes, drawn just above the largest observed GCS
x1, x2 = 0, 1
y, h, col = no_burns_df['GCS (Erstbefund)'].max() + 0.4, 0.3, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h+0.1, f'p={no_burns_res.pvalue:.3f}', ha='center', va='bottom', color=col)

# Head-room above 15 so the bracket and the p-value text fit inside the axes
ax.set_ylim(3, 17)
plt.ylabel('GCS')

# Do not show y ticks above 15
plt.yticks(range(3, 16, 2))

plt.xticks(rotation=45)
# Replace the raw category names with display labels (positions follow diagnosis_order)
plt.xticks(ticks=[0, 1], labels=['Non-Trauma', 'Trauma'])
plt.xlabel('')

plt.show()
                 

In [None]:
# fig.savefig('/Users/jk1/Downloads/gcs_intubation_no_burns.png', dpi=600, bbox_inches='tight')

In [None]:
# Summary table for the subset without burns: group size and GCS median (IQR) per diagnosis class,
# plus the unadjusted p-value

no_burns_medical_df = no_burns_df.loc[no_burns_df['Einteilung (Hauptdiagnose)'] == 'Krankheit', 'GCS (Erstbefund)'].dropna()
no_burns_trauma_df = no_burns_df.loc[no_burns_df['Einteilung (Hauptdiagnose)'] == 'Unfall', 'GCS (Erstbefund)'].dropna()

# One "median (Q1-Q3)" string per group, in column order (Medical, Trauma)
no_burns_iqr_cells = [
    f'{gcs.median()} ({gcs.quantile(0.25)}-{gcs.quantile(0.75)})'
    for gcs in (no_burns_medical_df, no_burns_trauma_df)
]

no_burns_results_df = pd.DataFrame(columns=['Medical', 'Trauma', 'p-value'])
no_burns_results_df.loc['n'] = [len(no_burns_medical_df), len(no_burns_trauma_df), '']
no_burns_results_df.loc['GCS (median/IQR)'] = [no_burns_iqr_cells[0], no_burns_iqr_cells[1], no_burns_res.pvalue]

In [None]:
no_burns_results_df

In [None]:
# no_burns_results_df.to_csv('/Users/jk1/Downloads/gcs_intubation_no_burns.csv')

### Evaluation of subgroups in medical/trauma

Test: Kruskal-Wallis

In [None]:
df['Kategorie (Hauptdiagnose)'].unique()

In [None]:
df['Kategorie (Hauptdiagnose)'].isna().sum()

#### Medical

In [None]:
# 'Krankheit' rows with a recorded GCS. Taken as an explicit copy because a later cell adds an
# encoded category column to this frame; assigning into a filtered slice of df raises
# SettingWithCopyWarning.
medical_df = df[(df['Einteilung (Hauptdiagnose)'] == 'Krankheit') & (df['GCS (Erstbefund)'].notna())].copy()

In [None]:
# use kruskal-wallis to compare GCS between different categories
from statsmodels.sandbox.stats.multicomp import MultiComparison

In [None]:
# Group the GCS values of the medical rows by diagnosis category for the comparison below
multicomp = MultiComparison(medical_df['GCS (Erstbefund)'], medical_df['Kategorie (Hauptdiagnose)'])

In [None]:
# Kruskal-Wallis test of GCS across the medical diagnosis categories.
# Stored under its own name: `res` already holds the 'Krankheit' vs 'Unfall' rank comparison whose
# p-value the earlier plot and table cells read, so overwriting it here would make those cells
# use the wrong object when re-run.
medical_res = multicomp.kruskal()
medical_res

In [None]:
medical_df['Kategorie (Hauptdiagnose)'].value_counts()

In [None]:
# Integer codes for the medical diagnosis categories; categories missing from the mapping become NaN.
# NOTE(review): the codes are arbitrary labels for nominal categories, yet the model fitted below
# uses this column as a single numeric predictor. Confirm this is intended.
medical_df['Kategorie (Hauptdiagnose) encoded'] = medical_df['Kategorie (Hauptdiagnose)'].map({
    'Zentralnervensystem': 0,
    'Krampfanfall': 1,
    'Herz-Kreislauf': 2,
    'Intoxikation': 3,
    'Respiratorisch': 4,
})

In [None]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

# Restrict to complete cases first, as the other adjusted models in this notebook do: rows with
# a missing category code, sex or age would otherwise be passed to the model as NaN.
medical_model_df = medical_df[['GCS (Erstbefund)', 'Kategorie (Hauptdiagnose) encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']].dropna()

# Ordered probit model of GCS on diagnosis category code, sex and age (medical rows only)
mod_prob_med = OrderedModel(medical_model_df['GCS (Erstbefund)'],
                            medical_model_df[['Kategorie (Hauptdiagnose) encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']],
                            distr='probit')

In [None]:
res_prob_med = mod_prob_med.fit(maxiter=10000)

In [None]:
res_prob_med.summary()

#### Trauma

In [None]:
# 'Unfall' rows with a recorded GCS. Taken as an explicit copy because a later cell adds an
# encoded category column to this frame; assigning into a filtered slice of df raises
# SettingWithCopyWarning.
trauma_df = df[(df['Einteilung (Hauptdiagnose)'] == 'Unfall') & (df['GCS (Erstbefund)'].notna())].copy()

In [None]:
trauma_df['Kategorie (Hauptdiagnose)'].value_counts()

In [None]:
# Group the GCS values of the trauma rows by diagnosis category for the comparison below
multicomp_trauma = MultiComparison(trauma_df['GCS (Erstbefund)'], trauma_df['Kategorie (Hauptdiagnose)'])

In [None]:
# Kruskal-Wallis test of GCS across the trauma diagnosis categories
trauma_res = multicomp_trauma.kruskal()
trauma_res

In [None]:
# Integer codes for the trauma diagnosis categories; categories missing from the mapping become NaN.
# NOTE(review): the codes are arbitrary labels for nominal categories, yet the model fitted below
# uses this column as a single numeric predictor. Confirm this is intended.
trauma_df['Kategorie (Hauptdiagnose) encoded'] = trauma_df['Kategorie (Hauptdiagnose)'].map({
    'SHT': 0,
    'Polytrauma': 1,
    'Verbrennung': 2,
    'Respiratorisch': 3,
})

In [None]:
# Complete cases for the trauma model. Chaining .dropna() returns a new frame instead of
# mutating a column slice of trauma_df in place, which can trigger SettingWithCopyWarning.
temp_df = trauma_df[['GCS (Erstbefund)', 'Kategorie (Hauptdiagnose) encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']].dropna()

# Ordered probit model of GCS on diagnosis category code, sex and age (trauma rows only)
mod_prob_trauma = OrderedModel(temp_df['GCS (Erstbefund)'],
                               temp_df[['Kategorie (Hauptdiagnose) encoded', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']],
                               distr='probit')

In [None]:
res_prob_trauma = mod_prob_trauma.fit(maxiter=10000)

In [None]:
res_prob_trauma.summary()

In [None]:
import scikit_posthocs as sp

# One GCS series per trauma category, in the order used to label the result afterwards
categories_gcs = [
    trauma_df.loc[trauma_df['Kategorie (Hauptdiagnose)'] == cat, 'GCS (Erstbefund)']
    for cat in ['SHT', 'Polytrauma', 'Verbrennung', 'Respiratorisch']
]

# Pairwise Dunn post hoc test with Holm adjustment of the p-values
dunn_res = sp.posthoc_dunn(categories_gcs, p_adjust='holm')

In [None]:
# Label rows and columns of the pairwise matrix with the category names,
# in the same order in which the groups were passed to the test
dunn_res.index = ['SHT', 'Polytrauma', 'Verbrennung', 'Respiratorisch']
dunn_res.columns = ['SHT', 'Polytrauma', 'Verbrennung', 'Respiratorisch']

In [None]:
dunn_res

### Boxplots for medical and trauma

In [None]:
# Side-by-side box plots of GCS per diagnosis category: medical (left) and trauma (right)

fig, axs = plt.subplots(1, 2, figsize=(15, 6))

# hue is assigned to the x variable (with legend=False) because passing `palette` without `hue`
# is deprecated in seaborn 0.13; the resulting colours are the same.
sns.boxplot(x='Kategorie (Hauptdiagnose)', y='GCS (Erstbefund)', data=medical_df, showfliers=False, ax=axs[0],
            hue='Kategorie (Hauptdiagnose)', palette='mako', legend=False,
            boxprops=dict(alpha=.8))
axs[0].set_title('Medical')

sns.boxplot(x='Kategorie (Hauptdiagnose)', y='GCS (Erstbefund)', data=trauma_df, showfliers=False, ax=axs[1],
            hue='Kategorie (Hauptdiagnose)', palette='magma', legend=False,
            boxprops=dict(alpha=.8))
axs[1].set_title('Trauma')

# Tilt the x tick labels; tick_params rotates them without re-setting the label texts
for ax in axs:
    ax.tick_params(axis='x', labelrotation=65)

# Remove the x axis labels
axs[0].set_xlabel('')
axs[1].set_xlabel('')
plt.show()

In [None]:
# fig.savefig('/Users/jk1/Downloads/gcs_intubation_subgroups.png', dpi=600, bbox_inches='tight')

#### Table with subgroups for medical and trauma

In [None]:
# Per-category tables (one column per category) with group size and GCS median (IQR),
# built separately for the medical and the trauma rows
medical_categories = ['Zentralnervensystem', 'Krampfanfall', 'Herz-Kreislauf', 'Intoxikation', 'Respiratorisch']
trauma_categories = ['SHT', 'Polytrauma', 'Verbrennung', 'Respiratorisch']

medical_subgroups_df = pd.DataFrame(columns=medical_categories)
trauma_subgroups_df = pd.DataFrame(columns=trauma_categories)

for group_df, categories, table in (
    (medical_df, medical_categories, medical_subgroups_df),
    (trauma_df, trauma_categories, trauma_subgroups_df),
):
    # Row 'n': number of rows per category
    table.loc['n'] = [len(group_df[group_df['Kategorie (Hauptdiagnose)'] == cat]) for cat in categories]

    # Row 'GCS (median/IQR)': "median (Q1-Q3)" string per category
    for cat in categories:
        gcs_df = group_df[group_df['Kategorie (Hauptdiagnose)'] == cat]['GCS (Erstbefund)']
        table.loc['GCS (median/IQR)', cat] = f'{gcs_df.median()} ({gcs_df.quantile(0.25)}-{gcs_df.quantile(0.75)})'


In [None]:
# Reshape each per-category table to one row per category, tag it with its group and the
# p-value of the category term from the corresponding ordered model, then stack both tables.
# Note: this cell rebinds medical_subgroups_df / trauma_subgroups_df to the reshaped tables,
# so it is meant to be run once after the cell that builds them.

# Medical: categories become rows; the former index ('n', 'GCS (median/IQR)') becomes columns
medical_subgroups_df = medical_subgroups_df.T.reset_index()
medical_subgroups_df.columns = ['Kategorie', 'n', 'GCS (median/IQR)']
medical_subgroups_df['Einteilung'] = 'medical'
medical_subgroups_df['pval'] = res_prob_med.pvalues['Kategorie (Hauptdiagnose) encoded']

# Trauma: same reshaping
trauma_subgroups_df = trauma_subgroups_df.T.reset_index()
trauma_subgroups_df.columns = ['Kategorie', 'n', 'GCS (median/IQR)']
trauma_subgroups_df['Einteilung'] = 'trauma'
trauma_subgroups_df['pval'] = res_prob_trauma.pvalues['Kategorie (Hauptdiagnose) encoded']

subgroups_df = pd.concat([medical_subgroups_df, trauma_subgroups_df])

In [None]:
subgroups_df

In [None]:
# subgroups_df.to_csv('/Users/jk1/Downloads/gcs_intubation_subgroups.csv')

## Overall model

In [None]:
# Working frame for the overall model. Taken as an explicit copy because the following cells
# assign into it (.loc updates and new columns); doing that on a column slice of df raises
# SettingWithCopyWarning.
overall_temp_df = df[['GCS (Erstbefund)', 'Einteilung (Hauptdiagnose)', 'Kategorie (Hauptdiagnose)', 'Sverweis_Geschlecht_encoded', 'Alter  (Patient)']].copy()
# Rename columns to remove spaces and brackets
overall_temp_df.columns = ['GCS', 'Einteilung', 'Kategorie', 'Geschlecht', 'Alter']

In [None]:
# Rows with Einteilung 'Unfall' and Kategorie 'Respiratorisch' are relabelled 'Trauma_Respiratorisch',
# which keeps them distinct from 'Respiratorisch' rows of the other Einteilung
overall_temp_df.loc[(overall_temp_df['Einteilung'] == 'Unfall') & (overall_temp_df['Kategorie'] == 'Respiratorisch'), 'Kategorie'] = 'Trauma_Respiratorisch'

In [None]:
# Numerical encoding for Einteilung and Kategorie; values missing from a mapping become NaN
overall_temp_df['Einteilung_encoded'] = overall_temp_df['Einteilung'].map({
    'Krankheit': 0,
    'Unfall': 1,
})
overall_temp_df['Kategorie_encoded'] = overall_temp_df['Kategorie'].map({
    'Zentralnervensystem': 0,
    'Krampfanfall': 1,
    'Herz-Kreislauf': 2,
    'Intoxikation': 3,
    'Respiratorisch': 4,
    'SHT': 5,
    'Polytrauma': 6,
    'Verbrennung': 7,
    'Trauma_Respiratorisch': 8,
})

In [None]:
# Step 1: ordered categorical over the integer levels 3..15
overall_temp_df['GCS_cat'] = pd.Categorical(overall_temp_df['GCS'], categories=list(range(3, 16)), ordered=True)
# Step 2: convert the category values to strings
overall_temp_df['GCS_cat'] = overall_temp_df['GCS_cat'].astype(str)
# Step 3: rebuild an ordered categorical over the string levels '3'..'15';
# strings outside these levels become NaN
overall_temp_df['GCS_cat'] = pd.Categorical(
    overall_temp_df['GCS_cat'],
    categories=[str(level) for level in range(3, 16)],
    ordered=True,
)

In [None]:
# Keep complete cases only. Reassigning instead of using inplace=True avoids mutating a frame
# that may still be a slice of df (SettingWithCopyWarning) and keeps the step explicit.
overall_temp_df = overall_temp_df.dropna()

In [None]:
overall_temp_df 

In [None]:
import os
# R_HOME is set before pymer4 is imported, presumably so that its R bridge can locate the R
# installation. TODO confirm. The path is machine-specific.
os.environ["R_HOME"] = "/Library/Frameworks/R.framework/Resources"
from pymer4.models import Lmer

# model = Lmer(f"GCS ~ Kategorie_encoded + Geschlecht + Alter + (1|Einteilung_encoded)",
#                          data=overall_temp_df, family='binomial')
# Mixed model with Kategorie_encoded, Geschlecht and Alter as fixed effects and a random
# intercept per Einteilung_encoded.
# NOTE(review): GCS_cat is an ordered categorical with 13 levels, while family='binomial' describes
# a two-outcome response. Confirm that this fits the intended model (an ordinal mixed model may
# be what is wanted).
# NOTE(review): Einteilung_encoded takes only the values 0 and 1 in its mapping, so the random
# intercept has at most two groups. Verify that this is intended.
model = Lmer(f"GCS_cat ~ Kategorie_encoded + Geschlecht + Alter + (1|Einteilung_encoded)", data=overall_temp_df, family='binomial')
model.fit()