In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import alpha

In [None]:
# Both input workbooks live under the same project data directory.
DATA_DIR = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/intubation_trigger/data'

# Workbook read into `intubated_df`.
intubated_data_path = DATA_DIR + '/Mastertabelle_filtered.xlsx'
# Workbook with the per-site raw-data sheets of the non-intubated patients.
non_intubated_path = DATA_DIR + '/not_intubated/GCS Distribution - nicht intubiert.xlsx'

In [None]:
# Intubated cohort: read_excel without sheet_name loads the first sheet of the workbook.
intubated_df = pd.read_excel(intubated_data_path)

# Non-intubated cohort: fetch the three per-site raw-data sheets in ONE read_excel call.
# Passing a list of sheet names returns a dict keyed by sheet name, so the workbook
# is opened and parsed once instead of once per site.
non_intubated_sheets = pd.read_excel(
    non_intubated_path,
    sheet_name=['St. Gallen Rohdaten', 'Zürich Rohdaten', 'Bern Rohdaten'],
)
non_intubated_st_gallen_df = non_intubated_sheets['St. Gallen Rohdaten']
non_intubated_zurich_df = non_intubated_sheets['Zürich Rohdaten']
non_intubated_bern_df = non_intubated_sheets['Bern Rohdaten']

In [None]:
# Keep only the rows whose Comment is not 'Reanimation'.
is_not_reanimation = intubated_df.Comment != 'Reanimation'
intubated_df = intubated_df.loc[is_not_reanimation]

In [None]:
# Tag every per-site frame with its location, then stack them into a single
# non-intubated cohort (Bern, St. Gallen, Zurich, in that order).
site_frames = [
    (non_intubated_bern_df, 'Bern'),
    (non_intubated_st_gallen_df, 'St. Gallen'),
    (non_intubated_zurich_df, 'Zurich'),
]
for site_df, site_name in site_frames:
    site_df['Location'] = site_name

non_intubated_df = (
    pd.concat([frame for frame, _location in site_frames])
    .assign(Intubated=0)                  # outcome label: none of these patients were intubated
    .rename(columns={'GCS Alle': 'GCS'})  # align the overall GCS column name with the intubated frame
)

In [None]:
# Sanity check: (rows, columns) of the combined non-intubated frame.
non_intubated_df.shape

In [None]:
# Build the trauma and non-trauma views of the non-intubated cohort, each with a
# uniform 'GCS' column next to the 'Intubated' label.
# rename() is used without inplace=True: renaming a column selection in place
# triggers pandas' SettingWithCopyWarning, whereas the chained form returns an
# independent new frame.
non_intubated_trauma_df = (
    non_intubated_df[['GCS Trauma', 'Intubated']]
    .rename(columns={'GCS Trauma': 'GCS'})
)

non_intubated_non_trauma_df = (
    non_intubated_df[['GCS Krankheit', 'Intubated']]
    .rename(columns={'GCS Krankheit': 'GCS'})
)

In [None]:
# Label the intubated cohort and align its GCS column name with the non-intubated frame.
# assign()/rename() build a new frame instead of writing into `intubated_df`, which at
# this point is the result of a boolean filter; assigning a column to such a filtered
# frame in place raises pandas' SettingWithCopyWarning.
intubated_df = (
    intubated_df
    .assign(Intubated=1)
    .rename(columns={'GCS (Erstbefund)': 'GCS'})
)

In [None]:
# Preview the first rows of the combined non-intubated frame.
non_intubated_df.head()

In [None]:
# Preview the first rows of the intubated frame.
intubated_df.head()

In [None]:
# Stack both cohorts, keeping only the GCS value and the outcome label.
# reset_index() without drop=True keeps each row's previous index as an 'index' column.
gcs_label_columns = ['GCS', 'Intubated']
overall_stacked = pd.concat([
    intubated_df[gcs_label_columns],
    non_intubated_df[gcs_label_columns],
])
overall_joined_df = overall_stacked.reset_index()

In [None]:
# Split the intubated cohort by main-diagnosis class ('Unfall' vs 'Krankheit') and
# stack each part on top of the matching non-intubated frame.
main_diagnosis_class = intubated_df['Einteilung (Hauptdiagnose)']
gcs_and_label = ['GCS', 'Intubated']

intubated_trauma_part = intubated_df.loc[main_diagnosis_class == 'Unfall', gcs_and_label]
intubated_non_trauma_part = intubated_df.loc[main_diagnosis_class == 'Krankheit', gcs_and_label]

trauma_joined_df = pd.concat([intubated_trauma_part, non_intubated_trauma_df]).reset_index()
non_trauma_joined_df = pd.concat([intubated_non_trauma_part, non_intubated_non_trauma_df]).reset_index()

In [None]:
# GCS distribution of intubated vs. non-intubated patients (all diagnoses pooled).
overall_fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=overall_joined_df, x='GCS', hue='Intubated', bins=15, kde=True, ax=ax)

# Counts are shown on a logarithmic y axis.
ax.set_yscale('log')

plt.show()

In [None]:
# Inspect the first colour of seaborn's 'Set1' palette.
sns.color_palette('Set1')[0]

In [None]:
# GCS distribution of intubated vs. non-intubated patients, one panel per cohort
# (trauma on the left, non-trauma on the right).
fig, axs = plt.subplots(1, 2, figsize=(20, 5))
set1_colors = sns.color_palette('Set1')
# Second Set1 colour for the first hue level, first Set1 colour for the second one.
palette = [set1_colors[1], set1_colors[0]]
multiple = 'layer'

panels = [
    ('Trauma', trauma_joined_df),
    ('Non-Trauma', non_trauma_joined_df),
]

for ax, (panel_title, panel_df) in zip(axs, panels):
    sns.histplot(data=panel_df, x='GCS', hue='Intubated', bins=13, kde=True, ax=ax,
                 discrete=True, alpha=0.2, palette=palette, multiple=multiple)
    ax.set_yscale('log')
    ax.set_title(panel_title)

    # show all GCS values
    ax.set_xticks(np.arange(3, 16, 1))

    # Replace the automatic hue legend with readable labels. The handles are taken
    # from THIS panel's own legend (previously the left panel borrowed the right
    # panel's handles), and a fresh legend has no title, so nothing needs clearing.
    # Label order assumes hue level 0 is listed before 1 -- TODO confirm.
    legend_handles = ax.get_legend().legend_handles
    ax.legend(legend_handles, ['Non Intubated', 'Intubated'])

    ax.set_ylabel('Number of patients (log scale)')

plt.show()

In [None]:
# fig.savefig('/Users/jk1/Downloads/gcs_intubated_vs_non_intubated.png', dpi=600, bbox_inches='tight')

In [None]:
# Inspect the legend handles of the second (right-hand) panel.
axs[1].get_legend().legend_handles

In [None]:
# Bind `handles` explicitly before displaying it: no other cell assigns this name,
# so a bare `handles` raises NameError on a fresh kernel (Restart & Run All).
handles = axs[1].get_legend().legend_handles
handles

In [None]:
# get individual colors from palette
# Keep the whole two-colour palette: indexing it with [0] first would leave a single
# RGB tuple, and palette[0], palette[1] would then be two channel values of that one
# colour instead of two colours.
palette = sns.color_palette('Set1', n_colors=2)
palette[0], palette[1]

In [None]:
# GCS distribution of intubated vs. non-intubated trauma patients, excluding burn
# injuries from the intubated cohort.
# Both conditions are combined into ONE boolean mask applied via .loc. Chaining two
# boolean selections (df[mask_a][mask_b]) applies a full-length mask to an already
# filtered frame, which makes pandas reindex the mask and emit a UserWarning.
is_trauma = intubated_df['Einteilung (Hauptdiagnose)'] == 'Unfall'
is_not_burn = intubated_df['Kategorie (Hauptdiagnose)'] != 'Verbrennung'

trauma_no_burn_joined_df = pd.concat([
    intubated_df.loc[is_trauma & is_not_burn, ['GCS', 'Intubated']],
    non_intubated_trauma_df,
]).reset_index()

plt.figure(figsize=(10, 5))
ax = sns.histplot(data=trauma_no_burn_joined_df, x='GCS', hue='Intubated', bins=13, kde=True, discrete=True, alpha=0.5)

# set y axis to log scale
ax.set_yscale('log')

# set title
ax.set_title('Trauma (excluding burn injuries)')

# show all GCS values
ax.set_xticks(np.arange(3, 16, 1))

plt.show()

In [None]:
# trauma, non trauma and burns
# change overlap color or put next to each other

In [None]:
# find a cutoff for GCS to predict intubation in trauma patients (excluding burn patients)

# compute c statistic for every GCS

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

GCS_THRESHOLDS = np.arange(3, 16, 1)


def roc_per_gcs_threshold(joined_df, thresholds=GCS_THRESHOLDS):
    """Score the binary rule ``GCS <= threshold`` against ``Intubated`` for each threshold.

    Rows with a missing GCS are dropped first. ``NaN <= threshold`` evaluates to
    False, so leaving them in would silently count every patient without a recorded
    GCS as "predicted not intubated" and bias all per-threshold scores.

    Parameters
    ----------
    joined_df : DataFrame with a 'GCS' and an 'Intubated' column.
    thresholds : iterable of GCS cut-offs to evaluate.

    Returns
    -------
    (auc_scores, fprs, tprs, roc_thresholds) : four lists with one entry per
        threshold, holding the roc_auc_score value and the three arrays returned
        by roc_curve, respectively.
    """
    scored_df = joined_df.dropna(subset=['GCS'])
    y_true = scored_df.Intubated

    auc_scores, fprs, tprs, roc_thresholds = [], [], [], []
    for gcs_threshold in thresholds:
        y_score = scored_df.GCS <= gcs_threshold
        auc_scores.append(roc_auc_score(y_true, y_score))
        fpr_, tpr_, thresholds_ = roc_curve(y_true, y_score)
        fprs.append(fpr_)
        tprs.append(tpr_)
        roc_thresholds.append(thresholds_)
    return auc_scores, fprs, tprs, roc_thresholds


(trauma_roc_auc_scores, trauma_fpr,
 trauma_tpr, trauma_thresholds) = roc_per_gcs_threshold(trauma_no_burn_joined_df)

(non_trauma_roc_auc_scores, non_trauma_fpr,
 non_trauma_tpr, non_trauma_thresholds) = roc_per_gcs_threshold(non_trauma_joined_df)


# plot ROC AUC scores
plt.figure(figsize=(10, 5))

ax = sns.lineplot(x=GCS_THRESHOLDS, y=trauma_roc_auc_scores, label='Trauma (excluding burn injuries)', color='red')
ax = sns.lineplot(x=GCS_THRESHOLDS, y=non_trauma_roc_auc_scores, label='Non Trauma', color='blue', ax=ax)

plt.xlabel('GCS threshold')
plt.ylabel('ROC AUC score')
plt.title('ROC AUC score for different GCS thresholds')

plt.show()


In [None]:
# Overall ROC AUC of the raw GCS value as a score for intubation
# (trauma cohort without burn injuries, and non-trauma cohort).
# Rows containing any missing value are removed from both frames first.
trauma_no_burn_joined_df = trauma_no_burn_joined_df.dropna()
non_trauma_joined_df = non_trauma_joined_df.dropna()

trauma_roc_auc, non_trauma_roc_auc = (
    roc_auc_score(cohort_df['Intubated'], cohort_df['GCS'])
    for cohort_df in (trauma_no_burn_joined_df, non_trauma_joined_df)
)

# The complement of each AUC is reported -- presumably because a LOWER GCS is the
# one expected to go with intubation; confirm against the intended reading.
print(1 - trauma_roc_auc, 1 - non_trauma_roc_auc)

## Patients with GCS < 9

In [None]:
# GCS distribution restricted to patients with GCS < 9 (all diagnoses pooled).
low_gcs_overall_df = overall_joined_df.loc[overall_joined_df.GCS < 9]

low_gcs_fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=low_gcs_overall_df, x='GCS', hue='Intubated', bins=6, kde=True, discrete=True, ax=ax)

# logarithmic count axis
ax.set_yscale('log')

# one tick per GCS value from 3 to 8
ax.set_xticks(np.arange(3, 9, 1))

# half a unit of margin on each side so the integer-centred bars are fully visible
ax.set_xlim(2.5, 8.5)

plt.show()


In [None]:
# Same GCS < 9 view, split into a trauma panel (left) and a non-trauma panel (right).
# The y axis is left linear in both panels.
fig, axs = plt.subplots(1, 2, figsize=(20, 5))

low_gcs_panels = [
    ('Trauma', trauma_joined_df[trauma_joined_df.GCS < 9]),
    ('Non Trauma', non_trauma_joined_df[non_trauma_joined_df.GCS < 9]),
]

for ax, (panel_title, panel_df) in zip(axs, low_gcs_panels):
    sns.histplot(data=panel_df, x='GCS', hue='Intubated', bins=6, kde=True, discrete=True, ax=ax)
    ax.set_title(panel_title)
    # one tick per GCS value from 3 to 8
    ax.set_xticks(np.arange(3, 9, 1))

plt.show()


In [None]:
# Ordinal-shift test of the GCS distribution (intubated vs. non-intubated) among
# trauma patients with GCS < 9.
from statsmodels.stats.nonparametric import rank_compare_2indep

# `GCS < 9` is False for missing values, so this single filter also removes NaNs.
low_gcs_trauma_df = trauma_joined_df.loc[trauma_joined_df.GCS < 9]

intubated_trauma_gcs = low_gcs_trauma_df.loc[low_gcs_trauma_df.Intubated == 1, 'GCS']
non_intubated_trauma_gcs = low_gcs_trauma_df.loc[low_gcs_trauma_df.Intubated == 0, 'GCS']

# rank-based comparison of the two independent samples
rank_compare_2indep(intubated_trauma_gcs, non_intubated_trauma_gcs)

In [None]:
# Ordinal-shift test of the GCS distribution (intubated vs. non-intubated) among
# non-trauma patients with GCS < 9.
# `GCS < 9` is False for missing values, so this single filter also removes NaNs.
low_gcs_non_trauma_df = non_trauma_joined_df.loc[non_trauma_joined_df.GCS < 9]

intubated_non_trauma_gcs = low_gcs_non_trauma_df.loc[low_gcs_non_trauma_df.Intubated == 1, 'GCS']
non_intubated_non_trauma_gcs = low_gcs_non_trauma_df.loc[low_gcs_non_trauma_df.Intubated == 0, 'GCS']

# rank-based comparison of the two independent samples
rank_compare_2indep(intubated_non_trauma_gcs, non_intubated_non_trauma_gcs)