# Association of intubation status with outcomes in comatose patients


In [None]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import numpy as np
from scipy.stats import alpha

os.environ["R_HOME"] = "/Library/Frameworks/R.framework/Resources"

from lifelines import CoxPHFitter

In [None]:
# Excel workbook holding the intubated cohort (loaded with pd.read_excel in the next cell).
intubated_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/intubation_trigger/data/Mastertabelle_filtered.xlsx'
# CSV holding the non-intubated cohort (loaded with pd.read_csv further down).
# NOTE(review): both paths are absolute and machine-specific; the notebook only runs where they exist.
non_intubated_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/intubation_trigger/data/not_intubated/outcome_data/non_intubated_comatose_outcome_df.csv'

In [None]:
# Load the intubated cohort and keep only the rows whose Comment is not 'Reanimation'.
intubated_df = pd.read_excel(intubated_data_path)
intubated_df = intubated_df.loc[intubated_df['Comment'].ne('Reanimation')]

In [None]:
# Load the non-intubated cohort from its CSV file.
non_intubated_df = pd.read_csv(filepath_or_buffer=non_intubated_data_path)

In [None]:
# Duration outcomes (in days); these columns are coerced to numeric below.
time_columns = [
    "ICU [d]",
    "Ventilator [d]",
    "Hospital stay [d]",
]
# All outcomes: the three durations followed by the two yes/no outcomes.
outcomes_of_interest = [
    *time_columns,
    "28 d Survival [y/n]",
    "ITN ass. Pneumonia [y/n]",
]

In [None]:
# Coerce the duration columns of both cohorts to numbers.
# In the intubated table every '+' character is stripped from the value first;
# anything that still cannot be parsed becomes NaN (errors='coerce').
for time_col in time_columns:
    without_plus = intubated_df[time_col].map(lambda value: str(value).replace("+", ''))
    intubated_df[time_col] = pd.to_numeric(without_plus, errors='coerce')
    non_intubated_df[time_col] = pd.to_numeric(non_intubated_df[time_col], errors='coerce')

In [None]:
# Encode the main-diagnosis class in both cohorts: 'Krankheit' -> 0, 'Unfall' -> 1.
# Values outside the mapping become NaN.
diagnosis_codes = {'Krankheit': 0, 'Unfall': 1}
for cohort_df in (intubated_df, non_intubated_df):
    cohort_df['Einteilung_encoded'] = cohort_df['Einteilung (Hauptdiagnose)'].map(diagnosis_codes)

In [None]:
# Binary encodings for the intubated cohort.
# Sex: 'männlich' -> 0, 'weiblich' -> 1.
intubated_df['sex_encoded'] = intubated_df['Sverweis_Geschlecht'].map({'männlich': 0, 'weiblich': 1})

# Yes/no outcomes: 'n' -> 0, 'y' -> 1, stored next to the source column as '<name>_encoded'.
yes_no_codes = {'n': 0, 'y': 1}
for yes_no_col in ('28 d Survival [y/n]',
                   'ICU Survival [y/n]',
                   'Spital Survival [y/n]',
                   'ITN ass. Pneumonia [y/n]'):
    intubated_df[f'{yes_no_col}_encoded'] = intubated_df[yes_no_col].map(yes_no_codes)

# From here on column names use underscores instead of spaces.
intubated_df.columns = intubated_df.columns.str.replace(' ', '_')

intubated_df['Einteilung_encoded'] = intubated_df['Einteilung_(Hauptdiagnose)'].map({'Krankheit': 0, 'Unfall': 1})

In [None]:
# Binary encodings for the non-intubated cohort (values are lower-cased before mapping).
# Sex: 'm' -> 0, 'f' / 'w' -> 1.
non_intubated_df['sex_encoded'] = non_intubated_df['sex'].str.lower().map({'m': 0, 'f': 1, 'w': 1})

# Yes/no outcomes: 'n' -> 0, 'y' -> 1, stored next to the source column as '<name>_encoded'.
yes_no_codes = {'n': 0, 'y': 1}
for yes_no_col in ('28 d Survival [y/n]',
                   'ICU Survival [y/n]',
                   'Spital Survival [y/n]',
                   'ITN ass. Pneumonia [y/n]'):
    non_intubated_df[f'{yes_no_col}_encoded'] = non_intubated_df[yes_no_col].str.lower().map(yes_no_codes)

# From here on column names use underscores instead of spaces.
non_intubated_df.columns = non_intubated_df.columns.str.replace(' ', '_')

In [None]:
# Cohort indicator: 1 for every row of the intubated table, 0 for the non-intubated one.
intubated_df['intubated'] = 1
non_intubated_df['intubated'] = 0

In [None]:
# Frequency of each main-diagnosis class in the non-intubated cohort.
non_intubated_df.loc[:, 'Einteilung_(Hauptdiagnose)'].value_counts()

In [None]:
# Frequency of each main-diagnosis class in the intubated cohort.
intubated_df.loc[:, 'Einteilung_(Hauptdiagnose)'].value_counts()

In [None]:
# Columns kept from both cohorts when they are stacked into one table.
columns_to_retain = [
    # covariates
    'Alter__(Patient)',
    'sex_encoded',
    'NACA_(Medizinische_Daten)',
    'Einteilung_encoded',
    'GCS_(Erstbefund)',
    # durations
    'ICU_[d]',
    'Ventilator_[d]',
    'Hospital_stay_[d]',
    # encoded yes/no outcomes
    '28_d_Survival_[y/n]_encoded',
    'ICU_Survival_[y/n]_encoded',
    'Spital_Survival_[y/n]_encoded',
    'ITN_ass._Pneumonia_[y/n]_encoded',
    # cohort indicator
    'intubated',
]

In [None]:
# Stack both cohorts, restricted to the shared analysis columns.
# Row labels of the two source tables are kept as they are (no re-indexing).
cohort_frames = [intubated_df[columns_to_retain], non_intubated_df[columns_to_retain]]
joined_df = pd.concat(cohort_frames)

In [None]:
# Number of rows per cohort in the stacked table.
joined_df['intubated'].value_counts()

### ICU LOS

In [None]:
# Cox proportional-hazards model on ICU length of stay.
# Duration: ICU_los (days). Event: icu_death (1 = died in the ICU, 0 = survived the ICU).
cph = CoxPHFitter()

# Select and rename by name so the mapping cannot silently shift if the column
# order changes; rename() returns a new frame, so the edits below do not write
# into a slice of joined_df.
icu_columns = {
    'ICU_[d]': 'ICU_los',
    'GCS_(Erstbefund)': 'GCS',
    'Alter__(Patient)': 'age',
    'sex_encoded': 'sex',
    'Einteilung_encoded': 'category',
    'intubated': 'intubated',
    'ICU_Survival_[y/n]_encoded': 'icu_survival',
    'NACA_(Medizinische_Daten)': 'NACA',
}
icu_los_df = joined_df[list(icu_columns)].rename(columns=icu_columns)

# Invert the survival flag (1 = survived) into an event flag (1 = died).
icu_los_df['icu_death'] = 1 - icu_los_df['icu_survival']
icu_los_df = icu_los_df.drop(columns='icu_survival')

# dropna() removes whole rows, so report rows with any missing value rather
# than the total number of missing cells.
n_incomplete_rows = icu_los_df.isna().any(axis=1).sum()
print(f'number of rows with nan (excluded): {n_incomplete_rows}')
print(f'{icu_los_df.intubated.value_counts()}')
icu_los_df = icu_los_df.dropna()

cph.fit(icu_los_df, duration_col='ICU_los', event_col='icu_death')
cph.print_summary()

In [None]:
# ICU LOS of the non-intubated patients as median [first quartile-third quartile]
non_intubated_icu_los = icu_los_df[icu_los_df.intubated == 0].ICU_los
print(f'Non intubated: {non_intubated_icu_los.median()} [{non_intubated_icu_los.quantile(0.25)}-{non_intubated_icu_los.quantile(0.75)}]')

In [None]:
# Boxplot of ICU length of stay per cohort; the y-axis is limited to 0-50.
ax = sns.boxplot(data=icu_los_df, x='intubated', y='ICU_los', hue='intubated')
ax.set(ylim=(0, 50), ylabel='ICU los')

plt.show()

### Hospital LOS

In [None]:
# Cox proportional-hazards model on hospital length of stay.
# Duration: hospital_stay (days). Event: hospital_death (1 = died in hospital, 0 = survived).

# Select and rename by name so the mapping cannot silently shift if the column
# order changes; rename() returns a new frame, so the edits below do not write
# into a slice of joined_df.
hospital_columns = {
    'Hospital_stay_[d]': 'hospital_stay',
    'GCS_(Erstbefund)': 'GCS',
    'Alter__(Patient)': 'age',
    'sex_encoded': 'sex',
    'Einteilung_encoded': 'category',
    'intubated': 'intubated',
    'Spital_Survival_[y/n]_encoded': 'hospital_survival',
    'NACA_(Medizinische_Daten)': 'NACA',
}
hospital_stay_df = joined_df[list(hospital_columns)].rename(columns=hospital_columns)

# Invert the survival flag (1 = survived) into an event flag (1 = died).
hospital_stay_df['hospital_death'] = 1 - hospital_stay_df['hospital_survival']
hospital_stay_df = hospital_stay_df.drop(columns='hospital_survival')

# dropna() removes whole rows, so report rows with any missing value rather
# than the total number of missing cells.
n_incomplete_rows = hospital_stay_df.isna().any(axis=1).sum()
print(f'number of rows with nan (excluded): {n_incomplete_rows}')
hospital_stay_df = hospital_stay_df.dropna()

cph = CoxPHFitter()
cph.fit(hospital_stay_df, duration_col='hospital_stay', event_col='hospital_death')
cph.print_summary()

In [None]:
# hospital LOS of the non-intubated patients as median [first quartile, third quartile]
non_intubated_hospital_stay = hospital_stay_df[hospital_stay_df.intubated == 0].hospital_stay
print(f'Non intubated: {non_intubated_hospital_stay.median()} [{non_intubated_hospital_stay.quantile(0.25)}, {non_intubated_hospital_stay.quantile(0.75)}]')

In [None]:
# Boxplot of hospital length of stay per cohort; the y-axis is limited to 0-50.
ax = sns.boxplot(data=hospital_stay_df, x='intubated', y='hospital_stay', hue='intubated')
ax.set(ylim=(0, 50), title='Hospital LOS')

plt.show()

### Ventilator days

In [None]:
# Cause-specific Cox model on ventilator days.
# Event of interest: extubation, approximated by ICU survival (icu_survival == 1).
# Death in the ICU (icu_survival == 0) is treated as censoring.
#
# NOTE(review): the previous version passed `icu_death` as event_col, which made
# death the event and extubation the censored state, the opposite of what is
# documented here. The event column is now the ICU-survival flag itself.

# Select and rename by name; rename() returns a new frame, so nothing below
# writes into a slice of joined_df.
ventilator_columns = {
    'Ventilator_[d]': 'ventilator_days',
    'GCS_(Erstbefund)': 'GCS',
    'Alter__(Patient)': 'age',
    'sex_encoded': 'sex',
    'Einteilung_encoded': 'category',
    'intubated': 'intubated',
    'ICU_Survival_[y/n]_encoded': 'extubated',
    'NACA_(Medizinische_Daten)': 'NACA',
}
ventilator_days_df = joined_df[list(ventilator_columns)].rename(columns=ventilator_columns)

# dropna() removes whole rows, so report rows with any missing value rather
# than the total number of missing cells.
n_incomplete_rows = ventilator_days_df.isna().any(axis=1).sum()
print(f'number of rows with nan (excluded): {n_incomplete_rows}')
print(f'{ventilator_days_df.intubated.value_counts()}')
ventilator_days_df = ventilator_days_df.dropna()

# Prepare the DataFrame for the model.
# 'ventilator_days' is the duration, 'extubated' the event indicator, the rest are covariates.
df_model = ventilator_days_df[['ventilator_days', 'extubated', 'GCS', 'age', 'sex', 'category', 'intubated', 'NACA']].copy()

# Fit the cause-specific Cox model (rows with extubated == 0, i.e. deaths, are censored).
cph = CoxPHFitter()
cph.fit(df_model, duration_col='ventilator_days', event_col='extubated')
cph.print_summary()

In [None]:
# ventilator_days_df.to_csv('/Users/jk1/Downloads/ventilator_days_df.csv', index=False)

In [None]:
# ventilator days of the non-intubated patients as median [first quartile, third quartile]
non_intubated_ventilator_days = ventilator_days_df[ventilator_days_df.intubated == 0].ventilator_days
print(f'Non intubated: {non_intubated_ventilator_days.median()} [{non_intubated_ventilator_days.quantile(0.25)}, {non_intubated_ventilator_days.quantile(0.75)}]')

In [None]:
# Boxplot of ventilator days per cohort; the y-axis is limited to 0-50.
ax = sns.boxplot(data=ventilator_days_df, x='intubated', y='ventilator_days', hue='intubated')
ax.set(ylim=(0, 50), ylabel='Ventilator days')

plt.show()

### 28d Mortality

In [None]:
# Multiple logistic regression of 28-day survival (1 = survived, 0 = died).

# Select and rename by name; rename() returns a new frame, so nothing below
# writes into a slice of joined_df.
survival_columns = {
    '28_d_Survival_[y/n]_encoded': 'survival',
    'GCS_(Erstbefund)': 'GCS',
    'Alter__(Patient)': 'age',
    'sex_encoded': 'sex',
    'Einteilung_encoded': 'category',
    'intubated': 'intubated',
    'NACA_(Medizinische_Daten)': 'NACA',
}
survival_df = joined_df[list(survival_columns)].rename(columns=survival_columns)

# dropna() removes whole rows, so report rows with any missing value rather
# than the total number of missing cells.
n_incomplete_rows = survival_df.isna().any(axis=1).sum()
print(f'number of rows with nan (excluded): {n_incomplete_rows}')
survival_df = survival_df.dropna()

# Store the cohort indicator as a categorical column.
survival_df['intubated'] = survival_df['intubated'].astype('category')

# Define the logistic regression formula:
# intubation status adjusted for GCS, age, NACA, sex and diagnosis category.
formula = "survival ~ GCS + age + NACA + C(intubated) + sex + category"

# Fit the logistic regression model.
model = smf.logit(formula, data=survival_df).fit()

# Print the summary of the model.
print(model.summary())

In [None]:
# print 28d mortality in non intubated patients as deaths/total (percentage)
# survival is encoded 1 = survived, 0 = died, so deaths are the rows equal to 0.
# (value_counts() is ordered by frequency, so picking its second entry by
# position would report whichever outcome happens to be rarer.)
non_intubated_survival = survival_df.loc[survival_df.intubated == 0, 'survival']
non_intubated_died = non_intubated_survival == 0
print(f'Non intubated: {int(non_intubated_died.sum())}/{len(non_intubated_survival)} ({non_intubated_died.mean() * 100:.2f}%)')

In [None]:
# print 28d mortality in intubated patients as deaths/total (percentage)
# survival is encoded 1 = survived, 0 = died, so deaths are the rows equal to 0.
# (value_counts() is ordered by frequency, so picking its second entry by
# position would report whichever outcome happens to be rarer.)
intubated_survival = survival_df.loc[survival_df.intubated == 1, 'survival']
intubated_died = intubated_survival == 0
print(f'Intubated: {int(intubated_died.sum())}/{len(intubated_survival)} ({intubated_died.mean() * 100:.2f}%)')

In [None]:
# Bar chart of the 28-day survival outcome shares (in percent) within each cohort.
survival_share_df = (
    survival_df
    .groupby('intubated')['survival']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .reset_index()
)
sns.catplot(data=survival_share_df, x='intubated', y='percent', hue='survival', kind='bar')

plt.show()

### Pneumonia

In [None]:
# Multiple logistic regression of pneumonia (1 = yes, 0 = no).

# Select and rename by name; rename() returns a new frame, so nothing below
# writes into a slice of joined_df. 'ventilator_days' is carried along but is
# not part of the model formula.
pneumonia_columns = {
    'ITN_ass._Pneumonia_[y/n]_encoded': 'pneumonia',
    'GCS_(Erstbefund)': 'GCS',
    'Alter__(Patient)': 'age',
    'sex_encoded': 'sex',
    'Einteilung_encoded': 'category',
    'intubated': 'intubated',
    'Ventilator_[d]': 'ventilator_days',
    'Hospital_stay_[d]': 'hospital_los',
    'NACA_(Medizinische_Daten)': 'NACA',
}
pneumonia_df = joined_df[list(pneumonia_columns)].rename(columns=pneumonia_columns)

# Store the cohort indicator as a categorical column.
pneumonia_df['intubated'] = pneumonia_df['intubated'].astype('category')

# Define the logistic regression formula:
# intubation status adjusted for GCS, age, sex, NACA, hospital days and diagnosis category.
formula = "pneumonia ~ GCS + age + sex + NACA + C(intubated) + hospital_los + category"

# Fit the logistic regression model.
# NOTE(review): rows with missing values are not removed here, unlike the other
# models; confirm that relying on the formula interface's handling of missing
# data is intended.
model = smf.logit(formula, data=pneumonia_df).fit()

# Print the summary of the model.
print(model.summary())

In [None]:
# print pneumonia occurrence in non intubated patients as number (percentage)
# The percentage is taken over the patients with a known pneumonia status.
# (DataFrame.value_counts() skips every row with a missing value in ANY column,
# so using it as the denominator did not match the rows counted in the numerator.)
non_intubated_pneumonia = pneumonia_df.loc[pneumonia_df.intubated == 0, 'pneumonia']
n_pneumonia = int(non_intubated_pneumonia.sum())
n_known_status = int(non_intubated_pneumonia.count())
pneumonia_percent = n_pneumonia * 100 / n_known_status if n_known_status else float('nan')
print(f'Non intubated pneumonia: {n_pneumonia} ({pneumonia_percent:.2f}%)')

In [None]:
# Bar chart of the pneumonia outcome shares (in percent) within each cohort.
pneumonia_share_df = (
    pneumonia_df
    .groupby('intubated')['pneumonia']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .reset_index()
)
sns.catplot(data=pneumonia_share_df, x='intubated', y='percent', hue='pneumonia', kind='bar')

plt.show()


In [None]:
# Summary figure with five panels on a 2x3 grid:
# top row    - boxplots of ICU LOS, hospital LOS and ventilator days,
# bottom row - outcome shares for 28-day survival and pneumonia (sixth panel left empty).


def _plot_outcome_share(ax, df, outcome, palette_name, title, legend_title):
    """Draw the percentage of each 0/1 value of `outcome` per cohort on `ax`.

    Bars are grouped by `intubated` and coloured by `outcome`; each bar is
    annotated with its absolute count. The y-axis runs from 0 to 100 and is
    labelled in percent; the legend entries read 'No' / 'Yes'.
    """
    shares = (
        df
        .groupby('intubated')[outcome]
        .value_counts(normalize=True)
        .mul(100)
        .rename('percent')
        .reset_index()
    )
    # First two colours of the palette, swapped.
    bar_colors = list(reversed(sns.color_palette(palette_name)[0:2]))
    sns.barplot(data=shares, x='intubated', y='percent', hue=outcome, ax=ax, palette=bar_colors, alpha=0.7)

    ax.set_title(title)
    ax.set_ylim(0, 100)
    # Pin the tick positions of THIS axis before attaching fixed labels, so the
    # labels cannot drift from the ticks and do not depend on another panel.
    percent_ticks = ax.get_yticks()
    ax.set_yticks(percent_ticks)
    ax.set_yticklabels([f'{int(tick)}%' for tick in percent_ticks])
    ax.set_ylabel('')

    # set legend labels to yes / no
    handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles, ['No', 'Yes'], title=legend_title)

    # add absolute counts to the bars: container 0 holds the outcome == 0 bars,
    # container 1 the outcome == 1 bars, each ordered non intubated, intubated
    for container, outcome_value in zip(ax.containers, (0, 1)):
        count_labels = [
            f'n = {(df.loc[df.intubated == cohort, outcome] == outcome_value).sum()}'
            for cohort in (0, 1)
        ]
        ax.bar_label(container=container, labels=count_labels, label_type='edge')


fig, axes = plt.subplots(2, 3, figsize=(15, 10))

palette = sns.color_palette('Set3')

# (data, y column, title) of each boxplot in the top row
boxplot_panels = [
    (icu_los_df, 'ICU_los', 'ICU LOS'),
    (hospital_stay_df, 'hospital_stay', 'Hospital LOS'),
    (ventilator_days_df, 'ventilator_days', 'Ventilator days'),
]
for ax, (panel_df, y_column, panel_title) in zip(axes[0], boxplot_panels):
    sns.boxplot(x='intubated', y=y_column, data=panel_df, hue='intubated', ax=ax, palette=palette,
                boxprops=dict(alpha=.7), legend=False)
    ax.set_ylim(0, 50)
    ax.set_ylabel('days')
    ax.set_title(panel_title)

# conversion to integer
survival_df['survival'] = survival_df['survival'].astype('Int64')
_plot_outcome_share(axes[1, 0], survival_df, 'survival', 'Set2',
                    title='Mortality (28 days)', legend_title='Survival')

pneumonia_df['pneumonia'] = pneumonia_df['pneumonia'].astype('Int64')
_plot_outcome_share(axes[1, 1], pneumonia_df, 'pneumonia', 'Set1',
                    title='Pneumonia', legend_title='Pneumonia')

axes[1, 2].axis('off')

# set x ticks of the five drawn panels to 'Non intubated', 'Intubated'
# (the hidden sixth panel has no cohort ticks to relabel)
for ax in axes.flatten()[:5]:
    ax.set_xticklabels(['Non intubated', 'Intubated'])
    ax.set_xlabel('')

plt.show()


In [None]:
# save figure
# fig.savefig('/Users/jk1/Downloads/outcomes_intubated_vs_non_intubated.png', dpi=600, bbox_inches='tight')

In [None]:
# Stand-alone figure with only the pneumonia panel:
# share of each pneumonia outcome (in percent) per cohort, annotated with absolute counts.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

pneumonia_share_df = (
    pneumonia_df
    .groupby('intubated')['pneumonia']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .reset_index()
)
# First two colours of 'Set1', swapped; passed as a list rather than a one-shot iterator.
pneumonia_colors = list(reversed(sns.color_palette('Set1')[0:2]))
sns.barplot(data=pneumonia_share_df, x='intubated', y='percent', hue='pneumonia', ax=ax,
            palette=pneumonia_colors, alpha=0.7)

ax.set_title('')
ax.set_ylim(0, 100)
# set yticks to percentage with "%"; the tick positions are pinned first so the
# fixed labels stay aligned with them
percent_ticks = ax.get_yticks()
ax.set_yticks(percent_ticks)
ax.set_yticklabels([f'{int(tick)}%' for tick in percent_ticks])
ax.set_ylabel('')
# set xticks to 'Non intubated', 'Intubated'
ax.set_xticklabels(['Non intubated', 'Intubated'])
ax.set_xlabel('')
# set legend labels to yes / no
pneumonia_handles, pneumonia_labels = ax.get_legend_handles_labels()
ax.legend(pneumonia_handles, ['No', 'Yes'], title='Pneumonia')

# add absolute counts to the bars (counted directly, so a missing outcome level gives 0 instead of a KeyError)
no_pneumonia_labels = [f'n = {(pneumonia_df.loc[pneumonia_df.intubated == cohort, "pneumonia"] == 0).sum()}'
                       for cohort in (0, 1)]
with_pneumonia_labels = [f'n = {(pneumonia_df.loc[pneumonia_df.intubated == cohort, "pneumonia"] == 1).sum()}'
                         for cohort in (0, 1)]
ax.bar_label(container=ax.containers[0], labels=no_pneumonia_labels, label_type='edge')
ax.bar_label(container=ax.containers[1], labels=with_pneumonia_labels, label_type='edge')

In [None]:
# fig.savefig('/Users/jk1/Downloads/pneumonia_intubated_vs_non_intubated.png', dpi=600, bbox_inches='tight')