Relationship of ketones with outcome after ischemic stroke

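Ketones are not available in the dataset, so the anion gap is used as a surrogate (restricted to samples with lactate < 2.5 so that lactate-driven gaps are excluded).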

In [None]:
import pandas as pd
import numpy as np
import os
from scipy.stats import pearsonr
from statsmodels.miscmodels.ordinal_model import OrderedModel
import seaborn as sns
import matplotlib.pyplot as plt
from utils import create_registry_case_identification_column, create_ehr_case_identification_column, patient_selection
from utils import load_data_from_main_dir
from lab_preprocessing import preprocess_labs
from outcome_preprocessing import preprocess_outcomes


In [None]:
# Folder holding the per-value EHR extraction; the EDS file sits directly inside it.
ehr_data_path = '/Users/jk1/stroke_datasets/stroke_unit_dataset/per_value/Extraction_20221117/'
eds_path = os.path.join(ehr_data_path, 'eds_j1.csv')
# Stroke registry workbook (Excel).
registry_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/stroke_research/geneva_stroke_unit_dataset/data/stroke_registry/post_hoc_modified/stroke_registry_post_hoc_modified.xlsx'

In [None]:
# Load both raw sources with every column read as text (dtype=str).
# The EDS export is a ';'-separated, UTF-8 encoded CSV; the registry is an Excel workbook.
registry_df = pd.read_excel(registry_path, dtype=str)
eds_df = pd.read_csv(eds_path, sep=';', dtype=str, encoding='utf-8')

In [None]:
# Add the shared 'case_admission_id' key to both tables, each built by the
# helper dedicated to its source.
for frame, build_case_id in (
    (registry_df, create_registry_case_identification_column),
    (eds_df, create_ehr_case_identification_column),
):
    frame['case_admission_id'] = build_case_id(frame)

In [None]:
# Cohort selection. The flag names suggest the cohort is restricted to adult
# patients with acute ischemic stroke; the helper returns the included registry
# rows and the excluded patients.
selection_options = dict(
    exclude_patients_under_18=True,
    exclude_non_ischemic_stroke=True,
    exclude_non_acute_stroke=True,
    verbose=True,
)
inclusion_registry_df, excluded_patients_df = patient_selection(
    registry_path=registry_path,
    eds_path=eds_path,
    **selection_options,
)

In [None]:
# Restrict the EDS table to admissions retained by the patient selection.
# The rows themselves must be filtered: writing the filtered id column back into
# the full frame only sets the id of excluded rows to NaN and keeps every row,
# so the patient count printed below would still cover all patients.
selected_case_ids = inclusion_registry_df['case_admission_id']
eds_df = eds_df[eds_df['case_admission_id'].isin(selected_case_ids)].copy()
print(f'Number of patients in EDS after selection: {eds_df.patient_id.nunique()}')

In [None]:
# Load the lab extraction from the EHR folder; 'labo' is passed as the file
# selector (presumably a file-name prefix -- confirm in load_data_from_main_dir).
lab_file_start = 'labo'
lab_df = load_data_from_main_dir(ehr_data_path, lab_file_start)
# Give lab rows the same admission key as the other EHR tables.
lab_case_ids = create_ehr_case_identification_column(lab_df)
lab_df['case_admission_id'] = lab_case_ids

In [None]:
# Labs needed for the anion gap (sodium, chlore, HCO3) plus lactate, which is
# used further down to discard samples with elevated lactate.
anion_gap_lab_names = ["sodium", "chlore", "HCO3", "lactate"]
preprocessed_anion_gap_df = preprocess_labs(lab_df, anion_gap_lab_names)

In [None]:
# Long -> wide: one row per (admission, sample time) and one column per lab label.
# pivot_table aggregates duplicate entries with its default function (mean).
wide_labs = preprocessed_anion_gap_df.pivot_table(
    values='value',
    index=['case_admission_id', "sample_date"],
    columns='dosage_label',
)
preprocessed_anion_gap_df = wide_labs.reset_index()

In [None]:
# Anion gap per sample: sodium - (chlore + HCO3).
measured_anions = preprocessed_anion_gap_df['chlore'] + preprocessed_anion_gap_df['HCO3']
preprocessed_anion_gap_df['anion_gap'] = preprocessed_anion_gap_df['sodium'] - measured_anions
# Quick look at the distribution.
preprocessed_anion_gap_df['anion_gap'].hist(bins=50)

In [None]:
# Create the non_lactate anion gap column: the anion gap where lactate < 2.5.
# Samples with lactate >= 2.5, or with no lactate value, get NaN.
# Series.where does this in one vectorised step (no Python-level row loop) and
# also behaves sensibly on an empty frame.
lactate_not_elevated = preprocessed_anion_gap_df['lactate'] < 2.5
preprocessed_anion_gap_df['non_lactate_anion_gap'] = (
    preprocessed_anion_gap_df['anion_gap'].where(lactate_not_elevated)
)

In [None]:
# Number of distinct values per column among samples that have a non-lactate anion gap.
with_non_lactate_ag = preprocessed_anion_gap_df.dropna(subset=['non_lactate_anion_gap'])
with_non_lactate_ag.nunique()

In [None]:
# Reference time T0: 'stroke_dt', falling back to 'arrival_dt' where it is missing.
onset_or_arrival = inclusion_registry_df['stroke_dt'].fillna(inclusion_registry_df['arrival_dt'])
inclusion_registry_df['T0'] = onset_or_arrival

# Attach T0 to every lab sample of the same admission; the left join keeps
# samples whose admission has no registry match.
reference_times = inclusion_registry_df[['case_admission_id', 'T0']]
preprocessed_anion_gap_df = preprocessed_anion_gap_df.merge(
    reference_times, how='left', on='case_admission_id'
)

In [None]:
# Hours elapsed between T0 and each sample (negative when sampled before T0).
dt_format = '%d.%m.%Y %H:%M'
sample_times = pd.to_datetime(preprocessed_anion_gap_df['sample_date'], format=dt_format)
reference_times = pd.to_datetime(preprocessed_anion_gap_df['T0'], format=dt_format)
elapsed = sample_times - reference_times
preprocessed_anion_gap_df['relative_sample_date'] = elapsed.dt.total_seconds() / 3600  # seconds -> hours

In [None]:
# Hour category: elapsed hours rounded down to the whole hour.
preprocessed_anion_gap_df['relative_sample_date_hcat'] = np.floor(
    preprocessed_anion_gap_df['relative_sample_date']
)

In [None]:
# Number of distinct admissions present in the lab table.
preprocessed_anion_gap_df['case_admission_id'].nunique()

In [None]:
# Count admissions with at least one non-lactate anion gap in each time window
# (hours relative to T0). The later window is half-open, [24, 72), so a sample
# taken at exactly 24h is counted there instead of falling between the windows.
hours_from_t0 = preprocessed_anion_gap_df.relative_sample_date
has_non_lactate_ag = preprocessed_anion_gap_df.non_lactate_anion_gap.notna()
in_first_24h = (hours_from_t0 > -12) & (hours_from_t0 < 24)
in_24_to_72h = (hours_from_t0 >= 24) & (hours_from_t0 < 3 * 24)

n_patients_with_ag_in_first_24h = preprocessed_anion_gap_df[in_first_24h & has_non_lactate_ag].case_admission_id.nunique()
n_patients_with_ag_in_24_to_72h = preprocessed_anion_gap_df[in_24_to_72h & has_non_lactate_ag].case_admission_id.nunique()

print(f'Number of patients with anion gap in first 24h: {n_patients_with_ag_in_first_24h}')
print(f'Number of patients with anion gap in 24 to 72h: {n_patients_with_ag_in_24_to_72h}')

In [None]:
# Outcomes for the included admissions, one row per admission (first occurrence kept).
# Filtering and de-duplication are chained without inplace=True: an in-place
# drop_duplicates on the filtered slice triggers pandas' SettingWithCopyWarning.
outcome_df = preprocess_outcomes(registry_path)
included_case_ids = inclusion_registry_df.case_admission_id.unique()
outcome_df = (
    outcome_df[outcome_df.case_admission_id.isin(included_case_ids)]
    .drop_duplicates(subset='case_admission_id', keep='first')
)

In [None]:
# Attach the '3M mRS' outcome to every lab sample of the same admission;
# the left join keeps samples without a recorded outcome.
three_month_mrs = outcome_df[['case_admission_id', '3M mRS']]
preprocessed_anion_gap_df = pd.merge(
    preprocessed_anion_gap_df,
    three_month_mrs,
    how='left',
    on='case_admission_id',
)

In [None]:
# Display the per-sample table (labs, anion gap columns, T0-relative timing, 3M mRS) for inspection.
preprocessed_anion_gap_df

In [None]:
# Overall correlation between non-lactate anion gap and 3M mRS

# Work on an explicit copy of the complete cases: assigning a column on a
# filtered slice of preprocessed_anion_gap_df raises SettingWithCopyWarning.
temp_df = preprocessed_anion_gap_df.dropna(subset=['non_lactate_anion_gap', '3M mRS']).copy()
temp_df['3M mRS'] = temp_df['3M mRS'].astype(int)
corr, p_value = pearsonr(temp_df['non_lactate_anion_gap'], temp_df['3M mRS'])
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# Sample subsets by time window (hours relative to T0):
#   early   : (-12, 24)
#   day 2   : [24, 48)
#   day 3   : [48, 72)
#   day 2-3 : [24, 72)
# Upper bounds are multiples of 24h (the previous 2*72 / 3*72 stretched the
# windows to 144h and 216h). Day windows are half-open so that day 2 and day 3
# together cover exactly the day 2-3 window, including samples at 24h and 48h.
hours_from_t0 = preprocessed_anion_gap_df.relative_sample_date
early_anion_gap_df = preprocessed_anion_gap_df[(hours_from_t0 > -12) & (hours_from_t0 < 24)]
anion_gap_d2_df = preprocessed_anion_gap_df[(hours_from_t0 >= 1 * 24) & (hours_from_t0 < 2 * 24)]
anion_gap_d3_df = preprocessed_anion_gap_df[(hours_from_t0 >= 2 * 24) & (hours_from_t0 < 3 * 24)]
anion_gap_d_2_3_df = preprocessed_anion_gap_df[(hours_from_t0 >= 1 * 24) & (hours_from_t0 < 3 * 24)]

In [None]:
# correlation of early anion gap with outcome
# Complete cases only, copied so the cast below writes to an independent frame.
temp_df = early_anion_gap_df.dropna(subset=['non_lactate_anion_gap', '3M mRS']).copy()
# Cast mRS to int, as the overall-correlation cell does, so pearsonr receives a numeric column.
temp_df['3M mRS'] = temp_df['3M mRS'].astype(int)
corr, p_value = pearsonr(temp_df['non_lactate_anion_gap'], temp_df['3M mRS'])
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# correlation of d2 anion gap with outcome
# Complete cases only, copied so the cast below writes to an independent frame.
temp_df = anion_gap_d2_df.dropna(subset=['non_lactate_anion_gap', '3M mRS']).copy()
# Cast mRS to int, as the overall-correlation cell does, so pearsonr receives a numeric column.
temp_df['3M mRS'] = temp_df['3M mRS'].astype(int)
corr, p_value = pearsonr(temp_df['non_lactate_anion_gap'], temp_df['3M mRS'])
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# correlation of d3 anion gap with outcome
# Complete cases only, copied so the cast below writes to an independent frame.
temp_df = anion_gap_d3_df.dropna(subset=['non_lactate_anion_gap', '3M mRS']).copy()
# Cast mRS to int, as the overall-correlation cell does, so pearsonr receives a numeric column.
temp_df['3M mRS'] = temp_df['3M mRS'].astype(int)
corr, p_value = pearsonr(temp_df['non_lactate_anion_gap'], temp_df['3M mRS'])
print(f'Pearson correlation: {corr}, p-value: {p_value}')

In [None]:
# correlation of d2-d3 anion gap with outcome
# Complete cases only, copied so the cast below writes to an independent frame.
temp_df = anion_gap_d_2_3_df.dropna(subset=['non_lactate_anion_gap', '3M mRS']).copy()
# Cast mRS to int, as the overall-correlation cell does, so pearsonr receives a numeric column.
temp_df['3M mRS'] = temp_df['3M mRS'].astype(int)
corr, p_value = pearsonr(temp_df['non_lactate_anion_gap'], temp_df['3M mRS'])
print(f'Pearson correlation: {corr}, p-value: {p_value}')
