In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Input locations: one CSV extract per monitored variable plus the drug administration sheet.
study_dir = '/Users/jk1/stroke_datasets/ptiO2-Studie'
extracted_data_dir = study_dir + '/extracted_data'

cpp_path = extracted_data_dir + '/cpp_df.csv'
ptio2_path = extracted_data_dir + '/ptio2_df.csv'
temperature_path = extracted_data_dir + '/temperature_df.csv'
lpr_path = extracted_data_dir + '/lpr_df.csv'
hr_path = extracted_data_dir + '/hr_df.csv'
drug_administration_path = study_dir + '/drug_administrations.xlsx'

In [None]:
# Toggle: when True, infusions whose duration is <= 1h are removed in the exclusion step below.
exclude_short_infusions = True

In [None]:
# Read the five monitoring extracts (CSV) in one pass, then the drug administration sheet (Excel).
cpp_df, ptio2_df, temperature_df, lpr_df, hr_df = (
    pd.read_csv(csv_path)
    for csv_path in (cpp_path, ptio2_path, temperature_path, lpr_path, hr_path)
)
drug_administration_df = pd.read_excel(drug_administration_path)

In [None]:
# Keep only rows whose `monitored` flag is set.
# .copy() makes the filtered frame independent of the frame it was sliced from, so the
# `infusion_duration` column added further down is written to a real frame and not to a
# slice (which is what raises pandas' SettingWithCopyWarning).
drug_administration_df = drug_administration_df[drug_administration_df.monitored].copy()

n_patients_before = drug_administration_df['pat_nr'].nunique()
# rows carrying any further exclusion criterium (computed once, reused for report and filter)
has_exclusion_criterium = drug_administration_df['further_exclusion_criterium'].notna()
# print patients with exclusion criterium
print(f'Excluding {int(has_exclusion_criterium.sum())} infusions with {drug_administration_df.loc[has_exclusion_criterium, "further_exclusion_criterium"].nunique()} different further exclusion criteria')
# exclude if further_exclusion_criterium is not Nan
drug_administration_df = drug_administration_df[~has_exclusion_criterium].copy()
# print number of patients excluded
print(f'Excluding {n_patients_before - drug_administration_df["pat_nr"].nunique()} patients with further exclusion criterium')

if exclude_short_infusions:
    n_patients_before = drug_administration_df['pat_nr'].nunique()
    # infusion duration in hours (end - start)
    drug_administration_df['infusion_duration'] = (
        pd.to_datetime(drug_administration_df['drug_end'])
        - pd.to_datetime(drug_administration_df['drug_start'])
    ).dt.total_seconds() / 3600
    is_short_infusion = drug_administration_df['infusion_duration'] <= 1
    print(f'Excluding {int(is_short_infusion.sum())} infusions with duration <= 1h')
    # Filter on `> 1` (not on ~is_short_infusion) so rows with a missing duration are still
    # dropped, exactly as before.
    drug_administration_df = drug_administration_df[drug_administration_df['infusion_duration'] > 1].copy()
    print(f'Excluding {n_patients_before - drug_administration_df["pat_nr"].nunique()} patients with infusions with duration <= 1h')

In [None]:
# Parse the `datetime` column of every monitoring frame in place so it can be compared
# against timestamps later on.
monitoring_frames = (ptio2_df, cpp_df, temperature_df, lpr_df, hr_df)
for var_df in monitoring_frames:
    parsed_datetimes = pd.to_datetime(var_df['datetime'])
    var_df['datetime'] = parsed_datetimes

In [None]:
# for every drug administration extract data from -xh to +xh around start
time_window = 12


def _extract_window(var_df, pat_nr, drug_start, window_hours):
    """Return the rows of ``var_df`` for one patient within +/- ``window_hours`` of ``drug_start``.

    Parameters
    ----------
    var_df : DataFrame with at least the columns ``pat_nr`` and ``datetime``.
    pat_nr : patient identifier to select.
    drug_start : timestamp of the infusion start (centre of the window).
    window_hours : half-width of the window in hours; both bounds are inclusive.

    Returns
    -------
    A new DataFrame (never a slice of ``var_df``) with two extra columns:
    ``drug_start`` and ``relative_datetime`` (hours relative to ``drug_start``,
    negative values lie before the start).
    """
    half_window = pd.to_timedelta(window_hours, unit='h')
    in_window = var_df[(var_df['pat_nr'] == pat_nr)
                       & (var_df['datetime'] >= drug_start - half_window)
                       & (var_df['datetime'] <= drug_start + half_window)]
    # assign() returns a copy, so no column is ever set on a filtered slice
    # (the original pattern triggers SettingWithCopyWarning).
    return in_window.assign(
        drug_start=drug_start,
        relative_datetime=(in_window['datetime'] - drug_start).dt.total_seconds() / 3600,
    )


def _stack_windows(windows):
    """Concatenate the collected per-infusion windows once; empty frame when there are none."""
    return pd.concat(windows) if windows else pd.DataFrame()


source_frames = {
    'ptio2': ptio2_df,
    'cpp': cpp_df,
    'temperature': temperature_df,
    'hr': hr_df,
    'lpr': lpr_df,
}
# Collect the windows in lists and concatenate once at the end instead of growing a
# DataFrame with pd.concat on every iteration (quadratic copying).
windows_by_variable = {name: [] for name in source_frames}

for _, row in drug_administration_df.iterrows():
    # pd.to_datetime leaves a Timestamp unchanged; it only matters if the sheet delivered text.
    drug_start = pd.to_datetime(row['drug_start'])
    for name, source_df in source_frames.items():
        windows_by_variable[name].append(
            _extract_window(source_df, row['pat_nr'], drug_start, time_window)
        )

associated_ptio2_df = _stack_windows(windows_by_variable['ptio2'])
associated_cpp_df = _stack_windows(windows_by_variable['cpp'])
associated_temperature_df = _stack_windows(windows_by_variable['temperature'])
associated_hr_df = _stack_windows(windows_by_variable['hr'])
associated_lpr_df = _stack_windows(windows_by_variable['lpr'])
# Kept for backward compatibility: the original cell created this empty frame and never filled it.
associated_ci_df = pd.DataFrame()

    

In [None]:
# Bin the relative time (hours) to two decimals, i.e. 0.01 h steps; this column is the
# time key used for grouping and merging further down.
associated_frames = (
    associated_ptio2_df,
    associated_cpp_df,
    associated_temperature_df,
    associated_hr_df,
    associated_lpr_df,
)
for var_df in associated_frames:
    var_df['relative_datetime_cat'] = var_df['relative_datetime'].round(2)

In [None]:
# One row per (patient, infusion start, relative-time bin) holding the median of the variable.
window_bin_keys = ['pat_nr', 'drug_start', 'relative_datetime_cat']


def _median_per_bin(frame, value_column):
    """Median of ``value_column`` per ``window_bin_keys`` group, with the keys as regular columns."""
    return frame.groupby(window_bin_keys).agg({value_column: 'median'}).reset_index()


grouped_ptio2 = _median_per_bin(associated_ptio2_df, 'ptio2')
grouped_cpp = _median_per_bin(associated_cpp_df, 'cpp')
grouped_temperature = _median_per_bin(associated_temperature_df, 'temperature')
grouped_hr = _median_per_bin(associated_hr_df, 'hr')
grouped_lpr = _median_per_bin(associated_lpr_df, 'lpr')

In [None]:
# merge: outer-join all variables on patient, infusion start and time bin so that a bin
# present for only some variables is kept (missing values become NaN)
merge_keys = ['pat_nr', 'drug_start', 'relative_datetime_cat']
concatenated_df = grouped_ptio2
for grouped_variable in (grouped_cpp, grouped_temperature, grouped_hr, grouped_lpr):
    concatenated_df = concatenated_df.merge(grouped_variable, on=merge_keys, how='outer')

In [None]:
# Label every time bin relative to drug start: bins at or after 0 h are 'post', all others 'pre'.
is_post_start = concatenated_df['relative_datetime_cat'] >= 0
concatenated_df['pre_post'] = is_post_start.map({False: 'pre', True: 'post'})

## Multivariate model
Create a full mixed-effects model with all variables and their interactions.

In [None]:
# Complete-case frame for the multivariate model: `lpr` is left out first so that its
# missing values do not remove rows, then every row with any remaining NaN is dropped.
temp_df = concatenated_df.drop(columns=['lpr']).dropna()

In [None]:
# Full model: ptio2 explained by phase (pre/post), cpp, temperature and hr; in the formula,
# `*` expands to all main effects plus all their interactions. Observations are grouped by patient.
full_model_formula = "ptio2 ~ pre_post * cpp * temperature * hr"
mixed_model = smf.mixedlm(formula=full_model_formula, data=temp_df, groups=temp_df['pat_nr'])

In [None]:
# Estimate the full model; the fitted results object is reused by the following cells.
mixed_model_fit = mixed_model.fit()

In [None]:
# Score vector of the model evaluated at the fitted parameters (shown as the cell output).
# NOTE(review): presumably a convergence check (entries close to zero) — confirm intent.
mixed_model.score(mixed_model_fit.params_object)

In [None]:
# Display the summary table of the fitted full model.
mixed_model_fit.summary()

In [None]:
# Print the p-values of the fitted full model as plain text.
print(mixed_model_fit.pvalues)

## Single-variable models

In [None]:
# ptio2
# Keep the grouping/time columns, pre_post and ptio2 only, so that missing values in the
# other variables cannot remove rows from this single-variable model.
temp_ptio2_df = concatenated_df.drop(columns=['cpp', 'temperature', 'hr', 'lpr'])
# drop rows with nan values
# Fix: this line used to re-assign from `temp_df.dropna()`, which threw away the frame built
# above and restricted the model to rows where cpp, temperature and hr were also present.
temp_ptio2_df = temp_ptio2_df.dropna()

# Mixed model of ptio2 on phase (pre/post), grouped by patient.
mixed_model_ptio2 = smf.mixedlm("ptio2 ~ pre_post", temp_ptio2_df, groups=temp_ptio2_df['pat_nr'])

In [None]:
# Fit the ptio2 ~ pre_post model and display its summary table as the cell output.
mixed_model_ptio2_fit = mixed_model_ptio2.fit()
mixed_model_ptio2_fit.summary()

In [None]:
# Show the p-values of the fitted ptio2 model.
mixed_model_ptio2_fit.pvalues

In [None]:
# pre / post median and IQR
# Select each phase once, then report median (25th percentile, 75th percentile).
ptio2_pre = concatenated_df.loc[concatenated_df['pre_post'] == 'pre', 'ptio2']
ptio2_post = concatenated_df.loc[concatenated_df['pre_post'] == 'post', 'ptio2']
ptio2_pre_q1, ptio2_pre_q3 = ptio2_pre.quantile(0.25), ptio2_pre.quantile(0.75)
ptio2_post_q1, ptio2_post_q3 = ptio2_post.quantile(0.25), ptio2_post.quantile(0.75)
print(f'Pre: {ptio2_pre.median()} ({ptio2_pre_q1}, {ptio2_pre_q3})',
      f'Post: {ptio2_post.median()} ({ptio2_post_q1}, {ptio2_post_q3})')

In [None]:
# cpp
# Keep the grouping/time columns, pre_post and cpp only, so that missing values in the
# other variables cannot remove rows from this single-variable model.
temp_cpp_df = concatenated_df.drop(columns=['ptio2', 'temperature', 'hr', 'lpr'])
# drop rows with nan values
# Fix: this line used to re-assign from `temp_df.dropna()`, which threw away the frame built
# above and restricted the data to rows where ptio2, temperature and hr were also present.
temp_cpp_df = temp_cpp_df.dropna()

In [None]:
# Mixed model of cpp on phase (pre/post), with observations grouped by patient.
cpp_formula = "cpp ~ pre_post"
mixed_model_cpp = smf.mixedlm(formula=cpp_formula, data=temp_cpp_df, groups=temp_cpp_df['pat_nr'])

In [None]:
# Fit the cpp ~ pre_post model and display its summary table as the cell output.
mixed_model_cpp_fit = mixed_model_cpp.fit()
mixed_model_cpp_fit.summary()

In [None]:
# Show the p-values of the fitted cpp model.
mixed_model_cpp_fit.pvalues

In [None]:
# pre / post median and IQR
# Select each phase once, then report median (25th percentile, 75th percentile).
cpp_pre = concatenated_df.loc[concatenated_df['pre_post'] == 'pre', 'cpp']
cpp_post = concatenated_df.loc[concatenated_df['pre_post'] == 'post', 'cpp']
cpp_pre_q1, cpp_pre_q3 = cpp_pre.quantile(0.25), cpp_pre.quantile(0.75)
cpp_post_q1, cpp_post_q3 = cpp_post.quantile(0.25), cpp_post.quantile(0.75)
print(f'Pre: {cpp_pre.median()} ({cpp_pre_q1}, {cpp_pre_q3})',
      f'Post: {cpp_post.median()} ({cpp_post_q1}, {cpp_post_q3})')

In [None]:
# temperature
# Keep the grouping/time columns, pre_post and temperature only, so that missing values in
# the other variables cannot remove rows from this single-variable model.
temp_temperature_df = concatenated_df.drop(columns=['cpp', 'ptio2', 'hr', 'lpr'])
# drop rows with nan values
# Fix: this line used to re-assign from `temp_df.dropna()`, which threw away the frame built
# above and restricted the data to rows where ptio2, cpp and hr were also present.
temp_temperature_df = temp_temperature_df.dropna()

In [None]:
# Mixed model of temperature on phase (pre/post), with observations grouped by patient.
temperature_formula = "temperature ~ pre_post"
mixed_model_temperature = smf.mixedlm(
    formula=temperature_formula,
    data=temp_temperature_df,
    groups=temp_temperature_df['pat_nr'],
)

In [None]:
# Fit the temperature ~ pre_post model and display its summary table as the cell output.
mixed_model_temperature_fit = mixed_model_temperature.fit()
mixed_model_temperature_fit.summary()

In [None]:
# Show the p-values of the fitted temperature model.
mixed_model_temperature_fit.pvalues

In [None]:
# pre / post median and IQR
# Select each phase once, then report median (25th percentile, 75th percentile).
temperature_pre = concatenated_df.loc[concatenated_df['pre_post'] == 'pre', 'temperature']
temperature_post = concatenated_df.loc[concatenated_df['pre_post'] == 'post', 'temperature']
temperature_pre_q1, temperature_pre_q3 = temperature_pre.quantile(0.25), temperature_pre.quantile(0.75)
temperature_post_q1, temperature_post_q3 = temperature_post.quantile(0.25), temperature_post.quantile(0.75)
print(f'Pre: {temperature_pre.median()} ({temperature_pre_q1}, {temperature_pre_q3})',
      f'Post: {temperature_post.median()} ({temperature_post_q1}, {temperature_post_q3})')

In [None]:
# hr
# Keep the grouping/time columns, pre_post and hr only, so that missing values in the
# other variables cannot remove rows from this single-variable model.
temp_hr_df = concatenated_df.drop(columns=['cpp', 'temperature', 'ptio2', 'lpr'])
# drop rows with nan values
# Fix: this line used to re-assign from `temp_df.dropna()`, which threw away the frame built
# above and restricted the data to rows where ptio2, cpp and temperature were also present.
temp_hr_df = temp_hr_df.dropna()

In [None]:
# Mixed model of hr on phase (pre/post), with observations grouped by patient.
hr_formula = "hr ~ pre_post"
mixed_model_hr = smf.mixedlm(formula=hr_formula, data=temp_hr_df, groups=temp_hr_df['pat_nr'])

In [None]:
# Fit the hr ~ pre_post model and display its summary table as the cell output.
mixed_model_hr_fit = mixed_model_hr.fit()
mixed_model_hr_fit.summary()

In [None]:
# Show the p-values of the fitted hr model.
mixed_model_hr_fit.pvalues

In [None]:
# pre / post median and IQR
# Select each phase once, then report median (25th percentile, 75th percentile).
hr_pre = concatenated_df.loc[concatenated_df['pre_post'] == 'pre', 'hr']
hr_post = concatenated_df.loc[concatenated_df['pre_post'] == 'post', 'hr']
hr_pre_q1, hr_pre_q3 = hr_pre.quantile(0.25), hr_pre.quantile(0.75)
hr_post_q1, hr_post_q3 = hr_post.quantile(0.25), hr_post.quantile(0.75)
print(f'Pre: {hr_pre.median()} ({hr_pre_q1}, {hr_pre_q3})',
      f'Post: {hr_post.median()} ({hr_post_q1}, {hr_post_q3})')