In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Input file locations. NOTE(review): these are absolute, user-specific paths and
# must be adapted before the notebook can run on another machine.
# Per-signal CSV exports located in the moberg_extracted_data folder.
ptio2_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/ptio2_df.csv'
filtered_ptio2_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/ptio2_df_filtered.csv'
temperature_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/temperature_df.csv'
lpr_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/lpr_df.csv'
cpp_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/cpp_df.csv'
hr_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/hr_df.csv'
etco2_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/etco2_df.csv'
# ci_path is only referenced by commented-out code further down.
ci_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/ci_df.csv'
prx_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_extracted_data/prx_df.csv'
# Excel sheets: the drug administrations to analyse and the registry that maps
# 'manual_mrn' to the study patient number ('Pat. Nr.').
drug_administration_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/drug_administrations.xlsx'
registry_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/moberg_registry_kssg_post_hoc_modified.xlsx'
# Semicolon-separated CSV files keyed by 'FallNr' (joined to the registry later).
paco2_path = '/Users/jk1/stroke_datasets/ptiO2-Studie/pdms_data/joined_aBGA.csv'
mainstream_etco2_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/neurocrit_fever/data/PDMS_data/joined_etCO2.csv'

In [None]:
# Analysis options.
# When True, infusions lasting one hour or less are removed from the drug administration table.
exclude_short_infusions = True
# When True, ptiO2 is read from filtered_ptio2_path instead of ptio2_path.
use_filtered_ptio2 = True

In [None]:
# Load one table per monitored signal, plus the drug administration sheet.
cpp_df = pd.read_csv(cpp_path)
# ptiO2 comes from the filtered export when `use_filtered_ptio2` is set, otherwise from the raw one.
ptio2_df = pd.read_csv(filtered_ptio2_path if use_filtered_ptio2 else ptio2_path)
temperature_df = pd.read_csv(temperature_path)
lpr_df = pd.read_csv(lpr_path)
hr_df = pd.read_csv(hr_path)
# There are no associated CI measurements, so the CI table does not need to be loaded.
# ci_df = pd.read_csv(ci_path)
etco2_df = pd.read_csv(etco2_path)
prx_df = pd.read_csv(prx_path)
drug_administration_df = pd.read_excel(drug_administration_path)

In [None]:
# Registry sheet (Excel) used later to map 'FallNr' to the study patient number.
registry_df = pd.read_excel(registry_path)
# Both PDMS exports are semicolon-separated CSV files; read them with the same settings.
paco2_df, mainstream_etco2_df = (
    pd.read_csv(pdms_csv_path, sep=';')
    for pdms_csv_path in (paco2_path, mainstream_etco2_path)
)

In [None]:
def attach_patient_number(measurement_df, registry, time_column):
    """Return a new frame with the study patient number attached.

    The measurement table is left-joined to the registry on
    ``FallNr == manual_mrn``; the helper column ``manual_mrn`` is dropped,
    ``Pat. Nr.`` is renamed to ``pat_nr`` and ``time_column`` to ``datetime``.

    Parameters
    ----------
    measurement_df : pd.DataFrame
        Table with a ``FallNr`` column and a timestamp column named ``time_column``.
    registry : pd.DataFrame
        Table with the columns ``manual_mrn`` and ``Pat. Nr.``.
    time_column : str
        Name of the timestamp column in ``measurement_df``.

    Returns
    -------
    pd.DataFrame
        A new frame; neither input is modified. Rows without a registry match
        are kept with a missing ``pat_nr`` (left join).
    """
    # Repeated (manual_mrn, Pat. Nr.) pairs in the registry would duplicate every
    # matching measurement in a left join, so reduce the mapping to unique pairs first.
    mrn_to_pat_nr = registry[['manual_mrn', 'Pat. Nr.']].drop_duplicates()
    merged_df = pd.merge(measurement_df, mrn_to_pat_nr, left_on='FallNr', right_on='manual_mrn',
                         how='left')
    return (
        merged_df
        .drop(columns=['manual_mrn'])
        .rename(columns={time_column: 'datetime', 'Pat. Nr.': 'pat_nr'})
    )


paco2_df = attach_patient_number(paco2_df, registry_df, time_column='Zeitpunkt_aBGA')
# 7.50062 is the number of mmHg per kPa.
# NOTE(review): this assumes the 'pCO2' column is recorded in kPa - confirm against the export.
paco2_df['pCO2_mmHg'] = paco2_df['pCO2'] * 7.50062

mainstream_etco2_df = attach_patient_number(mainstream_etco2_df, registry_df,
                                            time_column='Zeitpunkt_etCO2')

In [None]:
# Keep only administrations flagged as monitored ...
drug_administration_df = drug_administration_df[drug_administration_df['monitored']]
# ... and exclude every row whose further_exclusion_criterium is not NaN.
# The explicit .copy() detaches the selection from the original frame, so adding a
# column below writes to an independent frame instead of a slice
# (avoids pandas' SettingWithCopyWarning).
drug_administration_df = drug_administration_df[
    drug_administration_df['further_exclusion_criterium'].isna()
].copy()
if exclude_short_infusions:
    # Infusion duration in hours, from drug_start to drug_end.
    drug_administration_df['infusion_duration'] = (
        pd.to_datetime(drug_administration_df['drug_end'])
        - pd.to_datetime(drug_administration_df['drug_start'])
    ).dt.total_seconds() / 3600
    is_short_infusion = drug_administration_df['infusion_duration'] <= 1
    print(
        f'Excluding {int(is_short_infusion.sum())} infusions with duration <= 1h')
    # Keep infusions strictly longer than 1h. Rows with a missing duration
    # (e.g. no drug_end) fail this comparison too and are dropped without being
    # counted in the message above.
    drug_administration_df = drug_administration_df[drug_administration_df['infusion_duration'] > 1]


In [None]:
# Convert the 'datetime' column of every signal table to pandas timestamps.
# The column is replaced on each frame itself, so the notebook-level variables are updated.
frames_with_datetime = (
    ptio2_df,
    cpp_df,
    temperature_df,
    lpr_df,
    hr_df,
    etco2_df,
    paco2_df,
    mainstream_etco2_df,
    prx_df,
)
for var_df in frames_with_datetime:
    parsed_datetime = pd.to_datetime(var_df['datetime'])
    var_df['datetime'] = parsed_datetime

In [None]:
# For every drug administration, extract the measurements recorded from
# -time_window to +time_window hours around the start of the administration.
time_window = 12


def extract_peri_administration_windows(var_df, administrations_df, window_hours):
    """Collect the measurements surrounding each drug administration.

    For every row of ``administrations_df`` the rows of ``var_df`` are selected
    that belong to the same ``pat_nr`` and whose ``datetime`` lies within
    ``drug_start - window_hours`` and ``drug_start + window_hours`` (both bounds
    inclusive). Each selection gets two extra columns:

    * ``drug_start`` - start of the administration the window belongs to
    * ``relative_datetime`` - hours elapsed since ``drug_start`` (negative before it)

    Parameters
    ----------
    var_df : pd.DataFrame
        Measurements with at least the columns ``pat_nr`` and ``datetime``.
    administrations_df : pd.DataFrame
        Drug administrations with at least the columns ``pat_nr`` and ``drug_start``.
    window_hours : int or float
        Half-width of the extraction window in hours.

    Returns
    -------
    pd.DataFrame
        All windows stacked in administration order; the original row index of
        ``var_df`` is kept. A measurement that falls into several windows appears
        once per window. An empty, column-less frame is returned when there are
        no administrations.
    """
    half_width = pd.to_timedelta(window_hours, unit='h')
    windows = []
    for _, administration in administrations_df.iterrows():
        drug_start = administration['drug_start']
        in_window = ((var_df['pat_nr'] == administration['pat_nr'])
                     & (var_df['datetime'] >= drug_start - half_width)
                     & (var_df['datetime'] <= drug_start + half_width))
        # Copy the selection so the new columns are written to an independent frame
        # rather than to a slice of var_df (avoids SettingWithCopyWarning).
        window_df = var_df[in_window].copy()
        window_df['drug_start'] = drug_start
        window_df['relative_datetime'] = (window_df['datetime'] - drug_start).dt.total_seconds() / 3600
        windows.append(window_df)
    if not windows:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(windows)


associated_ptio2_df = extract_peri_administration_windows(ptio2_df, drug_administration_df, time_window)
associated_cpp_df = extract_peri_administration_windows(cpp_df, drug_administration_df, time_window)
associated_temperature_df = extract_peri_administration_windows(temperature_df, drug_administration_df, time_window)
associated_hr_df = extract_peri_administration_windows(hr_df, drug_administration_df, time_window)
associated_lpr_df = extract_peri_administration_windows(lpr_df, drug_administration_df, time_window)
associated_etco2_df = extract_peri_administration_windows(etco2_df, drug_administration_df, time_window)
associated_paco2_df = extract_peri_administration_windows(paco2_df, drug_administration_df, time_window)
associated_mainstream_etco2_df = extract_peri_administration_windows(mainstream_etco2_df, drug_administration_df, time_window)
associated_prx_df = extract_peri_administration_windows(prx_df, drug_administration_df, time_window)

# There are no associated CI measurements, so the CI table is not loaded and the
# corresponding frame is left empty.
associated_ci_df = pd.DataFrame()
# associated_ci_df = extract_peri_administration_windows(ci_df, drug_administration_df, time_window)

In [None]:
# ptio2_df.groupby('pat_nr').datetime.diff().dt.total_seconds()

# Summarise, per variable, the time between consecutive measurements as
# "median (IQR: Q1-Q3)", once in seconds and once in hours.
associated_frames = {
    'ptio2': associated_ptio2_df,
    'cpp': associated_cpp_df,
    'temperature': associated_temperature_df,
    'lpr': associated_lpr_df,
    'hr': associated_hr_df,
    'etco2': associated_etco2_df,
    'paco2': associated_paco2_df,
    'mainstream_etco2': associated_mainstream_etco2_df,
    'prx': associated_prx_df,
}

sampling_rate_rows = []
for variable, var_df in associated_frames.items():
    # The associated frames are windows stacked one after another, and windows of
    # the same patient can overlap. Differences are therefore taken within each
    # (patient, administration) window so they never span the jump from the end of
    # one window to the start of the next.
    interval_s = var_df.groupby(['pat_nr', 'drug_start'])['datetime'].diff().dt.total_seconds()
    median_s = interval_s.median()
    q1_s = interval_s.quantile(0.25)
    q3_s = interval_s.quantile(0.75)
    sampling_rate_rows.append({
        'variable': variable,
        'sampling_rate_s': f'{median_s} (IQR: {q1_s}-{q3_s})',
        'sampling_rate_h': f'{median_s/3600} (IQR: {q1_s/3600}-{q3_s/3600})',
    })

# Build the table once, with exactly the columns that are filled above. The previous
# version started from a frame with an unused 'sampling_rate' column, which stayed
# in the result as an all-NaN column.
sampling_rate_table = pd.DataFrame(sampling_rate_rows,
                                   columns=['variable', 'sampling_rate_s', 'sampling_rate_h'])


In [None]:
# Show the per-variable sampling summary as the cell output.
sampling_rate_table

In [None]:
# sampling_rate_table.to_excel('/Users/jk1/Downloads/sampling_rate_table.xlsx', index=False)