In [None]:
import os

import pandas as pd

In [None]:
# Location of the annotated 8-hour timebin CSV.
# The BP_TIMEBIN_PATH environment variable overrides the location so the notebook can run
# on another machine; when it is unset the original local path is used unchanged.
timebin_path = os.environ.get(
    'BP_TIMEBIN_PATH',
    '/Users/jk1/temp/bp_dci/bp_timebin_8h/bp_timebins_8h_nor_annotated.csv',
)

In [None]:
# Read the timebin CSV into a DataFrame; every later cell works from this frame.
timebin_df = pd.read_csv(filepath_or_buffer=timebin_path)

In [None]:
# Show the freshly loaded frame as the cell output for a quick visual check.
timebin_df

In [None]:
# Report how much of the data is flagged with concomitant noradrenaline, then drop
# those rows. The flag column is optional: when it is missing the frame is left as is.
if 'noradrenaline_concomitant' in timebin_df.columns:
    # Build each boolean mask once and reuse it for every count below.
    is_concomitant = timebin_df['noradrenaline_concomitant'] == 1
    is_positive = timebin_df['within_event_timebin'] == 1
    is_negative = timebin_df['within_event_timebin'] == 0

    n_patients_with_concomitant_noradrenaline = timebin_df.loc[is_concomitant, 'pNr'].nunique()
    n_measures_with_concomitant_noradrenaline = int(is_concomitant.sum())
    n_pos_measures_with_concomitant_noradrenaline = int((is_concomitant & is_positive).sum())
    n_neg_measures_with_concomitant_noradrenaline = int((is_concomitant & is_negative).sum())

    # (description, flagged count, reference total) for every share that is reported
    concomitant_shares = [
        ('patients', n_patients_with_concomitant_noradrenaline, timebin_df['pNr'].nunique()),
        ('measures', n_measures_with_concomitant_noradrenaline, len(timebin_df)),
        ('positive measures', n_pos_measures_with_concomitant_noradrenaline, int(is_positive.sum())),
        ('negative measures', n_neg_measures_with_concomitant_noradrenaline, int(is_negative.sum())),
    ]
    for description, n_flagged, n_total in concomitant_shares:
        # An empty reference group would otherwise raise ZeroDivisionError.
        share = n_flagged / n_total if n_total else float('nan')
        # The '%' format multiplies by 100 so the printed value matches its label.
        print(f'Percentage of {description} with concomitant noradrenaline: {share:.2%}')

    # Keep every row that is not explicitly flagged with 1 (missing flags are kept too).
    timebin_df = timebin_df[timebin_df['noradrenaline_concomitant'] != 1]

In [None]:
# Per-(negative_timebin, pNr) summary statistics of the three pressure columns.
# A single grouped selection is shared by the three aggregations; the statistic name is
# appended to the value columns before the grouping keys are turned back into columns.
negative_groups = timebin_df.groupby(['negative_timebin', 'pNr'])[['systole', 'diastole', 'mitteldruck']]

median_df = negative_groups.agg('median').add_suffix('_median').reset_index()
max_df = negative_groups.agg('max').add_suffix('_max').reset_index()
min_df = negative_groups.agg('min').add_suffix('_min').reset_index()

In [None]:
# Join the median / max / min tables on the grouping keys and tag every row with label 0.
negative_timebin_metrics = (
    median_df
    .merge(max_df, on=['negative_timebin', 'pNr'])
    .merge(min_df, on=['negative_timebin', 'pNr'])
    .assign(label=0)
)

In [None]:
# Show the merged label-0 metrics table as the cell output.
negative_timebin_metrics

In [None]:
# Per-(associated_event_time, pNr) summary statistics of the three pressure columns.
# A single grouped selection is shared by the three aggregations; the statistic name is
# appended to the value columns before the grouping keys are turned back into columns.
positive_groups = timebin_df.groupby(['associated_event_time', 'pNr'])[['systole', 'diastole', 'mitteldruck']]

pos_median_df = positive_groups.agg('median').add_suffix('_median').reset_index()
pos_max_df = positive_groups.agg('max').add_suffix('_max').reset_index()
pos_min_df = positive_groups.agg('min').add_suffix('_min').reset_index()

In [None]:
# Join the positive median / max / min tables on the grouping keys and tag every row with label 1.
pos_timebin_metrics = (
    pos_median_df
    .merge(pos_max_df, on=['associated_event_time', 'pNr'])
    .merge(pos_min_df, on=['associated_event_time', 'pNr'])
    .assign(label=1)
)

In [None]:
# Show the merged label-1 metrics table as the cell output.
pos_timebin_metrics

In [None]:
# Stack the label-0 and label-1 metric tables into one frame.
# Both inputs carry their own 0..n-1 index, so the index is rebuilt here; otherwise the
# combined frame would hold duplicate index labels and label-based lookups would be ambiguous.
# Key columns that exist in only one of the inputs are filled with NaN for the other rows.
timebin_metrics = pd.concat([negative_timebin_metrics, pos_timebin_metrics], ignore_index=True)

In [None]:
# Peek at the first five rows of the combined metrics table.
timebin_metrics.head(n=5)

In [None]:
# Count how many rows carry each label value.
timebin_metrics['label'].value_counts()

In [None]:
# Draw one boxplot per aggregated metric, split by label, on a 3x3 grid.
import seaborn as sns
import matplotlib.pyplot as plt

# Statistic varies slowest so each grid row holds one statistic (median, max, min).
metrics = [
    f'{signal}_{statistic}'
    for statistic in ('median', 'max', 'min')
    for signal in ('systole', 'diastole', 'mitteldruck')
]

fig, axes = plt.subplots(3, 3, figsize=(15, 15))
# axes.flat walks the grid row by row, matching the order of `metrics`.
for ax, metric in zip(axes.flat, metrics):
    sns.boxplot(data=timebin_metrics, x='label', y=metric, ax=ax, showfliers=False, palette='Set3')
    ax.set_title(metric)