In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import mannwhitneyu, ttest_ind

In [None]:
# Location of the Excel workbook that holds the combined database.
# The default is the original, machine-specific path; set the
# PUPILLOMETRY_DATA_PATH environment variable to run the notebook on another
# machine without editing this cell.
combined_data_path = os.environ.get(
    'PUPILLOMETRY_DATA_PATH',
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/pupillometry_sah/data/Pupillometry - Datenbank Joana.xlsx',
)

In [None]:
# Load the 'Database' sheet of the workbook into a DataFrame.
combined_data_df = pd.read_excel(io=combined_data_path, sheet_name='Database')

In [None]:
# replace 999 with nan (presumably the sheet's code for a missing value - confirm)
# The identifier columns are excluded so that a genuine patient ID or protocol
# number of 999 is not blanked out.
sentinel_columns = [column_name for column_name in combined_data_df.columns
                    if column_name not in ('patID', 'pNr')]
# a dict as `to_replace` restricts the replacement to the listed columns
combined_data_df = combined_data_df.replace(
    {column_name: 999 for column_name in sentinel_columns}, np.nan
)

In [None]:
# Preview the first rows of the loaded sheet (after the 999 -> NaN replacement).
combined_data_df.head()

## Number of patients

- `patID`: Patient ID in KSSG
- `pNr`: ACPDMS protocol number

In [None]:
# Count distinct patient IDs (patID) and distinct protocol numbers (pNr).
# More protocol numbers than patient IDs is reported as ICU readmissions.
n_unique_pat_ids = combined_data_df.patID.nunique()
n_unique_protocol_numbers = combined_data_df.pNr.nunique()

print('Number of unique patient IDs:', n_unique_pat_ids, n_unique_protocol_numbers)

n_readmissions = n_unique_protocol_numbers - n_unique_pat_ids
if n_readmissions > 0:
    print(f'There are {n_readmissions} patients with ICU readmission.')

## Number of scans

In [None]:
# Names of all columns that hold a CT result (selected by their common prefix).
ct_result_prefix = 'CT_Result'
ct_results_columns = [name for name in combined_data_df.columns if name.startswith(ct_result_prefix)]

In [None]:
# for every patient check number of scans
# Patients are keyed by protocol number (pNr). A scan is counted when its
# CT result column holds at least one non-missing value in that pNr's rows.
# The records are collected first and the frame is built once; growing a frame
# with pd.concat inside the loop re-copies all earlier rows on every iteration
# and starts from an empty frame, which yields object-dtype columns.
n_scans_records = []
for patient_id in combined_data_df.pNr.unique():
    patient_ct_results = combined_data_df.loc[combined_data_df.pNr == patient_id, ct_results_columns]
    patient_n_scans = patient_ct_results.dropna(axis=1, how='all').shape[1]
    n_scans_records.append({'patient_id': patient_id, 'n_scans': patient_n_scans})

# `columns=` keeps both columns present even when there are no records
n_scans_df = pd.DataFrame(n_scans_records, columns=['patient_id', 'n_scans'])

In [None]:
# Total number of scans over all protocol numbers.
n_scans_df['n_scans'].sum()

## CT results

CT_result categories:	
- 0: Without vasospasm, perfusion deficit or edema
- 1: With vasospasm
- 2: With perfusion deficit
- 3: With cerebral edema
- 4: With vasospasm and perfusion deficit
- 5: With vasospasm and edema
- 6: With perfusion deficit and edema
	
Simplified
- With vasospasm or perfusion deficit: 1, 2, 4, 5, 6
- Without vasospasm or perfusion deficit: 0, 3

In [None]:
# CT result codes indexed by protocol number (pNr), one column per CT result column.
# Plain indexing is used instead of pivot_table: pivot_table aggregates with the
# mean by default, which is meaningless for categorical result codes, and it
# drops columns that are entirely NaN (a later lookup of those columns would fail).
ct_results_df = combined_data_df.set_index('pNr')[ct_results_columns]

In [None]:
# Count every CT result cell whose code is one of 1, 2, 4, 5, 6.
spasm_or_deficit_codes = [1, 2, 4, 5, 6]
has_spasm_or_deficit = ct_results_df[ct_results_columns].isin(spasm_or_deficit_codes)
n_scans_with_spasm_or_deficit = has_spasm_or_deficit.sum(axis=1).sum()
print(f'Number of scans with vasospasm or perfusion deficit: {n_scans_with_spasm_or_deficit}')

In [None]:
# check distribution of CT results
# stack() flattens all CT result columns into one series before counting each code
all_ct_result_codes = ct_results_df[ct_results_columns].stack()
all_ct_result_codes.value_counts()

## Restructure data

In [None]:
# Columns that identify a row of the raw sheet: patient ID and protocol number.
identifier_columns = ["patID", "pNr"]
# Columns that are not scan-specific. Together with the identifiers they are
# repeated on every per-scan row when the data is restructured below; every
# other column of the sheet is treated as a per-scan column.
constants_columns = [
    "Chronic_heart_failure",
    "Diabetes",
    "HbA1c",
    "COPD/Asthma",
    "Chronic_hemodialysis",
    "Liver_cirrhosis",
    "HIV",
    "Cancer",
    "Alcohol",
    "Time_symptoms",
    "Initial_GCS",
    "Time_initial_GCS",
    "Fisher_Scale",
    "Rad_intervention",
    "Surgery",
    "Surgery_type",
    "Mortality",
    "30 day mortality",
    "GOS_Admission",
    "GOS_ICU_Discharge",
    "GOS_Hospital_Discharge",
    "GOS_30 day"
]

In [None]:
# initial data has one row per patient and one column per scan, instead we want one row per scan (all constants are repeated)
# Per-scan columns are recognised by their "_<scan number>" suffix; the suffix is
# stripped from the column name and stored in the new `scan_idx` column.

fixed_columns = identifier_columns + constants_columns

# get columns that are not constants
non_constant_columns = [column_name for column_name in combined_data_df.columns
                        if column_name not in fixed_columns]

# the scan number is the integer after the last underscore of each per-scan column
scan_idxs = [int(col.split('_')[-1]) for col in non_constant_columns]
max_number_of_scans = max(scan_idxs, default=0)  # 0 when there is no per-scan column

# loop-invariant: for every scan number, map each raw column to its name without the suffix
columns_by_scan = {
    scan_idx: {col: '_'.join(col.split('_')[:-1]).strip()
               for col in non_constant_columns if col.endswith(f'_{scan_idx}')}
    for scan_idx in range(1, max_number_of_scans + 1)
}

scan_records = []
for _, row in combined_data_df.iterrows():
    fixed_values = row[fixed_columns].to_dict()
    # for each scan create a new row with all the constants repeated
    for scan_idx, renamed_columns in columns_by_scan.items():
        associated_scan_data = row[list(renamed_columns)]
        # if all values are nan, skip
        if associated_scan_data.isna().all():
            continue

        new_row = dict(fixed_values)
        # add scan number to identifier columns
        new_row['scan_idx'] = scan_idx

        for col, value in associated_scan_data.items():
            new_row[renamed_columns[col]] = value

        scan_records.append(new_row)

# build the frame once: concatenating inside the loop re-copies all earlier rows each time
reorganized_data_df = pd.DataFrame(scan_records)

In [None]:
# Replace the index with a fresh 0..n-1 range (the old index is discarded).
reorganized_data_df = reorganized_data_df.reset_index(drop=True)

In [None]:
# Give the per-side measurement columns underscore-separated names that end in
# the side ("_right" / "_left").
side_column_names = {
    'NPi - right side': 'NPi_right',
    'NPi - left side': 'NPi_left',
    'Pupil size right': 'Pupil_size_right',
    'Pupil size left': 'Pupil_size_left',
    'Velocity of constriction right': 'Velocity_of_constriction_right',
    'Velocity of constriction left': 'Velocity_of_constriction_left',
}
reorganized_data_df = reorganized_data_df.rename(columns=side_column_names)

In [None]:
# Display the per-scan table (one row per scan) for visual inspection.
reorganized_data_df

In [None]:
# sort of right and left side, one row per scan, extra column for side
# Every scan row is split into two rows, one per side. A column counts as
# side-specific when its name contains the side's name; the last "_"-separated
# token is dropped from such names and the side is stored in the new `side` column.
sides = ['right', 'left']

# loop-invariant: classify the columns once instead of once per row and side
all_columns = list(reorganized_data_df.columns)
non_sided_columns = [col for col in all_columns if (sides[0] not in col) and (sides[1] not in col)]
renamed_side_columns = {
    side: {col: '_'.join(col.split('_')[:-1]).strip() for col in all_columns if side in col}
    for side in sides
}

sided_records = []
for _, row in reorganized_data_df.iterrows():
    shared_values = row[non_sided_columns].to_dict()
    # for each scan create a new row with all the constants repeated
    for side in sides:
        new_row = dict(shared_values)
        # add side column
        new_row['side'] = side

        for col, new_name in renamed_side_columns[side].items():
            new_row[new_name] = row[col]

        sided_records.append(new_row)

# build the frame once (a fresh 0..n-1 index) instead of concatenating inside the loop
sided_reorganized_data_df = pd.DataFrame(sided_records)

In [None]:
# Display the per-side table (one row per scan and side) for visual inspection.
sided_reorganized_data_df

In [None]:
# Cast the CT result code to int and the three pupil metrics to float.
sided_reorganized_data_df = sided_reorganized_data_df.astype({
    'CT_Result': int,
    'NPi': float,
    'Pupil_size': float,
    'Velocity_of_constriction': float,
})

## Plot pupil metrics vs CT results

In [None]:
# two subplots, left one for overall NPi, right one for NPi per side

# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result", y="NPi", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('NPi overall')

sns.boxplot(x="CT_Result", y="NPi", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('NPi per side')

sns.despine(offset=10, trim=True)

In [None]:
# replot the same for pupil size
# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result", y="Pupil_size", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('Pupil size overall')

sns.boxplot(x="CT_Result", y="Pupil_size", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('Pupil size per side')

sns.despine(offset=10, trim=True)

In [None]:
# replot the same for velocity of constriction
# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result", y="Velocity_of_constriction", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('Velocity of constriction overall')

sns.boxplot(x="CT_Result", y="Velocity_of_constriction", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('Velocity of constriction per side')

sns.despine(offset=10, trim=True)

## Plot pupil metrics vs dichotomized CT results

In [None]:
# 1 when the CT result code is one of 1, 2, 4, 5, 6; 0 for every other value.
spasm_or_deficit_codes = [1, 2, 4, 5, 6]
has_spasm_or_deficit = sided_reorganized_data_df['CT_Result'].isin(spasm_or_deficit_codes)
sided_reorganized_data_df['CT_Result_dichotomized'] = has_spasm_or_deficit.astype(int)

In [None]:
# Display the per-side table again, now including the CT_Result_dichotomized column.
sided_reorganized_data_df

In [None]:
# NPi
# two subplots, left one for overall NPi, right one for NPi per side
# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result_dichotomized", y="NPi", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('NPi overall')

sns.boxplot(x="CT_Result_dichotomized", y="NPi", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('NPi per side')

sns.despine(offset=10, trim=True)

In [None]:
# Associated p-values
# Mann-Whitney U test and independent-samples t-test of NPi between the two
# dichotomized CT result groups (0 vs 1): first on all rows, then per side.
# mannwhitneyu / ttest_ind come from the import cell at the top of the notebook.
# NOTE(review): the "overall" comparison uses one row per scan *and* side, so a
# scan contributes two values - confirm that treating them as independent is intended.
for side in [None, 'right', 'left']:
    if side is None:
        print('NPi overall')
        rows = sided_reorganized_data_df
    else:
        print(f'NPi {side}')
        rows = sided_reorganized_data_df[sided_reorganized_data_df['side'] == side]

    # select each group once and reuse it for both tests
    npi_group_0 = rows.loc[rows['CT_Result_dichotomized'] == 0, 'NPi'].dropna()
    npi_group_1 = rows.loc[rows['CT_Result_dichotomized'] == 1, 'NPi'].dropna()

    print(mannwhitneyu(npi_group_0, npi_group_1))
    print(ttest_ind(npi_group_0, npi_group_1))

In [None]:
# pupil size
# two subplots, left one for overall pupil size, right one for pupil size per side
# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result_dichotomized", y="Pupil_size", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('Pupil size overall')

sns.boxplot(x="CT_Result_dichotomized", y="Pupil_size", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('Pupil size per side')

sns.despine(offset=10, trim=True)

In [None]:
# Associated p-values
# Mann-Whitney U test and independent-samples t-test of pupil size between the
# two dichotomized CT result groups (0 vs 1): first on all rows, then per side.
for side in [None, 'right', 'left']:
    if side is None:
        print('Pupil size overall')
        rows = sided_reorganized_data_df
    else:
        print(f'Pupil size {side}')
        rows = sided_reorganized_data_df[sided_reorganized_data_df['side'] == side]

    pupil_size_group_0 = rows.loc[rows['CT_Result_dichotomized'] == 0, 'Pupil_size'].dropna()
    pupil_size_group_1 = rows.loc[rows['CT_Result_dichotomized'] == 1, 'Pupil_size'].dropna()

    print(mannwhitneyu(pupil_size_group_0, pupil_size_group_1))
    print(ttest_ind(pupil_size_group_0, pupil_size_group_1))

In [None]:
# velocity of constriction
# two subplots, left one for overall velocity of constriction, right one for velocity of constriction per side
# plt.subplots returns (figure, axes array): unpack it so that each panel is
# addressed directly instead of through the tuple
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax_overall, ax_per_side = axes

sns.boxplot(x="CT_Result_dichotomized", y="Velocity_of_constriction", data=sided_reorganized_data_df, ax=ax_overall)
ax_overall.set_title('Velocity of constriction overall')

sns.boxplot(x="CT_Result_dichotomized", y="Velocity_of_constriction", hue="side", data=sided_reorganized_data_df, ax=ax_per_side)
ax_per_side.set_title('Velocity of constriction per side')

sns.despine(offset=10, trim=True)

In [None]:
# Associated p-values
# Mann-Whitney U test and independent-samples t-test of the velocity of
# constriction between the two dichotomized CT result groups (0 vs 1): first on
# all rows, then per side.
for side in [None, 'right', 'left']:
    if side is None:
        print('Velocity of constriction overall')
        rows = sided_reorganized_data_df
    else:
        print(f'Velocity of constriction {side}')
        rows = sided_reorganized_data_df[sided_reorganized_data_df['side'] == side]

    velocity_group_0 = rows.loc[rows['CT_Result_dichotomized'] == 0, 'Velocity_of_constriction'].dropna()
    velocity_group_1 = rows.loc[rows['CT_Result_dichotomized'] == 1, 'Velocity_of_constriction'].dropna()

    print(mannwhitneyu(velocity_group_0, velocity_group_1))
    print(ttest_ind(velocity_group_0, velocity_group_1))

## Plot delta (absolute difference between right and left) pupil metrics vs dichotomized CT results

In [None]:
# Absolute difference between the right and left value of each pupil metric,
# computed per scan row and stored as delta_<metric>.
for metric in ['NPi', 'Pupil_size', 'Velocity_of_constriction']:
    right_values = reorganized_data_df[f'{metric}_right'].astype(float)
    left_values = reorganized_data_df[f'{metric}_left'].astype(float)
    reorganized_data_df[f'delta_{metric}'] = np.abs(right_values - left_values)

In [None]:
# 1 when the CT result code is one of 1, 2, 4, 5, 6; 0 for every other value.
spasm_or_deficit_codes = [1, 2, 4, 5, 6]
has_spasm_or_deficit = reorganized_data_df['CT_Result'].isin(spasm_or_deficit_codes)
reorganized_data_df['CT_Result_dichotomized'] = has_spasm_or_deficit.astype(int)

In [None]:
# Delta NPi
# Box plot of the absolute right/left NPi difference per dichotomized CT result group.
ax = sns.boxplot(data=reorganized_data_df, x="CT_Result_dichotomized", y="delta_NPi")
ax.set_title('Delta NPi')

sns.despine(offset=10, trim=True)

In [None]:
# associated p-values
# Mann-Whitney U test and independent-samples t-test of delta NPi between the
# two dichotomized CT result groups (0 first, then 1).
print('Delta NPi')
delta_npi_groups = [
    reorganized_data_df.loc[reorganized_data_df['CT_Result_dichotomized'] == group, 'delta_NPi'].dropna()
    for group in (0, 1)
]
print(mannwhitneyu(*delta_npi_groups))
print(ttest_ind(*delta_npi_groups))

In [None]:
# Delta pupil size
# Box plot of the absolute right/left pupil size difference per dichotomized CT result group.
ax = sns.boxplot(data=reorganized_data_df, x="CT_Result_dichotomized", y="delta_Pupil_size")
ax.set_title('Delta pupil size')

sns.despine(offset=10, trim=True)

In [None]:
# associated p-values
# Mann-Whitney U test and independent-samples t-test of delta pupil size between
# the two dichotomized CT result groups (0 first, then 1).
print('Delta pupil size')
delta_pupil_size_groups = [
    reorganized_data_df.loc[reorganized_data_df['CT_Result_dichotomized'] == group, 'delta_Pupil_size'].dropna()
    for group in (0, 1)
]
print(mannwhitneyu(*delta_pupil_size_groups))
print(ttest_ind(*delta_pupil_size_groups))

In [None]:
# Delta velocity of constriction
# Box plot of the absolute right/left velocity-of-constriction difference per
# dichotomized CT result group.
ax = sns.boxplot(data=reorganized_data_df, x="CT_Result_dichotomized", y="delta_Velocity_of_constriction")
ax.set_title('Delta velocity of constriction')

sns.despine(offset=10, trim=True)

In [None]:
# associated p-values
# Mann-Whitney U test and independent-samples t-test of delta velocity of
# constriction between the two dichotomized CT result groups (0 first, then 1).

print('Delta velocity of constriction')
delta_velocity_groups = [
    reorganized_data_df.loc[reorganized_data_df['CT_Result_dichotomized'] == group, 'delta_Velocity_of_constriction'].dropna()
    for group in (0, 1)
]
print(mannwhitneyu(*delta_velocity_groups))
print(ttest_ind(*delta_velocity_groups))