# Exploration of pupillometry data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
pupillometry_data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/pupillometry_sah/data/Pupillometry - Datenbank Päddy.xls'

In [None]:
# Load the 'Pupillometrie' worksheet of the workbook into a DataFrame
pupillometry_df = pd.read_excel(
    pupillometry_data_path,
    sheet_name='Pupillometrie',
)

The value 999 encodes missing data.

In [None]:
# 999 marks a missing value: swap every occurrence in the frame for NaN
pupillometry_df = pupillometry_df.replace(to_replace=999, value=np.nan)

In [None]:
# Preview only the first rows instead of rendering the whole patient table
pupillometry_df.head()

In [None]:
# Number of distinct patient identifiers (pNr) in the pupillometry table
pupillometry_df['pNr'].nunique()

In [None]:
# plot evolution of NPi_r_wert and NPi_r_wert.1 over time for each patient
for pid in pupillometry_df['pNr'].unique():
    # rows belonging to the current patient
    patient_df = pupillometry_df[pupillometry_df['pNr'] == pid]
    ax = patient_df.plot(
        x='Pupillometrie_Zeitpunkt',
        y=['NPi_r_wert', 'NPi_r_wert.1'],
        title='Patient {}'.format(pid),
    )
    # tilt x ticks: style the existing tick labels instead of re-assigning
    # their texts with set_xticklabels, which would freeze the labels
    # without fixing the tick positions they belong to
    ax.tick_params(axis='x', labelrotation=45)
    plt.setp(ax.get_xticklabels(), ha='right')


In [None]:
# plot evolution of CV_r_wert and CV_r_wert.1 over time for each patient
for pid in pupillometry_df['pNr'].unique():
    # rows belonging to the current patient
    patient_df = pupillometry_df[pupillometry_df['pNr'] == pid]
    ax = patient_df.plot(
        x='Pupillometrie_Zeitpunkt',
        y=['CV_r_wert', 'CV_r_wert.1'],
        title='Patient {}'.format(pid),
    )
    # tilt x ticks: style the existing tick labels instead of re-assigning
    # their texts with set_xticklabels, which would freeze the labels
    # without fixing the tick positions they belong to
    ax.tick_params(axis='x', labelrotation=45)
    plt.setp(ax.get_xticklabels(), ha='right')

In [None]:
# plot evolution of Si_r_wert and Si_r_wert.1 over time for each patient
for pid in pupillometry_df['pNr'].unique():
    # rows belonging to the current patient
    patient_df = pupillometry_df[pupillometry_df['pNr'] == pid]
    ax = patient_df.plot(
        x='Pupillometrie_Zeitpunkt',
        y=['Si_r_wert', 'Si_r_wert.1'],
        title='Patient {}'.format(pid),
    )
    # tilt x ticks: style the existing tick labels instead of re-assigning
    # their texts with set_xticklabels, which would freeze the labels
    # without fixing the tick positions they belong to
    ax.tick_params(axis='x', labelrotation=45)
    plt.setp(ax.get_xticklabels(), ha='right')

In [None]:
# create a common plot with 3 subplots for each patient (NPi, CV, Si) using seaborn
fig = plt.figure(figsize=(20, 120))

n_patients = pupillometry_df['pNr'].nunique()

time_column = 'Pupillometrie_Zeitpunkt'
# one entry per panel: (column drawn as 'right', column drawn as 'left', y-axis label)
panels = [
    ('NPi_r_wert', 'NPi_r_wert.1', 'NPi'),
    ('CV_r_wert', 'CV_r_wert.1', 'CV'),
    ('Si_r_wert', 'Si_r_wert.1', 'Size'),
]
measurement_columns = [column for right, left, _ in panels for column in (right, left)]

# one patient per row, 3 columns
for i, pid in enumerate(pupillometry_df['pNr'].unique()):
    # select the patient's rows once instead of re-filtering for every plot call
    patient_df = pupillometry_df[pupillometry_df['pNr'] == pid]

    # if patient has only missing data in relevant columns, skip
    # (the patient's row in the subplot grid is then left empty)
    if patient_df[measurement_columns].isna().all().all():
        continue

    axes = [fig.add_subplot(n_patients, 3, i * 3 + column_idx) for column_idx in (1, 2, 3)]

    for ax, (right_column, left_column, y_label) in zip(axes, panels):
        sns.lineplot(data=patient_df, x=time_column, y=left_column, ax=ax, label='left')
        sns.lineplot(data=patient_df, x=time_column, y=right_column, ax=ax, label='right')
        sns.scatterplot(data=patient_df, x=time_column, y=left_column, ax=ax, label='left')
        sns.scatterplot(data=patient_df, x=time_column, y=right_column, ax=ax, label='right')
        ax.set_ylabel(y_label)

    # if n ticks over 10, set max 10 ticks
    # (done BEFORE any label formatting so labels are never frozen against
    # tick positions that are about to change)
    if len(axes[0].get_xticks()) > 10:
        for ax in axes:
            ax.xaxis.set_major_locator(plt.MaxNLocator(10))

    for ax in axes:
        # format x ticks by splitting at first space: wrap the axis' own
        # formatter so each label is computed from its tick position at draw
        # time, instead of freezing the label texts with set_xticklabels
        default_formatter = ax.xaxis.get_major_formatter()
        ax.xaxis.set_major_formatter(plt.FuncFormatter(
            lambda value, pos, formatter=default_formatter: formatter(value, pos).split(' ')[0]
        ))
        # tilt x ticks
        ax.tick_params(axis='x', labelrotation=15)
        plt.setp(ax.get_xticklabels(), ha='right')
        # remove x label
        ax.set_xlabel('')

    # set title for row (placed on the middle panel)
    axes[1].set_title('Patient {}'.format(pid))
    
    

    

In [None]:
# save figure to file
# An empty string is not a usable file name, so the figure is written to an
# explicit file in the current working directory; adjust the name as needed.
overview_figure_path = 'pupillometry_per_patient.png'
fig.savefig(overview_figure_path, dpi=300, bbox_inches='tight')

### Add CT data on top of plot

In [None]:
# Absolute, machine-specific location of the combined Excel workbook
# (directory and file name are written as two adjacent literals that Python
# joins into a single string)
combined_data_path = (
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/pupillometry_sah/data/'
    'Pupillometry - Datenbank Joana.xlsx'
)

In [None]:
# Load the 'Database' worksheet and, in the same chain, replace 999 with nan
combined_data_df = (
    pd.read_excel(combined_data_path, sheet_name='Database')
    .replace(999, np.nan)
)

Restructure the data so that each row corresponds to one scan.

In [None]:
identifier_columns = ["patID", "pNr"]
constants_columns = [
    "Chronic_heart_failure",
    "Diabetes",
    "HbA1c",
    "COPD/Asthma",
    "Chronic_hemodialysis",
    "Liver_cirrhosis",
    "HIV",
    "Cancer",
    "Alcohol",
    "Time_symptoms",
    "Initial_GCS",
    "Time_initial_GCS",
    "Fisher_Scale",
    "Rad_intervention",
    "Surgery",
    "Surgery_type",
    "Mortality",
    "30 day mortality",
    "GOS_Admission",
    "GOS_ICU_Discharge",
    "GOS_Hospital_Discharge",
    "GOS_30 day"
]

# initial data has one row per patient and one column per scan, instead we want one row per scan (all constants are repeated)
# get columns that are not constants; each of them is expected to end in
# '_<scan number>' (the int() conversion below raises otherwise)
per_patient_columns = identifier_columns + constants_columns
non_constant_columns = [column_name for column_name in combined_data_df.columns
                        if column_name not in per_patient_columns]

scan_idxs = [int(col.split('_')[-1]) for col in non_constant_columns]
max_number_of_scans = np.max(scan_idxs)

# which columns belong to which scan number: independent of the patient row,
# so it is computed once instead of once per row
columns_by_scan = {
    scan_idx: [col for col in non_constant_columns if col.endswith(f'_{scan_idx}')]
    for scan_idx in range(1, max_number_of_scans + 1)
}

# Collect one plain dict per (patient, scan) and build the frame once at the
# end. Growing a DataFrame with pd.concat inside the loop copies all previous
# rows on every iteration and leaves every column with object dtype; building
# from records is linear and lets pandas infer a dtype per column.
scan_records = []
for _, row in combined_data_df.iterrows():
    per_patient_values = row[per_patient_columns].to_dict()
    # for each scan create a new row with all the constants repeated
    for scan_idx, scan_columns in columns_by_scan.items():
        associated_scan_data = row[scan_columns]
        # if all values are nan, skip (also true when no column exists for this scan number)
        if associated_scan_data.isna().all():
            continue

        record = dict(per_patient_values)
        # add scan number to identifier columns
        record['scan_idx'] = scan_idx

        for col, value in associated_scan_data.items():
            # drop the trailing '_<scan number>' and surrounding whitespace
            record[col.rsplit('_', 1)[0].strip()] = value

        scan_records.append(record)

reorganized_data_df = pd.DataFrame(scan_records)

In [None]:
# Replace the spreadsheet's spaced column headers with underscore-separated
# names; the frame is modified in place.
pupil_column_names = {
    'NPi - right side': 'NPi_right',
    'NPi - left side': 'NPi_left',
    'Pupil size right': 'Pupil_size_right',
    'Pupil size left': 'Pupil_size_left',
    'Velocity of constriction right': 'Velocity_of_constriction_right',
    'Velocity of constriction left': 'Velocity_of_constriction_left',
}
reorganized_data_df.rename(columns=pupil_column_names, inplace=True)

In [None]:
# add mean for NPi, pupil size and CV
# (row-wise mean of the right and left columns of each measurement)
side_columns_by_mean = {
    'NPi_mean': ['NPi_right', 'NPi_left'],
    'Pupil_size_mean': ['Pupil_size_right', 'Pupil_size_left'],
    'Velocity_of_constriction_mean': ['Velocity_of_constriction_right', 'Velocity_of_constriction_left'],
}
for mean_column, side_columns in side_columns_by_mean.items():
    reorganized_data_df[mean_column] = reorganized_data_df[side_columns].mean(axis=1)

In [None]:
# Cast the CT result codes to integers, then derive a 0/1 flag that is 1
# when the code is one of 1, 2, 4, 5, 6 and 0 otherwise.
ct_result_codes = reorganized_data_df['CT_Result'].astype(int)
reorganized_data_df['CT_Result'] = ct_result_codes
reorganized_data_df['CT_Result_dichotomized'] = (
    ct_result_codes.isin([1, 2, 4, 5, 6]).astype(int)
)

In [None]:
# create a common plot with 3 subplots for each patient (NPi, CV, Si) using seaborn
# add CT results on top of plot (x axis is time, y axis is NPi, CV, Si, color is CT result)
fig = plt.figure(figsize=(20, 120))

display_ct = True
ct_result = 'CT_Result_dichotomized'

if ct_result == 'CT_Result_dichotomized':
    # palette with 2 colors (green and red), indexed by the 0/1 value
    palette = ['#2ca02c', '#d62728']
else:
    palette = sns.light_palette("seagreen", n_colors=7)

n_patients = pupillometry_df['pNr'].nunique()

reorganized_data_df['CT_Time'] = pd.to_datetime(reorganized_data_df['CT_Time'])
pupillometry_df['Pupillometrie_Zeitpunkt'] = pd.to_datetime(pupillometry_df['Pupillometrie_Zeitpunkt'])

time_column = 'Pupillometrie_Zeitpunkt'
# one entry per panel:
# (column drawn as 'right', column drawn as 'left', y-axis label, y value used for the CT markers)
panels = [
    ('NPi_r_wert', 'NPi_r_wert.1', 'NPi', 'NPi_mean'),
    ('CV_r_wert', 'CV_r_wert.1', 'CV', 'Velocity_of_constriction_mean'),
    ('Si_r_wert', 'Si_r_wert.1', 'Size', 'Pupil_size_mean'),
]
measurement_columns = [column for right, left, _, _ in panels for column in (right, left)]
ct_mean_columns = [mean_column for _, _, _, mean_column in panels]

# CT markers need a y value even when the mean is missing, so missing means
# are drawn at 0. This is done on a copy with an explicit assignment: filling
# through an alias with chained `fillna(..., inplace=True)` would overwrite
# reorganized_data_df itself and is not reliable under pandas Copy-on-Write.
temp_df = reorganized_data_df.copy()
temp_df[ct_mean_columns] = temp_df[ct_mean_columns].fillna(0)

# one patient per row, 3 columns
for i, pid in enumerate(pupillometry_df['pNr'].unique()):
    # select the patient's rows once instead of re-filtering for every plot call
    patient_df = pupillometry_df[pupillometry_df['pNr'] == pid]

    # if patient has only missing data in relevant columns, skip
    # (the patient's row in the subplot grid is then left empty)
    if patient_df[measurement_columns].isna().all().all():
        continue

    axes = [fig.add_subplot(n_patients, 3, i * 3 + column_idx) for column_idx in (1, 2, 3)]

    if display_ct:
        size = 100
        for _, row in temp_df[temp_df['pNr'] == pid].iterrows():
            for ax, mean_column in zip(axes, ct_mean_columns):
                sns.scatterplot(x=[row['CT_Time']], y=[row[mean_column]], color=palette[row[ct_result]], ax=ax, s=size, label=f'CT: {row["CT_Result"]}')
        # raise the CT markers (the only artists on the axes so far) so the
        # pupillometry curves added below are drawn underneath them
        for ax in axes:
            plt.setp(ax.collections, zorder=100)

    for ax, (right_column, left_column, y_label, _) in zip(axes, panels):
        sns.lineplot(data=patient_df, x=time_column, y=left_column, ax=ax, label='left')
        sns.lineplot(data=patient_df, x=time_column, y=right_column, ax=ax, label='right')
        sns.scatterplot(data=patient_df, x=time_column, y=left_column, ax=ax)
        sns.scatterplot(data=patient_df, x=time_column, y=right_column, ax=ax)
        ax.set_ylabel(y_label)

    # remove duplicate legend entries
    for ax in axes:
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys())

    # if n ticks over 10, set max 10 ticks
    # (done BEFORE any label formatting so labels are never frozen against
    # tick positions that are about to change)
    if len(axes[0].get_xticks()) > 10:
        for ax in axes:
            ax.xaxis.set_major_locator(plt.MaxNLocator(10))

    for ax in axes:
        # format x ticks by splitting at first space: wrap the axis' own
        # formatter so each label is computed from its tick position at draw
        # time, instead of freezing the label texts with set_xticklabels
        default_formatter = ax.xaxis.get_major_formatter()
        ax.xaxis.set_major_formatter(plt.FuncFormatter(
            lambda value, pos, formatter=default_formatter: formatter(value, pos).split(' ')[0]
        ))
        # tilt x ticks
        ax.tick_params(axis='x', labelrotation=15)
        plt.setp(ax.get_xticklabels(), ha='right')
        # remove x label
        ax.set_xlabel('')

    # set title for row (placed on the middle panel)
    axes[1].set_title('Patient {}'.format(pid))

In [None]:
# save figure to file
# An empty string is not a usable file name, so the figure is written to an
# explicit file in the current working directory; adjust the name as needed.
overview_with_ct_figure_path = 'pupillometry_per_patient_with_ct.png'
fig.savefig(overview_with_ct_figure_path, dpi=300, bbox_inches='tight')