In [None]:
pip install -r requirements.txt

In [None]:
## Imports

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as stats
import lifelines
import math
import statsmodels.api as sm
from lifelines.statistics import *
from scipy.stats import fisher_exact
import seaborn as sns
from HypernatremiaAnalysisFuncs import *

In [None]:
# Location of the input CSV. The folder defaults to the original local path,
# but can be overridden without editing the notebook by setting the
# HYPERNATREMIA_DATA_FOLDER environment variable before starting the kernel.
DATA_FOLDER = os.environ.get(
    'HYPERNATREMIA_DATA_FOLDER',
    r'/Users/libi/Documents/Research/Hypernatremia/FinalVersions/ForGit',
)
DATA_FILE_NAME = r'Hypernatremia_combined_data_reduced.csv'

# Full cohort table; every later cell filters or extends this frame.
df = pd.read_csv(os.path.join(DATA_FOLDER, DATA_FILE_NAME))


In [None]:
### Analysis parameters

# Inclusion window applied to the initial sodium column:
# LOWER <= initial value < UPPER.
INIT_SOD_THRESH_LOWER = 155
INIT_SOD_THRESH_UPPER = 190

# Threshold handed to clear_outliers() when removing correction-rate outliers.
CORR_RATE_OUTLIER_THRESH = 0.0005

# Daily-correction thresholds evaluated by the daily-correction analyses.
THRESHOLDS = [-8, -10, -12]

# Names of the data-frame columns used to split the cohort into groups.
HYP_ON_ADMISSION_COL = 'Hypernatremia admission 1 or hospitalization 2'
CORR_RATE_OVERALL_COL = 'Is_slow_corr_overall'
CORR_RATE_MAX_COL = 'Is_slow_maxcorr'
GENDER_COL = 'Gender 1 = M 2 =F'

# Arguments forwarded to the plotting / statistics helpers:
MAX_RANGE = 15                           # plot_mortality_vs_daily_corr_bar
LAB_RES_MAX_TD = 26                      # analyze_group_stats
WEIGHTINGS_STR = 'fleming-harrington'    # plot_KaplanMayer_curve

# Plot colour per patient group.
COLOR_DICT = dict(
    color_on_ad_fast='#ca6702',
    color_on_ad_slow='#94d2bd',
    color_on_ad='#6a4c93',
    color_hos_aq='#1982c4',
    color_hos_aq_fast='#bb3e03',
    color_hos_aq_slow='#005f73',
)


In [None]:
#### Select the sodium measurement columns and identify the initial one

# Keep columns whose name starts with 'sodium' and contains 'numeric',
# skipping any whose name contains 'low' or 'urine'.
sodium_col_names = [
    name
    for name in df.columns
    if name.startswith('sodium')
    and 'numeric' in name
    and 'low' not in name
    and 'urine' not in name
]
sodium_columns = df.iloc[:, [df.columns.get_loc(name) for name in sodium_col_names]]

# The first matching column is treated as the initial sodium measurement.
initial_sod_col = sodium_columns.columns[0]

In [None]:
#### Remove patients with initial sodium levels not corresponding to severe hypernatremia

# Build the inclusion mask once (LOWER <= initial sodium < UPPER) and apply it
# to both frames so they stay row-aligned.
initial_sodium = df[initial_sod_col]
in_severe_range = (initial_sodium >= INIT_SOD_THRESH_LOWER) & (initial_sodium < INIT_SOD_THRESH_UPPER)

df = df[in_severe_range]
sodium_columns = sodium_columns[in_severe_range]

In [None]:
#%% Extract columns representing sodium correction rates and times

# NOTE(review): 'corection' (sic) is the substring the filter looks for in the
# column names; presumably it mirrors the CSV headers, so do not "fix" it
# without checking the data.
corr_rate_col_names = [name for name in df.columns if 'corection' in name]
corr_rate_columns = df.iloc[:, [df.columns.get_loc(name) for name in corr_rate_col_names]]

# Columns whose name mentions 'Reference', 'Collection' and 'sodium', excluding
# names containing 'lowest' or 'urine'.
corr_hour_col_names = [
    name
    for name in df.columns
    if 'Reference' in name
    and 'Collection' in name
    and 'sodium' in name
    and 'lowest' not in name
    and 'urine' not in name
]
corr_hour_columns = df.iloc[:, [df.columns.get_loc(name) for name in corr_hour_col_names]]

# For every row, the label of the sodium column holding its highest value.
max_sodium = sodium_columns.idxmax(axis='columns')

In [None]:
#%% Clear rate outliers from all data frames

# All five objects are passed in and reassigned together from the helper's
# return value, using CORR_RATE_OUTLIER_THRESH as the outlier threshold.
(df, corr_rate_columns, sodium_columns, corr_hour_columns, max_sodium) = clear_outliers(
    df,
    corr_rate_columns,
    sodium_columns,
    corr_hour_columns,
    max_sodium,
    CORR_RATE_OUTLIER_THRESH,
)

In [None]:
#%% Calculate sodium correction within the first 24 hours following hypernatremia detection

sod_24_hour_df = calculate_daily_sod_corr(sodium_columns, corr_hour_columns)

# Index-on-index inner merge: only rows present in both frames are kept.
# NOTE(review): df is reassigned here, so re-running this cell on its own
# merges into the already-merged frame; run the notebook top to bottom.
df = pd.merge(df, sod_24_hour_df, how='inner', left_index=True, right_index=True)

# The intermediate frame is no longer needed once merged.
del sod_24_hour_df

In [None]:
#%% Calculate sodium-correction-related columns

# The helper receives the main frame plus the three column subsets and its
# return value replaces df.
df = define_correction_columns(
    df,
    sodium_columns,
    corr_rate_columns,
    corr_hour_columns,
)

In [None]:
#%% Calculate supporting info (BUN/creatinine ratio, glucose outliers, ICU status)

# Each helper takes the frame and returns the frame used by the next step;
# the steps run in the order listed.
df = (
    df
    .pipe(create_BUN_creatinine_columns)
    .pipe(check_glucose_outliers)
    .pipe(check_if_from_ICU)
)

In [None]:
#%% Calculate daily correction rates

# Evaluated against every value in THRESHOLDS; the result replaces df.
df = df.pipe(daily_correction_above_threshs, THRESHOLDS)

In [None]:
#%% Divide data into groups

# By onset: the onset column holds 1 for admission and 2 for hospitalization.
on_admission = df.loc[df[HYP_ON_ADMISSION_COL].eq(1)]
on_hospitalization = df.loc[df[HYP_ON_ADMISSION_COL].eq(2)]

# By overall correction rate within each onset group:
# flag True -> "low rate" group, flag False -> "high rate" group.
on_ad_low_rate = on_admission.loc[on_admission[CORR_RATE_OVERALL_COL].eq(True)]
on_ad_high_rate = on_admission.loc[on_admission[CORR_RATE_OVERALL_COL].eq(False)]

on_hos_low_rate = on_hospitalization.loc[on_hospitalization[CORR_RATE_OVERALL_COL].eq(True)]
on_hos_high_rate = on_hospitalization.loc[on_hospitalization[CORR_RATE_OVERALL_COL].eq(False)]

# By overall correction rate across the whole cohort.
all_slow_rate = df.loc[df[CORR_RATE_OVERALL_COL].eq(True)]
all_high_rate = df.loc[df[CORR_RATE_OVERALL_COL].eq(False)]

# on_ad_max_corr_slow = on_admission[on_admission[CORR_RATE_MAX_COL]==True]
# on_ad_max_corr_fast= on_admission[on_admission[CORR_RATE_MAX_COL]==False]

# on_hos_max_corr_slow = on_hospitalization[on_hospitalization[CORR_RATE_MAX_COL]==True]
# on_hos_max_corr_fast= on_hospitalization[on_hospitalization[CORR_RATE_MAX_COL]==False]

# all_slow_max_rate = df[df[CORR_RATE_MAX_COL]==True]
# all_high_max_rate = df[df[CORR_RATE_MAX_COL]==False]

# By gender: the gender column holds 1 for male and 2 for female.
all_males = df.loc[df[GENDER_COL].eq(1)]
all_females = df.loc[df[GENDER_COL].eq(2)]

# reached_eunatremia_slow = all_slow_rate[all_slow_rate['Reached_normal']==True]
# reached_eunatremia_fast = all_high_rate[all_high_rate['Reached_normal']==True]

# no_eunatremia_slow = all_slow_rate[all_slow_rate['Reached_normal']==False]
# no_eunatremia_fast = all_high_rate[all_high_rate['Reached_normal']==False]


# slow_males = all_slow_rate[all_slow_rate[GENDER_COL]==1]
# slow_females = all_slow_rate[all_slow_rate[GENDER_COL]==2]

# fast_males = all_high_rate[all_high_rate[GENDER_COL]==1]
# fast_females = all_high_rate[all_high_rate[GENDER_COL]==2]

# hos_ac_males = on_hospitalization[on_hospitalization[GENDER_COL]==1]
# hos_ac_females = on_hospitalization[on_hospitalization[GENDER_COL]==2]

# on_ad_males = on_admission[on_admission[GENDER_COL]==1]
# on_ad_females = on_admission[on_admission[GENDER_COL]==2]

# on_hos_low_rate_males = on_hos_low_rate[on_hos_low_rate[GENDER_COL]==1]
# on_hos_low_rate_females = on_hos_low_rate[on_hos_low_rate[GENDER_COL]==2]

# on_hos_high_rate_males = on_hos_high_rate[on_hos_high_rate[GENDER_COL]==1]
# on_hos_high_rate_females = on_hos_high_rate[on_hos_high_rate[GENDER_COL]==2]

# on_ad_low_rate_males = on_ad_low_rate[on_ad_low_rate[GENDER_COL]==1]
# on_ad_low_rate_females = on_ad_low_rate[on_ad_low_rate[GENDER_COL]==2]

# on_ad_high_rate_males = on_ad_high_rate[on_ad_high_rate[GENDER_COL]==1]
# on_ad_high_rate_females = on_ad_high_rate[on_ad_high_rate[GENDER_COL]==2]





In [None]:
#%% Bar plot: mortality vs. daily correction rate

# Drawn for the whole cohort with the 'color_on_ad' colour and MAX_RANGE.
p = plot_mortality_vs_daily_corr_bar(
    df,
    COLOR_DICT['color_on_ad'],
    MAX_RANGE,
)

In [None]:
#%% Plot: mortality vs. continuous daily correction rate

# Whole cohort; the helper is given the 'color_on_ad' and 'color_hos_aq'
# colours and its return value is kept for inspection.
cont_morr_rate_df = plot_mortality_vs_cont_daily_corr(
    df,
    COLOR_DICT['color_on_ad'],
    COLOR_DICT['color_hos_aq'],
)

In [None]:
#%% Bar plots: mortality vs. daily correction rate across the different thresholds

# Whole cohort, one comparison per value in THRESHOLDS.
bar_allpats_30day = compare_mortality_across_daily_corrections(
    df,
    THRESHOLDS,
    'All_patients',
)
# bar_allslow_30day = compare_mortality_across_daily_corrections(all_slow_rate, THRESHOLDS, 'All_slow')
# bar_allfast_30day = compare_mortality_across_daily_corrections(all_high_rate, THRESHOLDS, 'All_fast')

In [None]:
#%% Analyze group statistics (subgroup out of main group)

# Active analysis: the on-admission high-rate subgroup within the full
# on-admission group. The commented-out calls below run the same analysis for
# other subgroup / group pairs.
analyze_group_stats(
    on_ad_high_rate,
    on_admission,
    LAB_RES_MAX_TD,
)
# analyze_group_stats(on_ad_low_rate, on_admission, LAB_RES_MAX_TD)

# analyze_group_stats(on_hos_high_rate, on_hospitalization, LAB_RES_MAX_TD)
# analyze_group_stats(on_hos_low_rate, on_hospitalization, LAB_RES_MAX_TD)

# analyze_group_stats(all_high_rate, df, LAB_RES_MAX_TD)
# analyze_group_stats(all_slow_rate, df, LAB_RES_MAX_TD)

# analyze_group_stats(on_admission, df, LAB_RES_MAX_TD)
# analyze_group_stats(on_hospitalization, df, LAB_RES_MAX_TD)

# analyze_group_stats(df, df, LAB_RES_MAX_TD)
# analyze_group_stats(all_males, df, LAB_RES_MAX_TD )
# analyze_group_stats(all_females, df, LAB_RES_MAX_TD )

# analyze_group_stats(slow_males, all_slow_rate, LAB_RES_MAX_TD )
# analyze_group_stats(slow_females, all_slow_rate, LAB_RES_MAX_TD )

# analyze_group_stats(fast_males, all_high_rate, LAB_RES_MAX_TD )
# analyze_group_stats(fast_females, all_high_rate, LAB_RES_MAX_TD )

# analyze_group_stats(hos_ac_males, on_hospitalization, LAB_RES_MAX_TD )
# analyze_group_stats(hos_ac_females, on_hospitalization, LAB_RES_MAX_TD )

# analyze_group_stats(on_ad_males, on_admission, LAB_RES_MAX_TD )
# analyze_group_stats(on_ad_females, on_admission, LAB_RES_MAX_TD )

# analyze_group_stats(on_hos_low_rate_males, on_hos_low_rate, LAB_RES_MAX_TD)
# analyze_group_stats(on_hos_high_rate_males, on_hos_high_rate, LAB_RES_MAX_TD)

# analyze_group_stats(on_hos_low_rate_females, on_hos_low_rate, LAB_RES_MAX_TD)
# analyze_group_stats(on_hos_high_rate_females, on_hos_high_rate, LAB_RES_MAX_TD)

# analyze_group_stats(on_ad_low_rate_males, on_ad_low_rate, LAB_RES_MAX_TD)
# analyze_group_stats(on_ad_low_rate_females, on_ad_low_rate, LAB_RES_MAX_TD)

# analyze_group_stats(on_ad_high_rate_males, on_ad_high_rate, LAB_RES_MAX_TD)
# analyze_group_stats(on_ad_high_rate_females, on_ad_high_rate, LAB_RES_MAX_TD)

# analyze_group_stats(reached_eunatremia_slow, all_slow_rate, LAB_RES_MAX_TD)
# analyze_group_stats(reached_eunatremia_fast, all_high_rate, LAB_RES_MAX_TD)



In [None]:
### Compare stats between two comparable groups

# Active comparison: on-admission low-rate vs. high-rate groups.
compare_general_stats(
    on_ad_low_rate,
    on_ad_high_rate,
)

# compare_general_stats(on_hos_low_rate, on_hos_high_rate)

# compare_general_stats(on_admission, on_hospitalization)

# compare_general_stats(all_slow_rate, all_high_rate)
    
# compare_general_stats(on_ad_low_rate, on_hos_low_rate)

# compare_general_stats(on_ad_high_rate, on_hos_high_rate)

# compare_general_stats(reached_eunatremia_slow, reached_eunatremia_fast)



In [None]:
#%% Accessory plots of sodium levels in different groups

# Active plot: the on-admission low-rate group, drawn in its group colour.
group_name = 'On Admission - CR<=0.5 mmol/L/h'
analyze_group_sodium(
    on_ad_low_rate,
    group_name,
    COLOR_DICT['color_on_ad_slow'],
)



In [None]:
#%% Compare sodium stats between two matching groups

# Active comparison: on-admission low-rate vs. high-rate groups.
compare_group_sodium_stats(
    on_ad_low_rate,
    on_ad_high_rate,
)
# compare_group_sodium_stats(on_hos_low_rate, on_hos_high_rate)
# compare_group_sodium_stats(on_hospitalization, on_admission)
# compare_group_sodium_stats(all_slow_rate, all_high_rate)


In [None]:
#%% Kaplan-Meier curves

# Active plot: on-admission low-rate vs. high-rate groups, each with its own
# label string and group colour, using the WEIGHTINGS_STR weighting.
KM_title_adm = 'Hypernatremia on admission - KM survival plots'
plot_KaplanMayer_curve(
    on_ad_low_rate,
    on_ad_high_rate,
    r'$\leq$0.5 mmol/L/h',
    '>0.5 mmol/L/h',
    KM_title_adm,
    COLOR_DICT['color_on_ad_slow'],
    COLOR_DICT['color_on_ad_fast'],
    WEIGHTINGS_STR,
    'OnAdForMCTest',
)

# KM_title_hos = 'Hypernatremia hospital acquired - KM survival plots'    
# plot_KaplanMayer_curve(on_hos_low_rate, on_hos_high_rate, r'Rate $\leq$0.5 mmol/L/h',
#                         'Rate >0.5 mmol/L/h', KM_title_hos, COLOR_DICT['color_hos_aq_slow'],
#                         COLOR_DICT['color_hos_aq_fast'], WEIGHTINGS_STR , 'HosAqForMCTest')

# KM_title_all = 'Survival probability according to hypernatremia correction rate'    
# fhr_a_test = plot_KaplanMayer_curve(all_slow_rate, all_high_rate, r'$\leq$0.5 mmol/L/h',
#                         '>0.5 mmol/L/h', KM_title_all, COLOR_DICT['color_hos_aq'],
#                         COLOR_DICT['color_on_ad'], WEIGHTINGS_STR, 'AllPatsForMCTest')





In [None]:
#%% Logistic regression analysis of mortality odds ratio

# Active analysis: on-admission low-rate vs. high-rate groups. Both returned
# results are kept for inspection.
na_res, extended_adj_res = analyze_mortality_odds_ratio(
    on_ad_low_rate,
    on_ad_high_rate,
    'On admission slow vs. fast',
)

# na_res, extended_adj_res = analyze_mortality_odds_ratio(on_hos_low_rate,
#                             on_hos_high_rate, 'Hospital acq. slow vs. fast')  


# na_res, extended_adj_res = analyze_mortality_odds_ratio(on_ad_max_corr_slow, on_ad_max_corr_fast,
#                               'On admission slow vs. fast max corr. rate')    
# na_res, extended_adj_res = analyze_mortality_odds_ratio(on_hos_max_corr_slow, on_hos_max_corr_fast,
#                               'Hospital acq. slow vs. fast max corr. rate')    
    
# na_res, extended_adj_res = analyze_mortality_odds_ratio(all_slow_rate, all_high_rate,
#                               'Fast vs. Slow correction rate')    


# na_res, extended_adj_res = analyze_mortality_odds_ratio(all_slow_max_rate, all_high_max_rate,
#                               'Fast vs. Slow max correction rate')        

