In [1]:
import pandas as pd
import numpy as np
import importlib
import sys
sys.path.append('..')

import consts
importlib.reload(consts)

pd.options.display.max_rows = 1000
pd.options.display.max_columns = 50

# pd set seed
np.random.seed(42)

In [2]:
df = pd.read_csv('..\processed\inputevents_decision_only.csv')

In [3]:
icustays_filtered = pd.read_csv("..\\filtered\\filtered_icustays.csv")
inputevents = df.merge(icustays_filtered[["stay_id", "first_careunit"]], left_on="stay_id", right_on="stay_id")
inputevents_SICU_MICU = inputevents[inputevents["first_careunit"].isin(["Medical Intensive Care Unit (MICU)", "Surgical Intensive Care Unit (SICU)"])]

In [4]:
def generate_equal_size_group(group):
    smaller_size = group["first_careunit"].value_counts().min()
    smaller_gr = group["first_careunit"].value_counts().index[group.first_careunit.value_counts().argmin()]
    bigger_gr = group["first_careunit"].value_counts().index[group.first_careunit.value_counts().argmax()]
    subgroup1 = group[group["first_careunit"] == smaller_gr]
    subgroup2 = group[group["first_careunit"] == bigger_gr].sample(smaller_size, random_state=42)
    return pd.concat([subgroup1, subgroup2], axis=0)

In [5]:
def permutation_test_mean(x_1,x_2,num_perms):
    """
    Performs a permutation test on the difference in means between two arrays.
    x_1, x_2: array of a single gene, same length
    """
    T = np.abs(np.mean(x_1)-np.mean(x_2))
    n = len(x_1)
    T_perm = np.zeros(num_perms)
    X = np.concatenate((x_1,x_2))
    for j in range(num_perms):
        idx = np.random.permutation(len(X))
        T_perm[j] = np.abs(np.mean(X[idx[:n]])-np.mean(X[idx[(n+1):]]))
    p_value = len(np.where(T_perm>T)[0]) / num_perms    
    return p_value

In [6]:
groups_list = []
for bp_range, i in zip(consts.BP_RANGES, range(0,len(consts.BP_RANGES))):
    bp_low, bp_high = bp_range 
    bp_section_events = inputevents_SICU_MICU[(inputevents_SICU_MICU["bp_val"] >= bp_low) & 
                                (inputevents_SICU_MICU["bp_val"] <= bp_high) & 
                                (inputevents_SICU_MICU["originalrate"] <= 1) & 
                                (inputevents_SICU_MICU["originalrate"] >= 0)]
    bp_section_events = generate_equal_size_group(bp_section_events)
    groups_list.append(bp_section_events)


In [7]:
p_val_array = []
for i in range(0,len(consts.BP_RANGES)):
    bp_section_events = groups_list[i]
    s_icu_group = bp_section_events[bp_section_events["first_careunit"] == "Surgical Intensive Care Unit (SICU)"]
    m_icu_group = bp_section_events[bp_section_events["first_careunit"] == "Medical Intensive Care Unit (MICU)"]
    p_val_array.append(permutation_test_mean(s_icu_group["originalrate"], m_icu_group["originalrate"], 15000))


In [8]:
bp_range_pval_array_df = pd.DataFrame({"bp_range": consts.BP_RANGES, "p_val": p_val_array})
bp_range_pval_array_df["is pval < 0.05"] = bp_range_pval_array_df["p_val"] < 0.05
bp_range_pval_array_df.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7
bp_range,"(0, 49)","(50, 59)","(60, 64)","(65, 69)","(70, 74)","(75, 79)","(80, 89)","(90, 200)"
p_val,0.825667,0.0012,0.138,0.0127333,0.0116,0.0105333,0.00393333,0.0002
is pval < 0.05,False,True,False,True,True,True,True,True


In [9]:
mu_s_b1 = groups_list[1][groups_list[1]["first_careunit"] == "Surgical Intensive Care Unit (SICU)"]["originalrate"].mean()
mu_m_b1 = groups_list[1][groups_list[1]["first_careunit"] == "Medical Intensive Care Unit (MICU)"]["originalrate"].mean()

In [10]:
np.abs(mu_s_b1 - mu_m_b1)

0.011475135525003732

In [11]:
mu_s_b3 = groups_list[3][groups_list[3]["first_careunit"] == "Surgical Intensive Care Unit (SICU)"]["originalrate"].mean()
mu_m_b3 = groups_list[3][groups_list[3]["first_careunit"] == "Medical Intensive Care Unit (MICU)"]["originalrate"].mean()
np.abs(mu_s_b3 - mu_m_b3)

0.007166936841500993

In [12]:
mu_s = inputevents_SICU_MICU[inputevents_SICU_MICU["first_careunit"] == "Surgical Intensive Care Unit (SICU)"]["originalrate"].mean()
mu_m = inputevents_SICU_MICU[inputevents_SICU_MICU["first_careunit"] == "Medical Intensive Care Unit (MICU)"]["originalrate"].mean()
np.abs(mu_s - mu_m)

0.009293511379407426

In [13]:
permutation_test_mean(inputevents_SICU_MICU[inputevents_SICU_MICU["first_careunit"] == "Surgical Intensive Care Unit (SICU)"]["originalrate"],
                        inputevents_SICU_MICU[inputevents_SICU_MICU["first_careunit"] == "Medical Intensive Care Unit (MICU)"]["originalrate"],
                        15000)

0.0026