In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind,chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv('../processed_data/merged_data.csv')
data.head()

Unnamed: 0,encrypt_mrn,ed_screened,visit_date,new_diagnosis,pcp_followup,max_sbp,min_sbp,max_dbp,min_dbp,max_diff,...,max_value_HEMOGLOBIN A1C,"max_value_LDL CHOLESTEROL, CALCULATED","min_value_CHOLESTEROL, TOTAL",min_value_CREATININE,min_value_GLUCOSE,"min_value_GLUCOSE, POC",min_value_HDL CHOLESTEROL,min_value_HEMOGLOBIN A1C,"min_value_LDL CHOLESTEROL, CALCULATED",visit_type
0,2303579,NYU Brooklyn,2023-01-10,Yes,Yes,133.0,114.0,96.0,64.0,52.0,...,9.3,176.0,268.0,0.74,305.0,230.0,31.0,9.3,176.0,Hospitalization
1,2117042,NYU Long Island,2022-01-13,Yes,Yes,102.0,102.0,69.0,69.0,33.0,...,,,,,,443.0,,,,Hospitalization
2,2117326,NYU Brooklyn,2022-01-14,Yes,No,158.0,148.0,88.0,82.0,76.0,...,11.2,,,0.7,423.0,404.0,,11.2,,Emergency
3,2112782,NYU Brooklyn,2022-01-16,Yes,No,181.0,146.0,93.0,87.0,88.0,...,11.6,,,1.03,418.0,491.0,,11.6,,Emergency
4,2062174,NYU Cobble Hill,2022-01-19,Yes,Yes,177.0,135.0,100.0,68.0,77.0,...,8.3,,,,,181.0,,8.3,,Emergency


In [3]:
for i in data.columns:
    print(i)

encrypt_mrn
ed_screened
visit_date
new_diagnosis
pcp_followup
max_sbp
min_sbp
max_dbp
min_dbp
max_diff
avg_sbp
avg_dbp
avg_diff
test_nums
encrypt_csn_x
age
sex
detailed_race
language
insurance_type
bmi
height
weight
dm_on_pl
dm_on_pmh
htn_on_pl
htn_on_pmh
hld_on_pl
hld_on_pmh
gdm_on_pl
gdm_on_pmh
pcos_on_pl
pcos_on_pmh
cad_on_pl
cad_on_pmh
chf_on_pl
chf_on_pmh
depression_on_pl
depression_on_pmh
cva_on_pl
cva_on_pmh
family_dm
tobacco_user
dm_meds
htn_meds
statin_meds
aspirin_meds
plavix_meds
lasix_meds
arrival_date
disposition
ed_name
chiefcomplaint
sbp_1st
dbp_1st
bpa_response
primary_care
hba1c
poct_gluc
chem_gluc
chem_creat
encrypt_csn_y
match_status
match_type
gisjoin
metro_area
county
state
zip_code
no_addy
unk_addy
po_box
intl_addy
homeless
nyc_tract
total_pop
households
housing_units
p_children
p_elderly
p_adults
p_female
mdn_age
p_nhwhite
p_nhblack
p_hispanic
p_nhasian
p_other
p_moved
p_longcommute
p_marriednone
p_marriedkids
p_singlenone
p_malekids
p_femalekids
p_cohabitkids
p_

In [4]:
data['pcp_followup'].value_counts()

pcp_followup
Yes    204
No      50
Name: count, dtype: int64

In [5]:
# follow up / no follow up
yes = data[data['pcp_followup'] == 'Yes']
no = data[data['pcp_followup'] == 'No']


In [6]:
# age

yes_age = yes['age'].to_numpy()
no_age = no['age'].to_numpy()
t_stat, p_value = ttest_ind(yes_age, no_age, equal_var=True) 
print('follow up mean age:', yes_age.mean().round(1))
print('no follow up mean age:', no_age.mean().round(1))
print('Age p-value:', p_value)


follow up mean age: 53.0
no follow up mean age: 51.3
Age p-value: 0.2808737545923621


In [7]:
# sex
# sex (1: female, 2: male, 99: unknown, 951: other)

# male
yes_male = yes[yes['sex'] == 'Male'].shape[0]
no_male = no[no['sex'] == 'Male'].shape[0]
print('follow up male proportion:', round(yes_male / yes.shape[0], 3))
print('no follow up male proportion:', round(no_male / no.shape[0], 3))

yes_other = yes.shape[0] - yes_male
no_other = no.shape[0] - no_male

sex = np.array([[yes_male, yes_other], 
                 [no_male, no_other]])  

chi2, p, dof, expected = chi2_contingency(sex)
print('Chi-squared test p-value:', p)

follow up male proportion: 0.593
no follow up male proportion: 0.66
Chi-squared test p-value: 0.4803451287200199


In [8]:
# race

yes_detailed_race = yes['detailed_race'].value_counts()
no_detailed_race = no['detailed_race'].value_counts()
all_categories = set(yes_detailed_race.index).union(set(no_detailed_race.index))
yes_aligned = yes_detailed_race.reindex(all_categories, fill_value=0)
no_aligned = no_detailed_race.reindex(all_categories, fill_value=0)
detailed_race = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(detailed_race)
print('Chi-squared test p-value:', p)

detailed_race
Hispanic    0.318627
Other       0.063725
Asian       0.058824
Black       0.240196
White       0.318627
Name: count, dtype: float64
detailed_race
Hispanic    0.42
Other       0.12
Asian       0.06
Black       0.26
White       0.14
Name: count, dtype: float64
Chi-squared test p-value: 0.11150481115755405


In [9]:
# language

yes_language = yes['language'].value_counts()
no_language = no['language'].value_counts()
all_categories = set(yes_language.index).union(set(no_language.index))
yes_aligned = yes_language.reindex(all_categories, fill_value=0)
no_aligned = no_language.reindex(all_categories, fill_value=0)
language = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(language)
print('Chi-squared test p-value:', p)

language
Spanish    0.171569
Other      0.019608
English    0.808824
Name: count, dtype: float64
language
Spanish    0.34
Other      0.06
English    0.60
Name: count, dtype: float64
Chi-squared test p-value: 0.006026807028311744


In [10]:
# insurance_type

yes_insurance_type = yes['insurance_type'].value_counts()
no_insurance_type = no['insurance_type'].value_counts()
all_categories = set(yes_insurance_type.index).union(set(no_insurance_type.index))
yes_aligned = yes_insurance_type.reindex(all_categories, fill_value=0)
no_aligned = no_insurance_type.reindex(all_categories, fill_value=0)
insurance_type = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(insurance_type)
print('Chi-squared test p-value:', p)

insurance_type
SELFPAY     0.049020
PRIVATE     0.529412
MEDICARE    0.151961
OTHER       0.004902
MEDICAID    0.264706
Name: count, dtype: float64
insurance_type
SELFPAY     0.18
PRIVATE     0.22
MEDICARE    0.08
OTHER       0.00
MEDICAID    0.52
Name: count, dtype: float64
Chi-squared test p-value: 1.5896543265022498e-05


In [11]:
# bmi 

yes_bmi = yes['bmi'].dropna().to_numpy()
no_bmi = no['bmi'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_bmi, no_bmi, equal_var=True) 
print('follow up mean bmi:', yes_bmi.mean().round(1))
print('no follow up mean bmi:', no_bmi.mean().round(1))
print('bmi p-value:', p_value)

follow up mean bmi: 34.7
no follow up mean bmi: 33.4
bmi p-value: 0.28781223974297954


In [12]:
# height 

yes_height = yes['height'].dropna().to_numpy()
no_height = no['height'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_height, no_height, equal_var=True) 
print('follow up mean height:', yes_height.mean().round(1))
print('no follow up mean height:', no_height.mean().round(1))
print('height p-value:', p_value)

follow up mean height: 5.6
no follow up mean height: 5.5
height p-value: 0.6129339194172966


In [13]:
# weight 

yes_weight = yes['weight'].dropna().to_numpy()
no_weight = no['weight'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_weight, no_weight, equal_var=True) 
print('follow up mean weight:', yes_weight.mean().round(1))
print('no follow up mean weight:', no_weight.mean().round(1))
print('weight p-value:', p_value)

follow up mean weight: 224.3
no follow up mean weight: 210.1
weight p-value: 0.13377476123175452


In [14]:
# dm_on_pl

yes_dm_on_pl = yes['dm_on_pl'].value_counts()
no_dm_on_pl = no['dm_on_pl'].value_counts()
all_categories = set(yes_dm_on_pl.index).union(set(no_dm_on_pl.index))
yes_aligned = yes_dm_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_dm_on_pl.reindex(all_categories, fill_value=0)
dm_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_on_pl)
print('Chi-squared test p-value:', p)

dm_on_pl
No     0.95098
Yes    0.04902
Name: count, dtype: float64
dm_on_pl
No     0.9
Yes    0.1
Name: count, dtype: float64
Chi-squared test p-value: 0.3003078819424843


In [15]:
# dm_on_pmh

yes_dm_on_pmh = yes['dm_on_pmh'].value_counts()
no_dm_on_pmh = no['dm_on_pmh'].value_counts()
all_categories = set(yes_dm_on_pmh.index).union(set(no_dm_on_pmh.index))
yes_aligned = yes_dm_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_dm_on_pmh.reindex(all_categories, fill_value=0)
dm_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_on_pmh)
print('Chi-squared test p-value:', p)

dm_on_pmh
No     0.862745
Yes    0.137255
Name: count, dtype: float64
dm_on_pmh
No     0.8
Yes    0.2
Name: count, dtype: float64
Chi-squared test p-value: 0.37156644803743766


In [16]:
# htn_on_pl

yes_htn_on_pl = yes['htn_on_pl'].value_counts()
no_htn_on_pl = no['htn_on_pl'].value_counts()
all_categories = set(yes_htn_on_pl.index).union(set(no_htn_on_pl.index))
yes_aligned = yes_htn_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_htn_on_pl.reindex(all_categories, fill_value=0)
htn_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_on_pl)
print('Chi-squared test p-value:', p)

htn_on_pl
No     0.848039
Yes    0.151961
Name: count, dtype: float64
htn_on_pl
No     0.86
Yes    0.14
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [17]:
# htn_on_pmh

yes_htn_on_pmh = yes['htn_on_pmh'].value_counts()
no_htn_on_pmh = no['htn_on_pmh'].value_counts()
all_categories = set(yes_htn_on_pmh.index).union(set(no_htn_on_pmh.index))
yes_aligned = yes_htn_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_htn_on_pmh.reindex(all_categories, fill_value=0)
htn_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_on_pmh)
print('Chi-squared test p-value:', p)

htn_on_pmh
No     0.598039
Yes    0.401961
Name: count, dtype: float64
htn_on_pmh
No     0.74
Yes    0.26
Name: count, dtype: float64
Chi-squared test p-value: 0.08986119717756008


In [18]:
# gdm_on_pl

yes_gdm_on_pl = yes['gdm_on_pl'].value_counts()
no_gdm_on_pl = no['gdm_on_pl'].value_counts()
all_categories = set(yes_gdm_on_pl.index).union(set(no_gdm_on_pl.index))
yes_aligned = yes_gdm_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_gdm_on_pl.reindex(all_categories, fill_value=0)
gdm_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(gdm_on_pl)
print('Chi-squared test p-value:', p)

gdm_on_pl
No    1.0
Name: count, dtype: float64
gdm_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [19]:
# gdm_on_pmh

yes_gdm_on_pmh = yes['gdm_on_pmh'].value_counts()
no_gdm_on_pmh = no['gdm_on_pmh'].value_counts()
all_categories = set(yes_gdm_on_pmh.index).union(set(no_gdm_on_pmh.index))
yes_aligned = yes_gdm_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_gdm_on_pmh.reindex(all_categories, fill_value=0)
gdm_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(gdm_on_pmh)
print('Chi-squared test p-value:', p)

gdm_on_pmh
No    1.0
Name: count, dtype: float64
gdm_on_pmh
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [20]:
# pcos_on_pl

yes_pcos_on_pl = yes['pcos_on_pl'].value_counts()
no_pcos_on_pl = no['pcos_on_pl'].value_counts()
all_categories = set(yes_pcos_on_pl.index).union(set(no_pcos_on_pl.index))
yes_aligned = yes_pcos_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_pcos_on_pl.reindex(all_categories, fill_value=0)
pcos_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(pcos_on_pl)
print('Chi-squared test p-value:', p)

pcos_on_pl
No    1.0
Name: count, dtype: float64
pcos_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [21]:
# pcos_on_pmh

yes_pcos_on_pmh = yes['pcos_on_pmh'].value_counts()
no_pcos_on_pmh = no['pcos_on_pmh'].value_counts()
all_categories = set(yes_pcos_on_pmh.index).union(set(no_pcos_on_pmh.index))
yes_aligned = yes_pcos_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_pcos_on_pmh.reindex(all_categories, fill_value=0)
pcos_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(pcos_on_pmh)
print('Chi-squared test p-value:', p)

pcos_on_pmh
No     0.985294
Yes    0.014706
Name: count, dtype: float64
pcos_on_pmh
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 0.8947741639643179


In [22]:
# cad_on_pl

yes_cad_on_pl = yes['cad_on_pl'].value_counts()
no_cad_on_pl = no['cad_on_pl'].value_counts()
all_categories = set(yes_cad_on_pl.index).union(set(no_cad_on_pl.index))
yes_aligned = yes_cad_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_cad_on_pl.reindex(all_categories, fill_value=0)
cad_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cad_on_pl)
print('Chi-squared test p-value:', p)

cad_on_pl
No     0.97549
Yes    0.02451
Name: count, dtype: float64
cad_on_pl
No     0.98
Yes    0.02
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [23]:
# cad_on_pmh

yes_cad_on_pmh = yes['cad_on_pmh'].value_counts()
no_cad_on_pmh = no['cad_on_pmh'].value_counts()
all_categories = set(yes_cad_on_pmh.index).union(set(no_cad_on_pmh.index))
yes_aligned = yes_cad_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_cad_on_pmh.reindex(all_categories, fill_value=0)
cad_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cad_on_pmh)
print('Chi-squared test p-value:', p)

cad_on_pmh
No     0.97549
Yes    0.02451
Name: count, dtype: float64
cad_on_pmh
No     0.98
Yes    0.02
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [24]:
# chf_on_pl

yes_chf_on_pl = yes['chf_on_pl'].value_counts()
no_chf_on_pl = no['chf_on_pl'].value_counts()
all_categories = set(yes_chf_on_pl.index).union(set(no_chf_on_pl.index))
yes_aligned = yes_chf_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_chf_on_pl.reindex(all_categories, fill_value=0)
chf_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(chf_on_pl)
print('Chi-squared test p-value:', p)

chf_on_pl
No     0.995098
Yes    0.004902
Name: count, dtype: float64
chf_on_pl
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [25]:
# chf_on_pmh

yes_chf_on_pmh = yes['chf_on_pmh'].value_counts()
no_chf_on_pmh = no['chf_on_pmh'].value_counts()
all_categories = set(yes_chf_on_pmh.index).union(set(no_chf_on_pmh.index))
yes_aligned = yes_chf_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_chf_on_pmh.reindex(all_categories, fill_value=0)
chf_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(chf_on_pmh)
print('Chi-squared test p-value:', p)

chf_on_pmh
No     0.980392
Yes    0.019608
Name: count, dtype: float64
chf_on_pmh
No     0.98
Yes    0.02
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [26]:
# depression_on_pl

yes_depression_on_pl = yes['depression_on_pl'].value_counts()
no_depression_on_pl = no['depression_on_pl'].value_counts()
all_categories = set(yes_depression_on_pl.index).union(set(no_depression_on_pl.index))
yes_aligned = yes_depression_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_depression_on_pl.reindex(all_categories, fill_value=0)
depression_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(depression_on_pl)
print('Chi-squared test p-value:', p)

depression_on_pl
No     0.970588
Yes    0.029412
Name: count, dtype: float64
depression_on_pl
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 0.4791192144625801


In [27]:
# depression_on_pmh

yes_depression_on_pmh = yes['depression_on_pmh'].value_counts()
no_depression_on_pmh = no['depression_on_pmh'].value_counts()
all_categories = set(yes_depression_on_pmh.index).union(set(no_depression_on_pmh.index))
yes_aligned = yes_depression_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_depression_on_pmh.reindex(all_categories, fill_value=0)
depression_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(depression_on_pmh)
print('Chi-squared test p-value:', p)

depression_on_pmh
No     0.95098
Yes    0.04902
Name: count, dtype: float64
depression_on_pmh
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 0.23341791348654384


In [28]:
# cva_on_pl

yes_cva_on_pl = yes['cva_on_pl'].value_counts()
no_cva_on_pl = no['cva_on_pl'].value_counts()
all_categories = set(yes_cva_on_pl.index).union(set(no_cva_on_pl.index))
yes_aligned = yes_cva_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_cva_on_pl.reindex(all_categories, fill_value=0)
cva_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cva_on_pl)
print('Chi-squared test p-value:', p)

cva_on_pl
No    1.0
Name: count, dtype: float64
cva_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [29]:
# cva_on_pmh

yes_cva_on_pmh = yes['cva_on_pmh'].value_counts()
no_cva_on_pmh = no['cva_on_pmh'].value_counts()
all_categories = set(yes_cva_on_pmh.index).union(set(no_cva_on_pmh.index))
yes_aligned = yes_cva_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_cva_on_pmh.reindex(all_categories, fill_value=0)
cva_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cva_on_pmh)
print('Chi-squared test p-value:', p)

cva_on_pmh
No     0.97549
Yes    0.02451
Name: count, dtype: float64
cva_on_pmh
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 0.5822546410717091


In [30]:
# family_dm

yes_family_dm = yes['family_dm'].dropna().to_numpy()
no_family_dm = no['family_dm'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_family_dm, no_family_dm, equal_var=True) 
print('follow up mean family_dm:', yes_family_dm.mean().round(1))
print('no follow up mean family_dm:', no_family_dm.mean().round(1))
print('family_dm p-value:', p_value)

follow up mean family_dm: 0.5
no follow up mean family_dm: 0.7
family_dm p-value: 0.48651850593482493


In [31]:
# tobacco_user

yes_tobacco_user = yes['tobacco_user'].value_counts()
no_tobacco_user = no['tobacco_user'].value_counts()
all_categories = set(yes_tobacco_user.index).union(set(no_tobacco_user.index))
yes_aligned = yes_tobacco_user.reindex(all_categories, fill_value=0)
no_aligned = no_tobacco_user.reindex(all_categories, fill_value=0)
tobacco_user = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(tobacco_user)
print('Chi-squared test p-value:', p)

tobacco_user
Quit         0.178571
Yes          0.064286
Never        0.714286
Not Asked    0.042857
Name: count, dtype: float64
tobacco_user
Quit         0.148148
Yes          0.185185
Never        0.666667
Not Asked    0.000000
Name: count, dtype: float64
Chi-squared test p-value: 0.15130170447230368


In [34]:
# dm_meds

yes_dm_meds = yes['dm_meds'].value_counts()
no_dm_meds = no['dm_meds'].value_counts()
all_categories = set(yes_dm_meds.index).union(set(no_dm_meds.index))
yes_aligned = yes_dm_meds.reindex(all_categories, fill_value=0)
no_aligned = no_dm_meds.reindex(all_categories, fill_value=0)
dm_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_meds)
print('Chi-squared test p-value:', p)

dm_meds
No     0.518325
Yes    0.481675
Name: count, dtype: float64
dm_meds
No     0.707317
Yes    0.292683
Name: count, dtype: float64
Chi-squared test p-value: 0.041867998139034204


In [35]:
# htn_meds

yes_htn_meds = yes['htn_meds'].value_counts()
no_htn_meds = no['htn_meds'].value_counts()
all_categories = set(yes_htn_meds.index).union(set(no_htn_meds.index))
yes_aligned = yes_htn_meds.reindex(all_categories, fill_value=0)
no_aligned = no_htn_meds.reindex(all_categories, fill_value=0)
htn_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_meds)
print('Chi-squared test p-value:', p)

htn_meds
No     0.774869
Yes    0.225131
Name: count, dtype: float64
htn_meds
No     0.902439
Yes    0.097561
Name: count, dtype: float64
Chi-squared test p-value: 0.10312237098734389


In [36]:
# statin_meds

yes_statin_meds = yes['statin_meds'].value_counts()
no_statin_meds = no['statin_meds'].value_counts()
all_categories = set(yes_statin_meds.index).union(set(no_statin_meds.index))
yes_aligned = yes_statin_meds.reindex(all_categories, fill_value=0)
no_aligned = no_statin_meds.reindex(all_categories, fill_value=0)
statin_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(statin_meds)
print('Chi-squared test p-value:', p)

statin_meds
No     0.691099
Yes    0.308901
Name: count, dtype: float64
statin_meds
No     0.829268
Yes    0.170732
Name: count, dtype: float64
Chi-squared test p-value: 0.11217391945443958


In [37]:
# aspirin_meds

yes_aspirin_meds = yes['aspirin_meds'].value_counts()
no_aspirin_meds = no['aspirin_meds'].value_counts()
all_categories = set(yes_aspirin_meds.index).union(set(no_aspirin_meds.index))
yes_aligned = yes_aspirin_meds.reindex(all_categories, fill_value=0)
no_aligned = no_aspirin_meds.reindex(all_categories, fill_value=0)
aspirin_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(aspirin_meds)
print('Chi-squared test p-value:', p)

aspirin_meds
No     0.842932
Yes    0.157068
Name: count, dtype: float64
aspirin_meds
No     0.902439
Yes    0.097561
Name: count, dtype: float64
Chi-squared test p-value: 0.46280934939987206


In [38]:
# plavix_meds

yes_plavix_meds = yes['plavix_meds'].value_counts()
no_plavix_meds = no['plavix_meds'].value_counts()
all_categories = set(yes_plavix_meds.index).union(set(no_plavix_meds.index))
yes_aligned = yes_plavix_meds.reindex(all_categories, fill_value=0)
no_aligned = no_plavix_meds.reindex(all_categories, fill_value=0)
plavix_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(plavix_meds)
print('Chi-squared test p-value:', p)

plavix_meds
No     0.963351
Yes    0.036649
Name: count, dtype: float64
plavix_meds
No     0.97561
Yes    0.02439
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [39]:
# lasix_meds

yes_lasix_meds = yes['lasix_meds'].value_counts()
no_lasix_meds = no['lasix_meds'].value_counts()
all_categories = set(yes_lasix_meds.index).union(set(no_lasix_meds.index))
yes_aligned = yes_lasix_meds.reindex(all_categories, fill_value=0)
no_aligned = no_lasix_meds.reindex(all_categories, fill_value=0)
lasix_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(lasix_meds)
print('Chi-squared test p-value:', p)

lasix_meds
No     0.942408
Yes    0.057592
Name: count, dtype: float64
lasix_meds
No     1.0
Yes    0.0
Name: count, dtype: float64
Chi-squared test p-value: 0.24221581489585203


In [41]:
# disposition

yes_disposition = yes['disposition'].value_counts()
no_disposition = no['disposition'].value_counts()
all_categories = set(yes_disposition.index).union(set(no_disposition.index))
yes_aligned = yes_disposition.reindex(all_categories, fill_value=0)
no_aligned = no_disposition.reindex(all_categories, fill_value=0)
disposition = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(disposition)
print('Chi-squared test p-value:', p)

disposition
AMA                           0.024510
Transfer to Procedure Area    0.004902
Admit                         0.450980
Discharge                     0.514706
LWBS after Triage             0.004902
Name: count, dtype: float64
disposition
AMA                           0.00
Transfer to Procedure Area    0.02
Admit                         0.26
Discharge                     0.72
LWBS after Triage             0.00
Name: count, dtype: float64
Chi-squared test p-value: 0.05571250170433469


In [49]:
# ed_name

yes_ed_name = yes['ed_name'].value_counts()
no_ed_name = no['ed_name'].value_counts()
all_categories = set(yes_ed_name.index).union(set(no_ed_name.index))
yes_aligned = yes_ed_name.reindex(all_categories, fill_value=0)
no_aligned = no_ed_name.reindex(all_categories, fill_value=0)
ed_name = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(ed_name)
print('Chi-squared test p-value:', p)

ed_name
NYU BROOKLYN       0.279412
NYU COBBLE HILL    0.147059
NYU LONG ISLAND    0.372549
NYU MANHATTAN      0.200980
Name: count, dtype: float64
ed_name
NYU BROOKLYN       0.48
NYU COBBLE HILL    0.04
NYU LONG ISLAND    0.30
NYU MANHATTAN      0.18
Name: count, dtype: float64
Chi-squared test p-value: 0.02442103140024817


In [52]:
# sbp_1st

yes_sbp_1st = yes['sbp_1st'].dropna().to_numpy()
no_sbp_1st = no['sbp_1st'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_sbp_1st, no_sbp_1st, equal_var=True) 
print('follow up mean sbp_1st:', yes_sbp_1st.mean().round(1))
print('no follow up mean sbp_1st:', no_sbp_1st.mean().round(1))
print('sbp_1st p-value:', p_value)

follow up mean sbp_1st: 148.7
no follow up mean sbp_1st: 143.5
sbp_1st p-value: 0.1584314128077748


In [54]:
# dbp_1st

yes_dbp_1st = yes['dbp_1st'].dropna().to_numpy()
no_dbp_1st = no['dbp_1st'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_dbp_1st, no_dbp_1st, equal_var=True) 
print('follow up mean dbp_1st:', yes_dbp_1st.mean().round(1))
print('no follow up mean dbp_1st:', no_dbp_1st.mean().round(1))
print('dbp_1st p-value:', p_value)

follow up mean dbp_1st: 88.5
no follow up mean dbp_1st: 84.2
dbp_1st p-value: 0.0804199089878003


In [56]:
# bpa_response

yes_bpa_response = yes['bpa_response'].value_counts()
no_bpa_response = no['bpa_response'].value_counts()
all_categories = set(yes_bpa_response.index).union(set(no_bpa_response.index))
yes_aligned = yes_bpa_response.reindex(all_categories, fill_value=0)
no_aligned = no_bpa_response.reindex(all_categories, fill_value=0)
bpa_response = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(bpa_response)
print('Chi-squared test p-value:', p)

bpa_response
Single Order                    0.893443
Name: count, dtype: float64
bpa_response
Single Order                    0.814815
Name: count, dtype: float64
Chi-squared test p-value: 0.41904297839249594


In [57]:
# primary_care

yes_primary_care = yes['primary_care'].value_counts()
no_primary_care = no['primary_care'].value_counts()
all_categories = set(yes_primary_care.index).union(set(no_primary_care.index))
yes_aligned = yes_primary_care.reindex(all_categories, fill_value=0)
no_aligned = no_primary_care.reindex(all_categories, fill_value=0)
primary_care = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(primary_care)
print('Chi-squared test p-value:', p)

primary_care
YES    0.536145
NO     0.463855
Name: count, dtype: float64
primary_care
YES    0.578947
NO     0.421053
Name: count, dtype: float64
Chi-squared test p-value: 0.7661953314069995


In [58]:
# hba1c

yes_hba1c = yes['hba1c'].dropna().to_numpy()
no_hba1c = no['hba1c'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_hba1c, no_hba1c, equal_var=True) 
print('follow up mean hba1c:', yes_hba1c.mean().round(1))
print('no follow up mean hba1c:', no_hba1c.mean().round(1))
print('hba1c p-value:', p_value)

follow up mean hba1c: 9.1
no follow up mean hba1c: 8.9
hba1c p-value: 0.6745409245576182


In [60]:
# poct_gluc

yes_poct_gluc = yes['poct_gluc'].dropna().to_numpy()
no_poct_gluc = no['poct_gluc'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_poct_gluc, no_poct_gluc, equal_var=True) 
print('follow up mean poct_gluc:', yes_poct_gluc.mean().round(1))
print('no follow up mean poct_gluc:', no_poct_gluc.mean().round(1))
print('poct_gluc p-value:', p_value)

follow up mean poct_gluc: 320.5
no follow up mean poct_gluc: 258.8
poct_gluc p-value: 0.08860960289610122


In [61]:
# chem_gluc

yes_chem_gluc = yes['chem_gluc'].dropna().to_numpy()
no_chem_gluc = no['chem_gluc'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_chem_gluc, no_chem_gluc, equal_var=True) 
print('follow up mean chem_gluc:', yes_chem_gluc.mean().round(1))
print('no follow up mean chem_gluc:', no_chem_gluc.mean().round(1))
print('chem_gluc p-value:', p_value)

follow up mean chem_gluc: 258.4
no follow up mean chem_gluc: 250.7
chem_gluc p-value: 0.807035620738979


In [62]:
# chem_creat

yes_chem_creat = yes['chem_creat'].dropna().to_numpy()
no_chem_creat = no['chem_creat'].dropna().to_numpy()
t_stat, p_value = ttest_ind(yes_chem_creat, no_chem_creat, equal_var=True) 
print('follow up mean chem_creat:', yes_chem_creat.mean().round(1))
print('no follow up mean chem_creat:', no_chem_creat.mean().round(1))
print('chem_creat p-value:', p_value)

follow up mean chem_creat: 1.1
no follow up mean chem_creat: 1.1
chem_creat p-value: 0.9485599549035529
