In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind,chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
# Blanket filter: suppresses every warning category for the rest of the
# session, so library deprecation and runtime warnings will not be shown.
warnings.filterwarnings("ignore")

In [2]:
# Load the merged table from the processed-data folder and preview the first rows
data = pd.read_csv(filepath_or_buffer='../processed_data/merged_data.csv')
data.head(5)

Unnamed: 0,encrypt_mrn,ed_screened,visit_date,new_diagnosis,pcp_followup,max_sbp,min_sbp,max_dbp,min_dbp,max_diff,...,max_value_HEMOGLOBIN A1C,"max_value_LDL CHOLESTEROL, CALCULATED","min_value_CHOLESTEROL, TOTAL",min_value_CREATININE,min_value_GLUCOSE,"min_value_GLUCOSE, POC",min_value_HDL CHOLESTEROL,min_value_HEMOGLOBIN A1C,"min_value_LDL CHOLESTEROL, CALCULATED",visit_type
0,2303579,NYU Brooklyn,2023-01-10,Yes,Yes,133.0,114.0,96.0,64.0,52.0,...,9.3,176.0,268.0,0.74,305.0,230.0,31.0,9.3,176.0,Hospitalization
1,2117042,NYU Long Island,2022-01-13,Yes,Yes,102.0,102.0,69.0,69.0,33.0,...,,,,,,443.0,,,,Hospitalization
2,2117326,NYU Brooklyn,2022-01-14,Yes,No,158.0,148.0,88.0,82.0,76.0,...,11.2,,,0.7,423.0,404.0,,11.2,,Emergency
3,2112782,NYU Brooklyn,2022-01-16,Yes,No,181.0,146.0,93.0,87.0,88.0,...,11.6,,,1.03,418.0,491.0,,11.6,,Emergency
4,2062174,NYU Cobble Hill,2022-01-19,Yes,Yes,177.0,135.0,100.0,68.0,77.0,...,8.3,,,,,181.0,,8.3,,Emergency


In [3]:
# sort visit_date

# Parse the ISO-formatted date strings, then order the rows chronologically
data = (
    data
    .assign(visit_date=pd.to_datetime(data['visit_date'], format='%Y-%m-%d'))
    .sort_values(by='visit_date')
)

data

Unnamed: 0,encrypt_mrn,ed_screened,visit_date,new_diagnosis,pcp_followup,max_sbp,min_sbp,max_dbp,min_dbp,max_diff,...,max_value_HEMOGLOBIN A1C,"max_value_LDL CHOLESTEROL, CALCULATED","min_value_CHOLESTEROL, TOTAL",min_value_CREATININE,min_value_GLUCOSE,"min_value_GLUCOSE, POC",min_value_HDL CHOLESTEROL,min_value_HEMOGLOBIN A1C,"min_value_LDL CHOLESTEROL, CALCULATED",visit_type
17,2113364,NYU Long Island,2022-01-06,Yes,Yes,164.0,145.0,95.0,82.0,75.0,...,13.0,,,0.86,200.0,214.0,,13.0,,Hospitalization
19,2115116,NYU Cobble Hill,2022-01-09,Yes,No,135.0,135.0,75.0,75.0,60.0,...,14.6,,,1.43,475.0,444.0,,14.6,,Hospitalization
1,2117042,NYU Long Island,2022-01-13,Yes,Yes,102.0,102.0,69.0,69.0,33.0,...,,,,,,443.0,,,,Hospitalization
2,2117326,NYU Brooklyn,2022-01-14,Yes,No,158.0,148.0,88.0,82.0,76.0,...,11.2,,,0.70,423.0,404.0,,11.2,,Emergency
3,2112782,NYU Brooklyn,2022-01-16,Yes,No,181.0,146.0,93.0,87.0,88.0,...,11.6,,,1.03,418.0,491.0,,11.6,,Emergency
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2018657,NYU Brooklyn,2023-12-02,Yes,Yes,156.0,148.0,92.0,89.0,64.0,...,7.1,,,1.33,199.0,183.0,,7.1,,Emergency
81,2479927,NYU Cobble Hill,2023-12-05,Yes,Yes,182.0,138.0,106.0,92.0,76.0,...,8.2,,,0.80,260.0,258.0,,8.2,,Emergency
85,2481662,NYU Brooklyn,2023-12-09,Yes,Yes,145.0,137.0,76.0,75.0,70.0,...,,,,,,,,,,Emergency
67,2405033,NYU Cobble Hill,2023-12-18,Yes,Yes,144.0,129.0,90.0,86.0,54.0,...,,,,0.91,109.0,,,,,Emergency


In [4]:
# Row counts per pcp_followup value (sizes of the two comparison groups)
data.loc[:, 'pcp_followup'].value_counts()

pcp_followup
Yes    204
No      50
Name: count, dtype: int64

In [5]:
# follow up / no follow up
# Rows whose pcp_followup is neither 'Yes' nor 'No' end up in neither group.
yes = data.loc[data['pcp_followup'].eq('Yes')]
no = data.loc[data['pcp_followup'].eq('No')]


In [6]:
# age

# Drop missing ages first, as the other numeric comparisons in this notebook
# do; a single NaN would otherwise turn the printed means and the p-value
# into NaN.
yes_age = yes['age'].dropna().to_numpy()
no_age = no['age'].dropna().to_numpy()

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_age, no_age, equal_var=True)
print('follow up mean age:', yes_age.mean())
print('no follow up mean age:', no_age.mean())
print('Age p-value:', p_value)


follow up mean age: 52.96078431372549
no follow up mean age: 51.3
Age p-value: 0.2808737545923621


In [7]:
# sex
# NOTE(review): an earlier comment listed numeric codes (1: female, 2: male,
# 99: unknown, 951: other), but the filter below matches the string 'Male' —
# confirm the column's encoding.

# male
# Every row that is not exactly 'Male' (missing values included) is counted
# in the "other" column of the table.
yes_male = int((yes['sex'] == 'Male').sum())
no_male = int((no['sex'] == 'Male').sum())
print('follow up male proportion:', round(yes_male / len(yes), 3))
print('no follow up male proportion:', round(no_male / len(no), 3))

yes_other = len(yes) - yes_male
no_other = len(no) - no_male

# 2 x 2 table: rows = follow up / no follow up, columns = male / not male
sex = np.array([
    [yes_male, yes_other],
    [no_male, no_other],
])

chi2, p, dof, expected = chi2_contingency(sex)
print('Chi-squared test p-value:', p)

follow up male proportion: 0.593
no follow up male proportion: 0.66
Chi-squared test p-value: 0.4803451287200199


In [8]:
# race

# Per-group counts of each observed category
yes_detailed_race = yes['detailed_race'].value_counts()
no_detailed_race = no['detailed_race'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_detailed_race.index) | set(no_detailed_race.index), key=str)
yes_aligned = yes_detailed_race.reindex(all_categories, fill_value=0)
no_aligned = no_detailed_race.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
detailed_race = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(detailed_race)
print('Chi-squared test p-value:', p)

detailed_race
Black       0.240196
Other       0.063725
Asian       0.058824
Hispanic    0.318627
White       0.318627
Name: count, dtype: float64
detailed_race
Black       0.26
Other       0.12
Asian       0.06
Hispanic    0.42
White       0.14
Name: count, dtype: float64
Chi-squared test p-value: 0.11150481115755405


In [9]:
# language

# Per-group counts of each observed category
yes_language = yes['language'].value_counts()
no_language = no['language'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_language.index) | set(no_language.index), key=str)
yes_aligned = yes_language.reindex(all_categories, fill_value=0)
no_aligned = no_language.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
language = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(language)
print('Chi-squared test p-value:', p)

language
Spanish    0.171569
English    0.808824
Other      0.019608
Name: count, dtype: float64
language
Spanish    0.34
English    0.60
Other      0.06
Name: count, dtype: float64
Chi-squared test p-value: 0.006026807028311744


In [10]:
# insurance_type

# Per-group counts of each observed category
yes_insurance_type = yes['insurance_type'].value_counts()
no_insurance_type = no['insurance_type'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_insurance_type.index) | set(no_insurance_type.index), key=str)
yes_aligned = yes_insurance_type.reindex(all_categories, fill_value=0)
no_aligned = no_insurance_type.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
insurance_type = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(insurance_type)
print('Chi-squared test p-value:', p)

insurance_type
SELFPAY     0.049020
MEDICAID    0.264706
PRIVATE     0.529412
OTHER       0.004902
MEDICARE    0.151961
Name: count, dtype: float64
insurance_type
SELFPAY     0.18
MEDICAID    0.52
PRIVATE     0.22
OTHER       0.00
MEDICARE    0.08
Name: count, dtype: float64
Chi-squared test p-value: 1.5896543265022498e-05


In [11]:
# bmi

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_bmi, no_bmi = (group['bmi'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_bmi, no_bmi, equal_var=True)
print('follow up mean bmi:', np.mean(yes_bmi))
print('no follow up mean bmi:', np.mean(no_bmi))
print('bmi p-value:', p_value)

follow up mean bmi: 34.73151044791667
no follow up mean bmi: 33.436521847826086
bmi p-value: 0.28781223974297426


In [12]:
# height

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_height, no_height = (group['height'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_height, no_height, equal_var=True)
print('follow up mean height:', np.mean(yes_height))
print('no follow up mean height:', np.mean(no_height))
print('height p-value:', p_value)

follow up mean height: 5.569385474860335
no follow up mean height: 5.534523809523809
height p-value: 0.6129339194172966


In [13]:
# weight

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_weight, no_weight = (group['weight'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_weight, no_weight, equal_var=True)
print('follow up mean weight:', np.mean(yes_weight))
print('no follow up mean weight:', np.mean(no_weight))
print('weight p-value:', p_value)

follow up mean weight: 224.2537244897959
no follow up mean weight: 210.07913043478263
weight p-value: 0.13377476123175527


In [14]:
# dm_on_pl

# Per-group counts of each observed category
yes_dm_on_pl = yes['dm_on_pl'].value_counts()
no_dm_on_pl = no['dm_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_dm_on_pl.index) | set(no_dm_on_pl.index), key=str)
yes_aligned = yes_dm_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_dm_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
dm_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_on_pl)
print('Chi-squared test p-value:', p)

dm_on_pl
Yes    0.04902
No     0.95098
Name: count, dtype: float64
dm_on_pl
Yes    0.1
No     0.9
Name: count, dtype: float64
Chi-squared test p-value: 0.30030788194248437


In [15]:
# dm_on_pmh

# Per-group counts of each observed category
yes_dm_on_pmh = yes['dm_on_pmh'].value_counts()
no_dm_on_pmh = no['dm_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_dm_on_pmh.index) | set(no_dm_on_pmh.index), key=str)
yes_aligned = yes_dm_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_dm_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
dm_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_on_pmh)
print('Chi-squared test p-value:', p)

dm_on_pmh
Yes    0.137255
No     0.862745
Name: count, dtype: float64
dm_on_pmh
Yes    0.2
No     0.8
Name: count, dtype: float64
Chi-squared test p-value: 0.37156644803743766


In [16]:
# htn_on_pl

# Per-group counts of each observed category
yes_htn_on_pl = yes['htn_on_pl'].value_counts()
no_htn_on_pl = no['htn_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_htn_on_pl.index) | set(no_htn_on_pl.index), key=str)
yes_aligned = yes_htn_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_htn_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
htn_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_on_pl)
print('Chi-squared test p-value:', p)

htn_on_pl
Yes    0.151961
No     0.848039
Name: count, dtype: float64
htn_on_pl
Yes    0.14
No     0.86
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [17]:
# htn_on_pmh

# Per-group counts of each observed category
yes_htn_on_pmh = yes['htn_on_pmh'].value_counts()
no_htn_on_pmh = no['htn_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_htn_on_pmh.index) | set(no_htn_on_pmh.index), key=str)
yes_aligned = yes_htn_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_htn_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
htn_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_on_pmh)
print('Chi-squared test p-value:', p)

htn_on_pmh
Yes    0.401961
No     0.598039
Name: count, dtype: float64
htn_on_pmh
Yes    0.26
No     0.74
Name: count, dtype: float64
Chi-squared test p-value: 0.08986119717756094


In [18]:
# hld_on_pl

# Per-group counts of each observed category
yes_hld_on_pl = yes['hld_on_pl'].value_counts()
no_hld_on_pl = no['hld_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_hld_on_pl.index) | set(no_hld_on_pl.index), key=str)
yes_aligned = yes_hld_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_hld_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
hld_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(hld_on_pl)
print('Chi-squared test p-value:', p)

hld_on_pl
Yes    0.088235
No     0.911765
Name: count, dtype: float64
hld_on_pl
Yes    0.06
No     0.94
Name: count, dtype: float64
Chi-squared test p-value: 0.7164497400700032


In [19]:
# hld_on_pmh

# Per-group counts of each observed category
yes_hld_on_pmh = yes['hld_on_pmh'].value_counts()
no_hld_on_pmh = no['hld_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_hld_on_pmh.index) | set(no_hld_on_pmh.index), key=str)
yes_aligned = yes_hld_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_hld_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
hld_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(hld_on_pmh)
print('Chi-squared test p-value:', p)

hld_on_pmh
Yes    0.20098
No     0.79902
Name: count, dtype: float64
hld_on_pmh
Yes    0.18
No     0.82
Name: count, dtype: float64
Chi-squared test p-value: 0.8918715162468956


In [20]:
# gdm_on_pl
# NOTE(review): in the recorded output only 'No' is observed in either group,
# which makes this a 2 x 1 table; the p-value of 1.0 carries no information.

# Per-group counts of each observed category
yes_gdm_on_pl = yes['gdm_on_pl'].value_counts()
no_gdm_on_pl = no['gdm_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_gdm_on_pl.index) | set(no_gdm_on_pl.index), key=str)
yes_aligned = yes_gdm_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_gdm_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
gdm_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(gdm_on_pl)
print('Chi-squared test p-value:', p)

gdm_on_pl
No    1.0
Name: count, dtype: float64
gdm_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [21]:
# gdm_on_pmh
# NOTE(review): in the recorded output only 'No' is observed in either group,
# which makes this a 2 x 1 table; the p-value of 1.0 carries no information.

# Per-group counts of each observed category
yes_gdm_on_pmh = yes['gdm_on_pmh'].value_counts()
no_gdm_on_pmh = no['gdm_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_gdm_on_pmh.index) | set(no_gdm_on_pmh.index), key=str)
yes_aligned = yes_gdm_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_gdm_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
gdm_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(gdm_on_pmh)
print('Chi-squared test p-value:', p)

gdm_on_pmh
No    1.0
Name: count, dtype: float64
gdm_on_pmh
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [22]:
# pcos_on_pl
# NOTE(review): in the recorded output only 'No' is observed in either group,
# which makes this a 2 x 1 table; the p-value of 1.0 carries no information.

# Per-group counts of each observed category
yes_pcos_on_pl = yes['pcos_on_pl'].value_counts()
no_pcos_on_pl = no['pcos_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_pcos_on_pl.index) | set(no_pcos_on_pl.index), key=str)
yes_aligned = yes_pcos_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_pcos_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
pcos_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(pcos_on_pl)
print('Chi-squared test p-value:', p)

pcos_on_pl
No    1.0
Name: count, dtype: float64
pcos_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [23]:
# pcos_on_pmh

# Per-group counts of each observed category
yes_pcos_on_pmh = yes['pcos_on_pmh'].value_counts()
no_pcos_on_pmh = no['pcos_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_pcos_on_pmh.index) | set(no_pcos_on_pmh.index), key=str)
yes_aligned = yes_pcos_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_pcos_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
pcos_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(pcos_on_pmh)
print('Chi-squared test p-value:', p)

pcos_on_pmh
Yes    0.014706
No     0.985294
Name: count, dtype: float64
pcos_on_pmh
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 0.8947741639643179


In [24]:
# cad_on_pl

# Per-group counts of each observed category
yes_cad_on_pl = yes['cad_on_pl'].value_counts()
no_cad_on_pl = no['cad_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_cad_on_pl.index) | set(no_cad_on_pl.index), key=str)
yes_aligned = yes_cad_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_cad_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
cad_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cad_on_pl)
print('Chi-squared test p-value:', p)

cad_on_pl
Yes    0.02451
No     0.97549
Name: count, dtype: float64
cad_on_pl
Yes    0.02
No     0.98
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [25]:
# cad_on_pmh

# Per-group counts of each observed category
yes_cad_on_pmh = yes['cad_on_pmh'].value_counts()
no_cad_on_pmh = no['cad_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_cad_on_pmh.index) | set(no_cad_on_pmh.index), key=str)
yes_aligned = yes_cad_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_cad_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
cad_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cad_on_pmh)
print('Chi-squared test p-value:', p)

cad_on_pmh
Yes    0.02451
No     0.97549
Name: count, dtype: float64
cad_on_pmh
Yes    0.02
No     0.98
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [26]:
# chf_on_pl

# Per-group counts of each observed category
yes_chf_on_pl = yes['chf_on_pl'].value_counts()
no_chf_on_pl = no['chf_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_chf_on_pl.index) | set(no_chf_on_pl.index), key=str)
yes_aligned = yes_chf_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_chf_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
chf_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(chf_on_pl)
print('Chi-squared test p-value:', p)

chf_on_pl
Yes    0.004902
No     0.995098
Name: count, dtype: float64
chf_on_pl
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [27]:
# chf_on_pmh

# Per-group counts of each observed category
yes_chf_on_pmh = yes['chf_on_pmh'].value_counts()
no_chf_on_pmh = no['chf_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_chf_on_pmh.index) | set(no_chf_on_pmh.index), key=str)
yes_aligned = yes_chf_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_chf_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
chf_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(chf_on_pmh)
print('Chi-squared test p-value:', p)

chf_on_pmh
Yes    0.019608
No     0.980392
Name: count, dtype: float64
chf_on_pmh
Yes    0.02
No     0.98
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [28]:
# depression_on_pl

# Per-group counts of each observed category
yes_depression_on_pl = yes['depression_on_pl'].value_counts()
no_depression_on_pl = no['depression_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_depression_on_pl.index) | set(no_depression_on_pl.index), key=str)
yes_aligned = yes_depression_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_depression_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
depression_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(depression_on_pl)
print('Chi-squared test p-value:', p)

depression_on_pl
Yes    0.029412
No     0.970588
Name: count, dtype: float64
depression_on_pl
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 0.4791192144625801


In [29]:
# depression_on_pmh

# Per-group counts of each observed category
yes_depression_on_pmh = yes['depression_on_pmh'].value_counts()
no_depression_on_pmh = no['depression_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_depression_on_pmh.index) | set(no_depression_on_pmh.index), key=str)
yes_aligned = yes_depression_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_depression_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
depression_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(depression_on_pmh)
print('Chi-squared test p-value:', p)

depression_on_pmh
Yes    0.04902
No     0.95098
Name: count, dtype: float64
depression_on_pmh
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 0.23341791348654384


In [30]:
# cva_on_pl
# NOTE(review): in the recorded output only 'No' is observed in either group,
# which makes this a 2 x 1 table; the p-value of 1.0 carries no information.

# Per-group counts of each observed category
yes_cva_on_pl = yes['cva_on_pl'].value_counts()
no_cva_on_pl = no['cva_on_pl'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_cva_on_pl.index) | set(no_cva_on_pl.index), key=str)
yes_aligned = yes_cva_on_pl.reindex(all_categories, fill_value=0)
no_aligned = no_cva_on_pl.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
cva_on_pl = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cva_on_pl)
print('Chi-squared test p-value:', p)

cva_on_pl
No    1.0
Name: count, dtype: float64
cva_on_pl
No    1.0
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [31]:
# cva_on_pmh

# Per-group counts of each observed category
yes_cva_on_pmh = yes['cva_on_pmh'].value_counts()
no_cva_on_pmh = no['cva_on_pmh'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_cva_on_pmh.index) | set(no_cva_on_pmh.index), key=str)
yes_aligned = yes_cva_on_pmh.reindex(all_categories, fill_value=0)
no_aligned = no_cva_on_pmh.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
cva_on_pmh = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(cva_on_pmh)
print('Chi-squared test p-value:', p)

cva_on_pmh
Yes    0.02451
No     0.97549
Name: count, dtype: float64
cva_on_pmh
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 0.5822546410717091


In [32]:
# family_dm
# NOTE(review): the recorded group means (about 0.51 and 0.67) suggest
# family_dm may be a 0/1 indicator; if so, a chi-squared or Fisher exact test
# would be the usual choice — confirm.

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_family_dm, no_family_dm = (group['family_dm'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_family_dm, no_family_dm, equal_var=True)
print('follow up mean family_dm:', np.mean(yes_family_dm))
print('no follow up mean family_dm:', np.mean(no_family_dm))
print('family_dm p-value:', p_value)

follow up mean family_dm: 0.5063291139240507
no follow up mean family_dm: 0.6666666666666666
family_dm p-value: 0.48651850593482493


In [33]:
# tobacco_user

# Per-group counts of each observed category
yes_tobacco_user = yes['tobacco_user'].value_counts()
no_tobacco_user = no['tobacco_user'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_tobacco_user.index) | set(no_tobacco_user.index), key=str)
yes_aligned = yes_tobacco_user.reindex(all_categories, fill_value=0)
no_aligned = no_tobacco_user.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
tobacco_user = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(tobacco_user)
print('Chi-squared test p-value:', p)

tobacco_user
Not Asked    0.042857
Never        0.714286
Yes          0.064286
Quit         0.178571
Name: count, dtype: float64
tobacco_user
Not Asked    0.000000
Never        0.666667
Yes          0.185185
Quit         0.148148
Name: count, dtype: float64
Chi-squared test p-value: 0.15130170447230368


In [34]:
# dm_meds

# Per-group counts of each observed category
yes_dm_meds = yes['dm_meds'].value_counts()
no_dm_meds = no['dm_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_dm_meds.index) | set(no_dm_meds.index), key=str)
yes_aligned = yes_dm_meds.reindex(all_categories, fill_value=0)
no_aligned = no_dm_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
dm_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(dm_meds)
print('Chi-squared test p-value:', p)

dm_meds
Yes    0.481675
No     0.518325
Name: count, dtype: float64
dm_meds
Yes    0.292683
No     0.707317
Name: count, dtype: float64
Chi-squared test p-value: 0.041867998139034204


In [35]:
# htn_meds

# Per-group counts of each observed category
yes_htn_meds = yes['htn_meds'].value_counts()
no_htn_meds = no['htn_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_htn_meds.index) | set(no_htn_meds.index), key=str)
yes_aligned = yes_htn_meds.reindex(all_categories, fill_value=0)
no_aligned = no_htn_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
htn_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(htn_meds)
print('Chi-squared test p-value:', p)

htn_meds
Yes    0.225131
No     0.774869
Name: count, dtype: float64
htn_meds
Yes    0.097561
No     0.902439
Name: count, dtype: float64
Chi-squared test p-value: 0.10312237098734389


In [36]:
# statin_meds

# Per-group counts of each observed category
yes_statin_meds = yes['statin_meds'].value_counts()
no_statin_meds = no['statin_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_statin_meds.index) | set(no_statin_meds.index), key=str)
yes_aligned = yes_statin_meds.reindex(all_categories, fill_value=0)
no_aligned = no_statin_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
statin_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(statin_meds)
print('Chi-squared test p-value:', p)

statin_meds
Yes    0.308901
No     0.691099
Name: count, dtype: float64
statin_meds
Yes    0.170732
No     0.829268
Name: count, dtype: float64
Chi-squared test p-value: 0.11217391945443957


In [37]:
# aspirin_meds

# Per-group counts of each observed category
yes_aspirin_meds = yes['aspirin_meds'].value_counts()
no_aspirin_meds = no['aspirin_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_aspirin_meds.index) | set(no_aspirin_meds.index), key=str)
yes_aligned = yes_aspirin_meds.reindex(all_categories, fill_value=0)
no_aligned = no_aspirin_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
aspirin_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(aspirin_meds)
print('Chi-squared test p-value:', p)

aspirin_meds
Yes    0.157068
No     0.842932
Name: count, dtype: float64
aspirin_meds
Yes    0.097561
No     0.902439
Name: count, dtype: float64
Chi-squared test p-value: 0.46280934939987206


In [38]:
# plavix_meds

# Per-group counts of each observed category
yes_plavix_meds = yes['plavix_meds'].value_counts()
no_plavix_meds = no['plavix_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_plavix_meds.index) | set(no_plavix_meds.index), key=str)
yes_aligned = yes_plavix_meds.reindex(all_categories, fill_value=0)
no_aligned = no_plavix_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
plavix_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(plavix_meds)
print('Chi-squared test p-value:', p)

plavix_meds
Yes    0.036649
No     0.963351
Name: count, dtype: float64
plavix_meds
Yes    0.02439
No     0.97561
Name: count, dtype: float64
Chi-squared test p-value: 1.0


In [39]:
# lasix_meds

# Per-group counts of each observed category
yes_lasix_meds = yes['lasix_meds'].value_counts()
no_lasix_meds = no['lasix_meds'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_lasix_meds.index) | set(no_lasix_meds.index), key=str)
yes_aligned = yes_lasix_meds.reindex(all_categories, fill_value=0)
no_aligned = no_lasix_meds.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
lasix_meds = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(lasix_meds)
print('Chi-squared test p-value:', p)

lasix_meds
Yes    0.057592
No     0.942408
Name: count, dtype: float64
lasix_meds
Yes    0.0
No     1.0
Name: count, dtype: float64
Chi-squared test p-value: 0.24221581489585198


In [40]:
# disposition

# Per-group counts of each observed category
yes_disposition = yes['disposition'].value_counts()
no_disposition = no['disposition'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_disposition.index) | set(no_disposition.index), key=str)
yes_aligned = yes_disposition.reindex(all_categories, fill_value=0)
no_aligned = no_disposition.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
disposition = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(disposition)
print('Chi-squared test p-value:', p)

disposition
Discharge                     0.514706
LWBS after Triage             0.004902
Admit                         0.450980
AMA                           0.024510
Transfer to Procedure Area    0.004902
Name: count, dtype: float64
disposition
Discharge                     0.72
LWBS after Triage             0.00
Admit                         0.26
AMA                           0.00
Transfer to Procedure Area    0.02
Name: count, dtype: float64
Chi-squared test p-value: 0.055712501704334734


In [41]:
# ed_name

# Per-group counts of each observed category
yes_ed_name = yes['ed_name'].value_counts()
no_ed_name = no['ed_name'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_ed_name.index) | set(no_ed_name.index), key=str)
yes_aligned = yes_ed_name.reindex(all_categories, fill_value=0)
no_aligned = no_ed_name.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
ed_name = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(ed_name)
print('Chi-squared test p-value:', p)

ed_name
NYU COBBLE HILL    0.147059
NYU BROOKLYN       0.279412
NYU MANHATTAN      0.200980
NYU LONG ISLAND    0.372549
Name: count, dtype: float64
ed_name
NYU COBBLE HILL    0.04
NYU BROOKLYN       0.48
NYU MANHATTAN      0.18
NYU LONG ISLAND    0.30
Name: count, dtype: float64
Chi-squared test p-value: 0.02442103140024817


In [42]:
# sbp_1st

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_sbp_1st, no_sbp_1st = (group['sbp_1st'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_sbp_1st, no_sbp_1st, equal_var=True)
print('follow up mean sbp_1st:', np.mean(yes_sbp_1st))
print('no follow up mean sbp_1st:', np.mean(no_sbp_1st))
print('sbp_1st p-value:', p_value)

follow up mean sbp_1st: 148.67156862745097
no follow up mean sbp_1st: 143.5
sbp_1st p-value: 0.1584314128077748


In [43]:
# dbp_1st

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_dbp_1st, no_dbp_1st = (group['dbp_1st'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_dbp_1st, no_dbp_1st, equal_var=True)
print('follow up mean dbp_1st:', np.mean(yes_dbp_1st))
print('no follow up mean dbp_1st:', np.mean(no_dbp_1st))
print('dbp_1st p-value:', p_value)

follow up mean dbp_1st: 88.51470588235294
no follow up mean dbp_1st: 84.24
dbp_1st p-value: 0.0804199089878003


In [44]:
# bpa_response

# Per-group counts of each observed category
yes_bpa_response = yes['bpa_response'].value_counts()
no_bpa_response = no['bpa_response'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_bpa_response.index) | set(no_bpa_response.index), key=str)
yes_aligned = yes_bpa_response.reindex(all_categories, fill_value=0)
no_aligned = no_bpa_response.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
bpa_response = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(bpa_response)
print('Chi-squared test p-value:', p)

bpa_response
Single Order                    0.893443
Name: count, dtype: float64
bpa_response
Single Order                    0.814815
Name: count, dtype: float64
Chi-squared test p-value: 0.41904297839249594


In [45]:
# primary_care

# Per-group counts of each observed category
yes_primary_care = yes['primary_care'].value_counts()
no_primary_care = no['primary_care'].value_counts()

# Sorted union of the observed labels. Reindexing by a bare set gives a column
# order that can differ between kernel sessions (string hashes are randomized),
# which made the printed tables non-reproducible.
all_categories = sorted(set(yes_primary_care.index) | set(no_primary_care.index), key=str)
yes_aligned = yes_primary_care.reindex(all_categories, fill_value=0)
no_aligned = no_primary_care.reindex(all_categories, fill_value=0)

# 2 x k table of counts: row 0 = follow up, row 1 = no follow up
primary_care = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(primary_care)
print('Chi-squared test p-value:', p)

primary_care
YES    0.536145
NO     0.463855
Name: count, dtype: float64
primary_care
YES    0.578947
NO     0.421053
Name: count, dtype: float64
Chi-squared test p-value: 0.7661953314069995


In [46]:
# hba1c

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_hba1c, no_hba1c = (group['hba1c'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_hba1c, no_hba1c, equal_var=True)
print('follow up mean hba1c:', np.mean(yes_hba1c))
print('no follow up mean hba1c:', np.mean(no_hba1c))
print('hba1c p-value:', p_value)

follow up mean hba1c: 9.099375008749998
no follow up mean hba1c: 8.897142865714287
hba1c p-value: 0.6745409245576235


In [47]:
# poct_gluc

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_poct_gluc, no_poct_gluc = (group['poct_gluc'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_poct_gluc, no_poct_gluc, equal_var=True)
print('follow up mean poct_gluc:', np.mean(yes_poct_gluc))
print('no follow up mean poct_gluc:', np.mean(no_poct_gluc))
print('poct_gluc p-value:', p_value)

follow up mean poct_gluc: 320.5061728395062
no follow up mean poct_gluc: 258.85
poct_gluc p-value: 0.08860960289610122


In [48]:
# chem_gluc

# Non-missing values for the follow-up and no-follow-up groups, in that order
yes_chem_gluc, no_chem_gluc = (group['chem_gluc'].dropna().to_numpy() for group in (yes, no))

# Two-sample t-test assuming equal variances in both groups
t_stat, p_value = ttest_ind(yes_chem_gluc, no_chem_gluc, equal_var=True)
print('follow up mean chem_gluc:', np.mean(yes_chem_gluc))
print('no follow up mean chem_gluc:', np.mean(no_chem_gluc))
print('chem_gluc p-value:', p_value)

follow up mean chem_gluc: 258.4175824175824
no follow up mean chem_gluc: 250.71794871794873
chem_gluc p-value: 0.807035620738979


In [49]:
# chem_creat: two-sample t-test of `chem_creat` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_chem_creat, no_chem_creat = (
    group['chem_creat'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_chem_creat,
    no_chem_creat,
    equal_var=True,
)
print('follow up mean chem_creat:', np.mean(yes_chem_creat))
print('no follow up mean chem_creat:', np.mean(no_chem_creat))
print('chem_creat p-value:', p_value)

follow up mean chem_creat: 1.10142857510989
no follow up mean chem_creat: 1.107948708717949
chem_creat p-value: 0.9485599549035529


In [50]:
# visit_type: chi-squared test of independence between group membership
# (`yes` vs `no`) and the `visit_type` category.
# NOTE(review): `yes` / `no` are DataFrames built in an earlier cell, presumably
# the follow-up / no-follow-up subsets; confirm there.

# Per-group frequency of each category (value_counts drops NaN by default).
yes_visit_type = yes['visit_type'].value_counts()
no_visit_type = no['visit_type'].value_counts()
all_categories = set(yes_visit_type.index).union(set(no_visit_type.index))

# Reindex on the *sorted* categories: iterating a raw set of strings depends on
# hash randomisation, so the printed order and the column order of the table
# could otherwise change between kernel restarts. A category missing from one
# group is counted as 0.
yes_aligned = yes_visit_type.reindex(sorted(all_categories), fill_value=0)
no_aligned = no_visit_type.reindex(sorted(all_categories), fill_value=0)

# Contingency table: row 0 = `yes` group, row 1 = `no` group, one column per category.
visit_type = np.array([yes_aligned, no_aligned])
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())
chi2, p, dof, expected = chi2_contingency(visit_type)
print('Chi-squared test p-value:', p)

# The chi-squared approximation is commonly considered valid only when every
# expected count is at least 5; rare categories can violate this, so flag the
# result instead of failing silently.
if (expected < 5).any():
    print('Warning: some expected counts are below 5; the chi-squared p-value may be unreliable')

visit_type
Emergency               0.551724
Specialty Outpatient    0.034483
Hospitalization         0.413793
Name: count, dtype: float64
visit_type
Emergency               0.60
Specialty Outpatient    0.02
Hospitalization         0.38
Name: count, dtype: float64
Chi-squared test p-value: 0.761381781370938


In [51]:
# max_sbp: two-sample t-test of `max_sbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_sbp, no_max_sbp = (
    group['max_sbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_sbp,
    no_max_sbp,
    equal_var=True,
)
print('follow up mean max_sbp:', np.mean(yes_max_sbp))
print('no follow up mean max_sbp:', np.mean(no_max_sbp))
print('max_sbp p-value:', p_value)

follow up mean max_sbp: 155.7029702970297
no follow up mean max_sbp: 148.1
max_sbp p-value: 0.034169874637864465


In [52]:
# min_sbp: two-sample t-test of `min_sbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_min_sbp, no_min_sbp = (
    group['min_sbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_min_sbp,
    no_min_sbp,
    equal_var=True,
)
print('follow up mean min_sbp:', np.mean(yes_min_sbp))
print('no follow up mean min_sbp:', np.mean(no_min_sbp))
print('min_sbp p-value:', p_value)

follow up mean min_sbp: 132.12871287128712
no follow up mean min_sbp: 130.66
min_sbp p-value: 0.6535218977649286


In [53]:
# avg_sbp: two-sample t-test of `avg_sbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_sbp, no_avg_sbp = (
    group['avg_sbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_sbp,
    no_avg_sbp,
    equal_var=True,
)
print('follow up mean avg_sbp:', np.mean(yes_avg_sbp))
print('no follow up mean avg_sbp:', np.mean(no_avg_sbp))
print('avg_sbp p-value:', p_value)

follow up mean avg_sbp: 143.98430693069307
no follow up mean avg_sbp: 138.4612
avg_sbp p-value: 0.07501347126207704


In [54]:
# max_dbp: two-sample t-test of `max_dbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_dbp, no_max_dbp = (
    group['max_dbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_dbp,
    no_max_dbp,
    equal_var=True,
)
print('follow up mean max_dbp:', np.mean(yes_max_dbp))
print('no follow up mean max_dbp:', np.mean(no_max_dbp))
print('max_dbp p-value:', p_value)

follow up mean max_dbp: 93.74752475247524
no follow up mean max_dbp: 89.82
max_dbp p-value: 0.10284313447951746


In [55]:
# min_dbp: two-sample t-test of `min_dbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_min_dbp, no_min_dbp = (
    group['min_dbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_min_dbp,
    no_min_dbp,
    equal_var=True,
)
print('follow up mean min_dbp:', np.mean(yes_min_dbp))
print('no follow up mean min_dbp:', np.mean(no_min_dbp))
print('min_dbp p-value:', p_value)

follow up mean min_dbp: 77.16831683168317
no follow up mean min_dbp: 76.1
min_dbp p-value: 0.6196873180429204


In [56]:
# avg_dbp: two-sample t-test of `avg_dbp` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_dbp, no_avg_dbp = (
    group['avg_dbp'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_dbp,
    no_avg_dbp,
    equal_var=True,
)
print('follow up mean avg_dbp:', np.mean(yes_avg_dbp))
print('no follow up mean avg_dbp:', np.mean(no_avg_dbp))
print('avg_dbp p-value:', p_value)

follow up mean avg_dbp: 85.23678217821784
no follow up mean avg_dbp: 82.8936
avg_dbp p-value: 0.23196670936037644


In [57]:
# max_diff: two-sample t-test of `max_diff` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_diff, no_max_diff = (
    group['max_diff'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_diff,
    no_max_diff,
    equal_var=True,
)
print('follow up mean max_diff:', np.mean(yes_max_diff))
print('no follow up mean max_diff:', np.mean(no_max_diff))
print('max_diff p-value:', p_value)

follow up mean max_diff: 68.47524752475248
no follow up mean max_diff: 64.74
max_diff p-value: 0.198455715480667


In [58]:
# avg_diff: two-sample t-test of `avg_diff` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_diff, no_avg_diff = (
    group['avg_diff'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_diff,
    no_avg_diff,
    equal_var=True,
)
print('follow up mean avg_diff:', np.mean(yes_avg_diff))
print('no follow up mean avg_diff:', np.mean(no_avg_diff))
print('avg_diff p-value:', p_value)

follow up mean avg_diff: 58.74777227722772
no follow up mean avg_diff: 55.567
avg_diff p-value: 0.18371706774609348


In [59]:
# avg_value_CHOLESTEROL_TOTAL: two-sample t-test of `avg_value_CHOLESTEROL, TOTAL`
# between the `yes` and `no` groups (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the max_value_CHOLESTEROL_TOTAL cell
# exactly, which suggests the avg_ and max_ columns coincide wherever a value
# exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_CHOLESTEROL_TOTAL, no_avg_value_CHOLESTEROL_TOTAL = (
    group['avg_value_CHOLESTEROL, TOTAL'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_CHOLESTEROL_TOTAL,
    no_avg_value_CHOLESTEROL_TOTAL,
    equal_var=True,
)
print('follow up mean avg_value_CHOLESTEROL_TOTAL:', np.mean(yes_avg_value_CHOLESTEROL_TOTAL))
print('no follow up mean avg_value_CHOLESTEROL_TOTAL:', np.mean(no_avg_value_CHOLESTEROL_TOTAL))
print('avg_value_CHOLESTEROL_TOTAL p-value:', p_value)

follow up mean avg_value_CHOLESTEROL_TOTAL: 215.92307692307693
no follow up mean avg_value_CHOLESTEROL_TOTAL: 132.5
avg_value_CHOLESTEROL_TOTAL p-value: 0.0820628403220119


In [60]:
# max_value_CHOLESTEROL_TOTAL: two-sample t-test of `max_value_CHOLESTEROL, TOTAL`
# between the `yes` and `no` groups (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the avg_value_CHOLESTEROL_TOTAL cell
# exactly, which suggests the avg_ and max_ columns coincide wherever a value
# exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_CHOLESTEROL_TOTAL, no_max_value_CHOLESTEROL_TOTAL = (
    group['max_value_CHOLESTEROL, TOTAL'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_CHOLESTEROL_TOTAL,
    no_max_value_CHOLESTEROL_TOTAL,
    equal_var=True,
)
print('follow up mean max_value_CHOLESTEROL_TOTAL:', np.mean(yes_max_value_CHOLESTEROL_TOTAL))
print('no follow up mean max_value_CHOLESTEROL_TOTAL:', np.mean(no_max_value_CHOLESTEROL_TOTAL))
print('max_value_CHOLESTEROL_TOTAL p-value:', p_value)

follow up mean max_value_CHOLESTEROL_TOTAL: 215.92307692307693
no follow up mean max_value_CHOLESTEROL_TOTAL: 132.5
max_value_CHOLESTEROL_TOTAL p-value: 0.0820628403220119


In [61]:
# avg_value_CREATININE: two-sample t-test of `avg_value_CREATININE` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_CREATININE, no_avg_value_CREATININE = (
    group['avg_value_CREATININE'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_CREATININE,
    no_avg_value_CREATININE,
    equal_var=True,
)
print('follow up mean avg_value_CREATININE:', np.mean(yes_avg_value_CREATININE))
print('no follow up mean avg_value_CREATININE:', np.mean(no_avg_value_CREATININE))
print('avg_value_CREATININE p-value:', p_value)

follow up mean avg_value_CREATININE: 1.0690329909722223
no follow up mean avg_value_CREATININE: 1.1040476180612246
avg_value_CREATININE p-value: 0.6796224627916438


In [62]:
# max_value_CREATININE: two-sample t-test of `max_value_CREATININE` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_CREATININE, no_max_value_CREATININE = (
    group['max_value_CREATININE'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_CREATININE,
    no_max_value_CREATININE,
    equal_var=True,
)
print('follow up mean max_value_CREATININE:', np.mean(yes_max_value_CREATININE))
print('no follow up mean max_value_CREATININE:', np.mean(no_max_value_CREATININE))
print('max_value_CREATININE p-value:', p_value)

follow up mean max_value_CREATININE: 1.09369792203125
no follow up mean max_value_CREATININE: 1.1228571379591836
max_value_CREATININE p-value: 0.7429907032285665


In [63]:
# avg_value_GLUCOSE: two-sample t-test of `avg_value_GLUCOSE` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_GLUCOSE, no_avg_value_GLUCOSE = (
    group['avg_value_GLUCOSE'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_GLUCOSE,
    no_avg_value_GLUCOSE,
    equal_var=True,
)
print('follow up mean avg_value_GLUCOSE:', np.mean(yes_avg_value_GLUCOSE))
print('no follow up mean avg_value_GLUCOSE:', np.mean(no_avg_value_GLUCOSE))
print('avg_value_GLUCOSE p-value:', p_value)

follow up mean avg_value_GLUCOSE: 252.5509548611111
no follow up mean avg_value_GLUCOSE: 253.5408163265306
avg_value_GLUCOSE p-value: 0.96706861476623


In [64]:
# max_value_GLUCOSE: two-sample t-test of `max_value_GLUCOSE` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_GLUCOSE, no_max_value_GLUCOSE = (
    group['max_value_GLUCOSE'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_GLUCOSE,
    no_max_value_GLUCOSE,
    equal_var=True,
)
print('follow up mean max_value_GLUCOSE:', np.mean(yes_max_value_GLUCOSE))
print('no follow up mean max_value_GLUCOSE:', np.mean(no_max_value_GLUCOSE))
print('max_value_GLUCOSE p-value:', p_value)

follow up mean max_value_GLUCOSE: 268.3020833333333
no follow up mean max_value_GLUCOSE: 261.8775510204082
max_value_GLUCOSE p-value: 0.8185889828358099


In [65]:
# avg_value_GLUCOSE_POC: two-sample t-test of `avg_value_GLUCOSE, POC` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_GLUCOSE_POC, no_avg_value_GLUCOSE_POC = (
    group['avg_value_GLUCOSE, POC'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_GLUCOSE_POC,
    no_avg_value_GLUCOSE_POC,
    equal_var=True,
)
print('follow up mean avg_value_GLUCOSE_POC:', np.mean(yes_avg_value_GLUCOSE_POC))
print('no follow up mean avg_value_GLUCOSE_POC:', np.mean(no_avg_value_GLUCOSE_POC))
print('avg_value_GLUCOSE_POC p-value:', p_value)

follow up mean avg_value_GLUCOSE_POC: 274.91658099665983
no follow up mean avg_value_GLUCOSE_POC: 274.3641025641026
avg_value_GLUCOSE_POC p-value: 0.9822660957714858


In [66]:
# max_value_GLUCOSE_POC: two-sample t-test of `max_value_GLUCOSE, POC` between the
# `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_GLUCOSE_POC, no_max_value_GLUCOSE_POC = (
    group['max_value_GLUCOSE, POC'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_GLUCOSE_POC,
    no_max_value_GLUCOSE_POC,
    equal_var=True,
)
print('follow up mean max_value_GLUCOSE_POC:', np.mean(yes_max_value_GLUCOSE_POC))
print('no follow up mean max_value_GLUCOSE_POC:', np.mean(no_max_value_GLUCOSE_POC))
print('max_value_GLUCOSE_POC p-value:', p_value)

follow up mean max_value_GLUCOSE_POC: 324.07216494845363
no follow up mean max_value_GLUCOSE_POC: 292.7307692307692
max_value_GLUCOSE_POC p-value: 0.31802174949461837


In [67]:
# avg_value_HDL_CHOLESTEROL: two-sample t-test of `avg_value_HDL CHOLESTEROL`
# between the `yes` and `no` groups (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the max_value_HDL_CHOLESTEROL cell
# exactly, which suggests the avg_ and max_ columns coincide wherever a value
# exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_HDL_CHOLESTEROL, no_avg_value_HDL_CHOLESTEROL = (
    group['avg_value_HDL CHOLESTEROL'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_HDL_CHOLESTEROL,
    no_avg_value_HDL_CHOLESTEROL,
    equal_var=True,
)
print('follow up mean avg_value_HDL_CHOLESTEROL:', np.mean(yes_avg_value_HDL_CHOLESTEROL))
print('no follow up mean avg_value_HDL_CHOLESTEROL:', np.mean(no_avg_value_HDL_CHOLESTEROL))
print('avg_value_HDL_CHOLESTEROL p-value:', p_value)

follow up mean avg_value_HDL_CHOLESTEROL: 41.53846153846154
no follow up mean avg_value_HDL_CHOLESTEROL: 35.5
avg_value_HDL_CHOLESTEROL p-value: 0.4116105928268441


In [68]:
# max_value_HDL_CHOLESTEROL: two-sample t-test of `max_value_HDL CHOLESTEROL`
# between the `yes` and `no` groups (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the avg_value_HDL_CHOLESTEROL cell
# exactly, which suggests the avg_ and max_ columns coincide wherever a value
# exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_HDL_CHOLESTEROL, no_max_value_HDL_CHOLESTEROL = (
    group['max_value_HDL CHOLESTEROL'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_HDL_CHOLESTEROL,
    no_max_value_HDL_CHOLESTEROL,
    equal_var=True,
)
print('follow up mean max_value_HDL_CHOLESTEROL:', np.mean(yes_max_value_HDL_CHOLESTEROL))
print('no follow up mean max_value_HDL_CHOLESTEROL:', np.mean(no_max_value_HDL_CHOLESTEROL))
print('max_value_HDL_CHOLESTEROL p-value:', p_value)

follow up mean max_value_HDL_CHOLESTEROL: 41.53846153846154
no follow up mean max_value_HDL_CHOLESTEROL: 35.5
max_value_HDL_CHOLESTEROL p-value: 0.4116105928268441


In [69]:
# avg_value_HEMOGLOBIN_A1C: two-sample t-test of `avg_value_HEMOGLOBIN A1C` between
# the `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_HEMOGLOBIN_A1C, no_avg_value_HEMOGLOBIN_A1C = (
    group['avg_value_HEMOGLOBIN A1C'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_HEMOGLOBIN_A1C,
    no_avg_value_HEMOGLOBIN_A1C,
    equal_var=True,
)
print('follow up mean avg_value_HEMOGLOBIN_A1C:', np.mean(yes_avg_value_HEMOGLOBIN_A1C))
print('no follow up mean avg_value_HEMOGLOBIN_A1C:', np.mean(no_avg_value_HEMOGLOBIN_A1C))
print('avg_value_HEMOGLOBIN_A1C p-value:', p_value)

follow up mean avg_value_HEMOGLOBIN_A1C: 9.142076503825137
no follow up mean avg_value_HEMOGLOBIN_A1C: 8.927659559574467
avg_value_HEMOGLOBIN_A1C p-value: 0.6095460821180638


In [70]:
# max_value_HEMOGLOBIN_A1C: two-sample t-test of `max_value_HEMOGLOBIN A1C` between
# the `yes` and `no` groups (printed below as "follow up" / "no follow up").

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_HEMOGLOBIN_A1C, no_max_value_HEMOGLOBIN_A1C = (
    group['max_value_HEMOGLOBIN A1C'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_HEMOGLOBIN_A1C,
    no_max_value_HEMOGLOBIN_A1C,
    equal_var=True,
)
print('follow up mean max_value_HEMOGLOBIN_A1C:', np.mean(yes_max_value_HEMOGLOBIN_A1C))
print('no follow up mean max_value_HEMOGLOBIN_A1C:', np.mean(no_max_value_HEMOGLOBIN_A1C))
print('max_value_HEMOGLOBIN_A1C p-value:', p_value)

follow up mean max_value_HEMOGLOBIN_A1C: 9.143169400546448
no follow up mean max_value_HEMOGLOBIN_A1C: 8.927659559574467
max_value_HEMOGLOBIN_A1C p-value: 0.6077010830415719


In [71]:
# avg_value_LDL_CHOLESTEROL_CALCULATED: two-sample t-test of
# `avg_value_LDL CHOLESTEROL, CALCULATED` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the
# max_value_LDL_CHOLESTEROL_CALCULATED cell exactly, which suggests the avg_ and
# max_ columns coincide wherever a value exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_avg_value_LDL_CHOLESTEROL_CALCULATED, no_avg_value_LDL_CHOLESTEROL_CALCULATED = (
    group['avg_value_LDL CHOLESTEROL, CALCULATED'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_avg_value_LDL_CHOLESTEROL_CALCULATED,
    no_avg_value_LDL_CHOLESTEROL_CALCULATED,
    equal_var=True,
)
print('follow up mean avg_value_LDL_CHOLESTEROL_CALCULATED:', np.mean(yes_avg_value_LDL_CHOLESTEROL_CALCULATED))
print('no follow up mean avg_value_LDL_CHOLESTEROL_CALCULATED:', np.mean(no_avg_value_LDL_CHOLESTEROL_CALCULATED))
print('avg_value_LDL_CHOLESTEROL_CALCULATED p-value:', p_value)

follow up mean avg_value_LDL_CHOLESTEROL_CALCULATED: 125.45454545454545
no follow up mean avg_value_LDL_CHOLESTEROL_CALCULATED: 56.0
avg_value_LDL_CHOLESTEROL_CALCULATED p-value: 0.1472963199060483


In [72]:
# max_value_LDL_CHOLESTEROL_CALCULATED: two-sample t-test of
# `max_value_LDL CHOLESTEROL, CALCULATED` between the `yes` and `no` groups
# (printed below as "follow up" / "no follow up").
# NOTE(review): the recorded output matches the
# avg_value_LDL_CHOLESTEROL_CALCULATED cell exactly, which suggests the avg_ and
# max_ columns coincide wherever a value exists; confirm.

# Missing values are dropped per group before converting to NumPy arrays.
yes_max_value_LDL_CHOLESTEROL_CALCULATED, no_max_value_LDL_CHOLESTEROL_CALCULATED = (
    group['max_value_LDL CHOLESTEROL, CALCULATED'].dropna().to_numpy()
    for group in (yes, no)
)
# equal_var=True selects the pooled-variance (Student) form of the test.
t_stat, p_value = ttest_ind(
    yes_max_value_LDL_CHOLESTEROL_CALCULATED,
    no_max_value_LDL_CHOLESTEROL_CALCULATED,
    equal_var=True,
)
print('follow up mean max_value_LDL_CHOLESTEROL_CALCULATED:', np.mean(yes_max_value_LDL_CHOLESTEROL_CALCULATED))
print('no follow up mean max_value_LDL_CHOLESTEROL_CALCULATED:', np.mean(no_max_value_LDL_CHOLESTEROL_CALCULATED))
print('max_value_LDL_CHOLESTEROL_CALCULATED p-value:', p_value)

follow up mean max_value_LDL_CHOLESTEROL_CALCULATED: 125.45454545454545
no follow up mean max_value_LDL_CHOLESTEROL_CALCULATED: 56.0
max_value_LDL_CHOLESTEROL_CALCULATED p-value: 0.1472963199060483
