In [29]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind,chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
# Silence every warning for the rest of the kernel session (no category, message or module filter).
# NOTE(review): this also hides any pandas/scipy deprecation or runtime warnings that later
# cells might raise — consider narrowing the filter to the specific warning being silenced.
warnings.filterwarnings("ignore")

In [30]:
# Load the merged dataset (path is relative to the notebook's working directory)
# and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which usually means the CSV was
# written with its index — consider index_col=0 if nothing downstream relies on that column.
data = pd.read_csv(filepath_or_buffer='../processed_data/merged_data.csv')
data.head(n=5)

Unnamed: 0,encrypt_mrn,ed_screened,visit_date,new_diagnosis,pcp_followup,max_sbp,min_sbp,max_dbp,min_dbp,max_diff,...,max_value_HEMOGLOBIN A1C,"max_value_LDL CHOLESTEROL, CALCULATED","min_value_CHOLESTEROL, TOTAL",min_value_CREATININE,min_value_GLUCOSE,"min_value_GLUCOSE, POC",min_value_HDL CHOLESTEROL,min_value_HEMOGLOBIN A1C,"min_value_LDL CHOLESTEROL, CALCULATED",visit_type
0,2303579,NYU Brooklyn,2023-01-10,Yes,Yes,133.0,114.0,96.0,64.0,52.0,...,9.3,176.0,268.0,0.74,305.0,230.0,31.0,9.3,176.0,Hospitalization
1,2117042,NYU Long Island,2022-01-13,Yes,Yes,102.0,102.0,69.0,69.0,33.0,...,,,,,,443.0,,,,Hospitalization
2,2117326,NYU Brooklyn,2022-01-14,Yes,No,158.0,148.0,88.0,82.0,76.0,...,11.2,,,0.7,423.0,404.0,,11.2,,Emergency
3,2112782,NYU Brooklyn,2022-01-16,Yes,No,181.0,146.0,93.0,87.0,88.0,...,11.6,,,1.03,418.0,491.0,,11.6,,Emergency
4,2062174,NYU Cobble Hill,2022-01-19,Yes,Yes,177.0,135.0,100.0,68.0,77.0,...,8.3,,,,,181.0,,8.3,,Emergency


In [31]:
# Split the visits by PCP follow-up status. Rows whose 'pcp_followup' is neither
# 'Yes' nor 'No' (including missing values) end up in neither group.
yes = data.loc[data['pcp_followup'].eq('Yes')]
no = data.loc[data['pcp_followup'].eq('No')]

In [32]:
# nyc_tract: chi-squared test of independence between PCP follow-up status and nyc_tract.

# Per-group category counts (value_counts drops missing values by default).
yes_nyc_tract = yes['nyc_tract'].value_counts()
no_nyc_tract = no['nyc_tract'].value_counts()

# Align both groups on the union of observed categories; a category absent from one
# group gets a count of 0. Reindex on a *sorted* list rather than the raw set so the
# label order (and therefore the table's column order) is stable across runs.
all_categories = set(yes_nyc_tract.index).union(set(no_nyc_tract.index))
category_order = sorted(all_categories)
yes_aligned = yes_nyc_tract.reindex(category_order, fill_value=0)
no_aligned = no_nyc_tract.reindex(category_order, fill_value=0)

# Contingency table: row 0 = follow-up, row 1 = no follow-up, one column per category.
nyc_tract = np.array([yes_aligned, no_aligned])

# Within-group share of each category.
print(yes_aligned / yes_aligned.sum())
print(no_aligned / no_aligned.sum())

# scipy applies Yates' continuity correction by default when the table has 1 degree
# of freedom (correction=True).
chi2, p, dof, expected = chi2_contingency(nyc_tract)
print('Chi-squared test p-value:', p)

nyc_tract
0.0    0.376238
1.0    0.623762
Name: count, dtype: float64
nyc_tract
0.0    0.28
1.0    0.72
Name: count, dtype: float64
Chi-squared test p-value: 0.2684222506280162


In [33]:
# total_pop: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_total_pop, no_total_pop = (
    group['total_pop'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_total_pop, no_total_pop, equal_var=True)
print('follow up mean total_pop:', np.mean(yes_total_pop))
print('no follow up mean total_pop:', np.mean(no_total_pop))
print('total_pop p-value:', p_value)

follow up mean total_pop: 4698.138613861386
no follow up mean total_pop: 4949.74
total_pop p-value: 0.3930052951179164


In [34]:
# households: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_households, no_households = (
    group['households'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_households, no_households, equal_var=True)
print('follow up mean households:', np.mean(yes_households))
print('no follow up mean households:', np.mean(no_households))
print('households p-value:', p_value)

follow up mean households: 1663.3910891089108
no follow up mean households: 1657.74
households p-value: 0.9643000169255532


In [35]:
# housing_units: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_housing_units, no_housing_units = (
    group['housing_units'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_housing_units, no_housing_units, equal_var=True)
print('follow up mean housing_units:', np.mean(yes_housing_units))
print('no follow up mean housing_units:', np.mean(no_housing_units))
print('housing_units p-value:', p_value)

follow up mean housing_units: 1804.3168316831684
no follow up mean housing_units: 1791.76
housing_units p-value: 0.9314740914850441


In [36]:
# p_children: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_children, no_p_children = (
    group['p_children'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_children, no_p_children, equal_var=True)
print('follow up mean p_children:', np.mean(yes_p_children))
print('no follow up mean p_children:', np.mean(no_p_children))
print('p_children p-value:', p_value)

follow up mean p_children: 0.21818145613366333
no follow up mean p_children: 0.22982541490000002
p_children p-value: 0.3308343702150246


In [37]:
# p_elderly: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_elderly, no_p_elderly = (
    group['p_elderly'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_elderly, no_p_elderly, equal_var=True)
print('follow up mean p_elderly:', np.mean(yes_p_elderly))
print('no follow up mean p_elderly:', np.mean(no_p_elderly))
print('p_elderly p-value:', p_value)

follow up mean p_elderly: 0.14965579773514853
no follow up mean p_elderly: 0.131295710224
p_elderly p-value: 0.051262473365082224


In [38]:
# p_adults: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_adults, no_p_adults = (
    group['p_adults'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_adults, no_p_adults, equal_var=True)
print('follow up mean p_adults:', np.mean(yes_p_adults))
print('no follow up mean p_adults:', np.mean(no_p_adults))
print('p_adults p-value:', p_value)

follow up mean p_adults: 0.6321627466336632
no follow up mean p_adults: 0.6388788766
p_adults p-value: 0.5854107096957211


In [39]:
# p_female: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_female, no_p_female = (
    group['p_female'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_female, no_p_female, equal_var=True)
print('follow up mean p_female:', np.mean(yes_p_female))
print('no follow up mean p_female:', np.mean(no_p_female))
print('p_female p-value:', p_value)

follow up mean p_female: 0.5202583525742575
no follow up mean p_female: 0.5186353838
p_female p-value: 0.8154372331090781


In [40]:
# mdn_age: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_mdn_age, no_mdn_age = (
    group['mdn_age'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_mdn_age, no_mdn_age, equal_var=True)
print('follow up mean mdn_age:', np.mean(yes_mdn_age))
print('no follow up mean mdn_age:', np.mean(no_mdn_age))
print('mdn_age p-value:', p_value)

follow up mean mdn_age: 38.429702920792074
no follow up mean mdn_age: 36.60600004
mdn_age p-value: 0.06979499678998277


In [41]:
# p_nhwhite: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_nhwhite, no_p_nhwhite = (
    group['p_nhwhite'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_nhwhite, no_p_nhwhite, equal_var=True)
print('follow up mean p_nhwhite:', np.mean(yes_p_nhwhite))
print('no follow up mean p_nhwhite:', np.mean(no_p_nhwhite))
print('p_nhwhite p-value:', p_value)

follow up mean p_nhwhite: 0.4106139625705446
no follow up mean p_nhwhite: 0.28645493445
p_nhwhite p-value: 0.007186049417806996


In [42]:
# p_nhblack: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_nhblack, no_p_nhblack = (
    group['p_nhblack'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_nhblack, no_p_nhblack, equal_var=True)
print('follow up mean p_nhblack:', np.mean(yes_p_nhblack))
print('no follow up mean p_nhblack:', np.mean(no_p_nhblack))
print('p_nhblack p-value:', p_value)

follow up mean p_nhblack: 0.1934355873269307
no follow up mean p_nhblack: 0.17554934482599996
p_nhblack p-value: 0.6362545038330165


In [43]:
# p_hispanic: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_hispanic, no_p_hispanic = (
    group['p_hispanic'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_hispanic, no_p_hispanic, equal_var=True)
print('follow up mean p_hispanic:', np.mean(yes_p_hispanic))
print('no follow up mean p_hispanic:', np.mean(no_p_hispanic))
print('p_hispanic p-value:', p_value)

follow up mean p_hispanic: 0.23694309061237623
no follow up mean p_hispanic: 0.31668379463999996
p_hispanic p-value: 0.012585728175877964


In [44]:
# p_nhasian: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_nhasian, no_p_nhasian = (
    group['p_nhasian'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_nhasian, no_p_nhasian, equal_var=True)
print('follow up mean p_nhasian:', np.mean(yes_p_nhasian))
print('no follow up mean p_nhasian:', np.mean(no_p_nhasian))
print('p_nhasian p-value:', p_value)

follow up mean p_nhasian: 0.11979168779034653
no follow up mean p_nhasian: 0.178762203364
p_nhasian p-value: 0.009846837598544104


In [45]:
# p_other: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_other, no_p_other = (
    group['p_other'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_other, no_p_other, equal_var=True)
print('follow up mean p_other:', np.mean(yes_p_other))
print('no follow up mean p_other:', np.mean(no_p_other))
print('p_other p-value:', p_value)

follow up mean p_other: 0.03921567160172948
no follow up mean p_other: 0.042549723033770324
p_other p-value: 0.6145723116450477


In [46]:
# p_moved: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_moved, no_p_moved = (
    group['p_moved'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_moved, no_p_moved, equal_var=True)
print('follow up mean p_moved:', np.mean(yes_p_moved))
print('no follow up mean p_moved:', np.mean(no_p_moved))
print('p_moved p-value:', p_value)

follow up mean p_moved: 0.08828295466138614
no follow up mean p_moved: 0.09716681164
p_moved p-value: 0.3644258748061199


In [47]:
# p_longcommute: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_longcommute, no_p_longcommute = (
    group['p_longcommute'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_longcommute, no_p_longcommute, equal_var=True)
print('follow up mean p_longcommute:', np.mean(yes_p_longcommute))
print('no follow up mean p_longcommute:', np.mean(no_p_longcommute))
print('p_longcommute p-value:', p_value)

follow up mean p_longcommute: 0.07164248420544554
no follow up mean p_longcommute: 0.07089149956200001
p_longcommute p-value: 0.9250102828722315


In [48]:
# p_marriednone: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_marriednone, no_p_marriednone = (
    group['p_marriednone'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_marriednone, no_p_marriednone, equal_var=True)
print('follow up mean p_marriednone:', np.mean(yes_p_marriednone))
print('no follow up mean p_marriednone:', np.mean(no_p_marriednone))
print('p_marriednone p-value:', p_value)

follow up mean p_marriednone: 0.25463179787128715
no follow up mean p_marriednone: 0.22687062416
p_marriednone p-value: 0.10344317139472445


In [49]:
# p_marriedkids: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_marriedkids, no_p_marriedkids = (
    group['p_marriedkids'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_marriedkids, no_p_marriedkids, equal_var=True)
print('follow up mean p_marriedkids:', np.mean(yes_p_marriedkids))
print('no follow up mean p_marriedkids:', np.mean(no_p_marriedkids))
print('p_marriedkids p-value:', p_value)

follow up mean p_marriedkids: 0.20039146238613864
no follow up mean p_marriedkids: 0.20734773101999998
p_marriedkids p-value: 0.6588666533826775


In [50]:
# p_singlenone: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_singlenone, no_p_singlenone = (
    group['p_singlenone'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_singlenone, no_p_singlenone, equal_var=True)
print('follow up mean p_singlenone:', np.mean(yes_p_singlenone))
print('no follow up mean p_singlenone:', np.mean(no_p_singlenone))
print('p_singlenone p-value:', p_value)

follow up mean p_singlenone: 0.4616781934158416
no follow up mean p_singlenone: 0.4748783563999999
p_singlenone p-value: 0.5917679812889618


In [51]:
# p_malekids: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_malekids, no_p_malekids = (
    group['p_malekids'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_malekids, no_p_malekids, equal_var=True)
print('follow up mean p_malekids:', np.mean(yes_p_malekids))
print('no follow up mean p_malekids:', np.mean(no_p_malekids))
print('p_malekids p-value:', p_value)

follow up mean p_malekids: 0.009494221797524753
no follow up mean p_malekids: 0.009227758686
p_malekids p-value: 0.90555372758643


In [52]:
# p_femalekids: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_femalekids, no_p_femalekids = (
    group['p_femalekids'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_femalekids, no_p_femalekids, equal_var=True)
print('follow up mean p_femalekids:', np.mean(yes_p_femalekids))
print('no follow up mean p_femalekids:', np.mean(no_p_femalekids))
print('p_femalekids p-value:', p_value)

follow up mean p_femalekids: 0.05470147263811882
no follow up mean p_femalekids: 0.054260302914
p_femalekids p-value: 0.9585281906849958


In [53]:
# p_cohabitkids: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_cohabitkids, no_p_cohabitkids = (
    group['p_cohabitkids'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_cohabitkids, no_p_cohabitkids, equal_var=True)
print('follow up mean p_cohabitkids:', np.mean(yes_p_cohabitkids))
print('no follow up mean p_cohabitkids:', np.mean(no_p_cohabitkids))
print('p_cohabitkids p-value:', p_value)

follow up mean p_cohabitkids: 0.019102850653465345
no follow up mean p_cohabitkids: 0.027415226128000004
p_cohabitkids p-value: 0.027244197398262438


In [54]:
# p_nohsdeg: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_nohsdeg, no_p_nohsdeg = (
    group['p_nohsdeg'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_nohsdeg, no_p_nohsdeg, equal_var=True)
print('follow up mean p_nohsdeg:', np.mean(yes_p_nohsdeg))
print('no follow up mean p_nohsdeg:', np.mean(no_p_nohsdeg))
print('p_nohsdeg p-value:', p_value)

follow up mean p_nohsdeg: 0.15588321309306927
no follow up mean p_nohsdeg: 0.20844235536000003
p_nohsdeg p-value: 0.005767990349410308


In [55]:
# p_hsonly: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_hsonly, no_p_hsonly = (
    group['p_hsonly'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_hsonly, no_p_hsonly, equal_var=True)
print('follow up mean p_hsonly:', np.mean(yes_p_hsonly))
print('no follow up mean p_hsonly:', np.mean(no_p_hsonly))
print('p_hsonly p-value:', p_value)

follow up mean p_hsonly: 0.23977880238465346
no follow up mean p_hsonly: 0.25036971648
p_hsonly p-value: 0.5222695162411077


In [56]:
# p_somecollege: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_somecollege, no_p_somecollege = (
    group['p_somecollege'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_somecollege, no_p_somecollege, equal_var=True)
print('follow up mean p_somecollege:', np.mean(yes_p_somecollege))
print('no follow up mean p_somecollege:', np.mean(no_p_somecollege))
print('p_somecollege p-value:', p_value)

follow up mean p_somecollege: 0.1325216445891089
no follow up mean p_somecollege: 0.13222630993999998
p_somecollege p-value: 0.9727387517077359


In [57]:
# p_collegeplus: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_collegeplus, no_p_collegeplus = (
    group['p_collegeplus'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_collegeplus, no_p_collegeplus, equal_var=True)
print('follow up mean p_collegeplus:', np.mean(yes_p_collegeplus))
print('no follow up mean p_collegeplus:', np.mean(no_p_collegeplus))
print('p_collegeplus p-value:', p_value)

follow up mean p_collegeplus: 0.4718163400990099
no follow up mean p_collegeplus: 0.408961621
p_collegeplus p-value: 0.049216382029972125


In [58]:
# p_onlyenglish: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_onlyenglish, no_p_onlyenglish = (
    group['p_onlyenglish'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_onlyenglish, no_p_onlyenglish, equal_var=True)
print('follow up mean p_onlyenglish:', np.mean(yes_p_onlyenglish))
print('no follow up mean p_onlyenglish:', np.mean(no_p_onlyenglish))
print('p_onlyenglish p-value:', p_value)

follow up mean p_onlyenglish: 0.5540028207722772
no follow up mean p_onlyenglish: 0.4249028263
p_onlyenglish p-value: 0.00018653248880466875


In [59]:
# p_spanishlimited: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_spanishlimited, no_p_spanishlimited = (
    group['p_spanishlimited'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_spanishlimited, no_p_spanishlimited, equal_var=True)
print('follow up mean p_spanishlimited:', np.mean(yes_p_spanishlimited))
print('no follow up mean p_spanishlimited:', np.mean(no_p_spanishlimited))
print('p_spanishlimited p-value:', p_value)

follow up mean p_spanishlimited: 0.0570984791580198
no follow up mean p_spanishlimited: 0.087819292476
p_spanishlimited p-value: 0.02586288193908005


In [60]:
# p_asianlimited: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_asianlimited, no_p_asianlimited = (
    group['p_asianlimited'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_asianlimited, no_p_asianlimited, equal_var=True)
print('follow up mean p_asianlimited:', np.mean(yes_p_asianlimited))
print('no follow up mean p_asianlimited:', np.mean(no_p_asianlimited))
print('p_asianlimited p-value:', p_value)

follow up mean p_asianlimited: 0.026509982183663367
no follow up mean p_asianlimited: 0.046673201508
p_asianlimited p-value: 0.06515654839562542


In [61]:
# p_otherlimited: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_otherlimited, no_p_otherlimited = (
    group['p_otherlimited'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_otherlimited, no_p_otherlimited, equal_var=True)
print('follow up mean p_otherlimited:', np.mean(yes_p_otherlimited))
print('no follow up mean p_otherlimited:', np.mean(no_p_otherlimited))
print('p_otherlimited p-value:', p_value)

follow up mean p_otherlimited: 0.03456472793316832
no follow up mean p_otherlimited: 0.059590947692
p_otherlimited p-value: 0.018573195538837035


In [64]:
# p_limitedall: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_limitedall, no_p_limitedall = (
    group['p_limitedall'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_limitedall, no_p_limitedall, equal_var=True)
print('follow up mean p_limitedall:', np.mean(yes_p_limitedall))
print('no follow up mean p_limitedall:', np.mean(no_p_limitedall))
print('p_limitedall p-value:', p_value)

follow up mean p_limitedall: 0.11817318925742573
no follow up mean p_limitedall: 0.19408344107399997
p_limitedall p-value: 0.00047978040050503584


In [66]:
# p_notlimited: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_notlimited, no_p_notlimited = (
    group['p_notlimited'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_notlimited, no_p_notlimited, equal_var=True)
print('follow up mean p_notlimited:', np.mean(yes_p_notlimited))
print('no follow up mean p_notlimited:', np.mean(no_p_notlimited))
print('p_notlimited p-value:', p_value)

follow up mean p_notlimited: 0.327823988509901
no follow up mean p_notlimited: 0.38101373600000005
p_notlimited p-value: 0.011695941339047552


In [67]:
# p_popbelow1fpl: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_popbelow1fpl, no_p_popbelow1fpl = (
    group['p_popbelow1fpl'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_popbelow1fpl, no_p_popbelow1fpl, equal_var=True)
print('follow up mean p_popbelow1fpl:', np.mean(yes_p_popbelow1fpl))
print('no follow up mean p_popbelow1fpl:', np.mean(no_p_popbelow1fpl))
print('p_popbelow1fpl p-value:', p_value)

follow up mean p_popbelow1fpl: 0.1356102599950495
no follow up mean p_popbelow1fpl: 0.17898155266
p_popbelow1fpl p-value: 0.026050045226489776


In [68]:
# p_popbelow2fpl: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_popbelow2fpl, no_p_popbelow2fpl = (
    group['p_popbelow2fpl'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_popbelow2fpl, no_p_popbelow2fpl, equal_var=True)
print('follow up mean p_popbelow2fpl:', np.mean(yes_p_popbelow2fpl))
print('no follow up mean p_popbelow2fpl:', np.mean(no_p_popbelow2fpl))
print('p_popbelow2fpl p-value:', p_value)

follow up mean p_popbelow2fpl: 0.2797845124009901
no follow up mean p_popbelow2fpl: 0.36294572826000004
p_popbelow2fpl p-value: 0.004392580713773799


In [69]:
# p_povmarriedfam: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_povmarriedfam, no_p_povmarriedfam = (
    group['p_povmarriedfam'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_povmarriedfam, no_p_povmarriedfam, equal_var=True)
print('follow up mean p_povmarriedfam:', np.mean(yes_p_povmarriedfam))
print('no follow up mean p_povmarriedfam:', np.mean(no_p_povmarriedfam))
print('p_povmarriedfam p-value:', p_value)

follow up mean p_povmarriedfam: 0.02700624154306931
no follow up mean p_povmarriedfam: 0.05020877951399999
p_povmarriedfam p-value: 0.014201089135882132


In [70]:
# p_povmalefam: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_povmalefam, no_p_povmalefam = (
    group['p_povmalefam'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_povmalefam, no_p_povmalefam, equal_var=True)
print('follow up mean p_povmalefam:', np.mean(yes_p_povmalefam))
print('no follow up mean p_povmalefam:', np.mean(no_p_povmalefam))
print('p_povmalefam p-value:', p_value)

follow up mean p_povmalefam: 0.0071548278866831695
no follow up mean p_povmalefam: 0.005368148544
p_povmalefam p-value: 0.45448021579360154


In [71]:
# p_povfemalefam: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_povfemalefam, no_p_povfemalefam = (
    group['p_povfemalefam'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_povfemalefam, no_p_povfemalefam, equal_var=True)
print('follow up mean p_povfemalefam:', np.mean(yes_p_povfemalefam))
print('no follow up mean p_povfemalefam:', np.mean(no_p_povfemalefam))
print('p_povfemalefam p-value:', p_value)

follow up mean p_povfemalefam: 0.038490707878217824
no follow up mean p_povfemalefam: 0.04129077882200001
p_povfemalefam p-value: 0.7693968035861455


In [72]:
# hh_mdnincome: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_hh_mdnincome, no_hh_mdnincome = (
    group['hh_mdnincome'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_hh_mdnincome, no_hh_mdnincome, equal_var=True)
print('follow up mean hh_mdnincome:', np.mean(yes_hh_mdnincome))
print('no follow up mean hh_mdnincome:', np.mean(no_hh_mdnincome))
print('hh_mdnincome p-value:', p_value)

follow up mean hh_mdnincome: 95393.41293532339
no follow up mean hh_mdnincome: 77582.48
hh_mdnincome p-value: 0.011204983314145964


In [73]:
# p_pubassist: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_pubassist, no_p_pubassist = (
    group['p_pubassist'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_pubassist, no_p_pubassist, equal_var=True)
print('follow up mean p_pubassist:', np.mean(yes_p_pubassist))
print('no follow up mean p_pubassist:', np.mean(no_p_pubassist))
print('p_pubassist p-value:', p_value)

follow up mean p_pubassist: 0.9626811503465346
no follow up mean p_pubassist: 0.9549708365999998
p_pubassist p-value: 0.2671404902605956


In [74]:
# p_foodstamps: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_foodstamps, no_p_foodstamps = (
    group['p_foodstamps'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_foodstamps, no_p_foodstamps, equal_var=True)
print('follow up mean p_foodstamps:', np.mean(yes_p_foodstamps))
print('no follow up mean p_foodstamps:', np.mean(no_p_foodstamps))
print('p_foodstamps p-value:', p_value)

follow up mean p_foodstamps: 0.15272412666881188
no follow up mean p_foodstamps: 0.20654407094
p_foodstamps p-value: 0.02867810046527987


In [75]:
# p_assistorfood: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_assistorfood, no_p_assistorfood = (
    group['p_assistorfood'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_assistorfood, no_p_assistorfood, equal_var=True)
print('follow up mean p_assistorfood:', np.mean(yes_p_assistorfood))
print('no follow up mean p_assistorfood:', np.mean(no_p_assistorfood))
print('p_assistorfood p-value:', p_value)

follow up mean p_assistorfood: 0.8367731666831684
no follow up mean p_assistorfood: 0.7836644423999999
p_assistorfood p-value: 0.03159220272814488


In [76]:
# p_unemployed: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_unemployed, no_p_unemployed = (
    group['p_unemployed'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_unemployed, no_p_unemployed, equal_var=True)
print('follow up mean p_unemployed:', np.mean(yes_p_unemployed))
print('no follow up mean p_unemployed:', np.mean(no_p_unemployed))
print('p_unemployed p-value:', p_value)

follow up mean p_unemployed: 0.06329186250990099
no follow up mean p_unemployed: 0.06763203584000001
p_unemployed p-value: 0.5328247613259829


In [77]:
# h_vacant: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_vacant, no_h_vacant = (
    group['h_vacant'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_vacant, no_h_vacant, equal_var=True)
print('follow up mean h_vacant:', np.mean(yes_h_vacant))
print('no follow up mean h_vacant:', np.mean(no_h_vacant))
print('h_vacant p-value:', p_value)

follow up mean h_vacant: 0.07271387069059405
no follow up mean h_vacant: 0.07495217272
h_vacant p-value: 0.7972777347936559


In [78]:
# h_renter: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_renter, no_h_renter = (
    group['h_renter'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_renter, no_h_renter, equal_var=True)
print('follow up mean h_renter:', np.mean(yes_h_renter))
print('no follow up mean h_renter:', np.mean(no_h_renter))
print('h_renter p-value:', p_value)

follow up mean h_renter: 0.49786316770792083
no follow up mean h_renter: 0.6331857423999999
h_renter p-value: 0.00404516013240737


In [79]:
# h_occupants: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_occupants, no_h_occupants = (
    group['h_occupants'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_occupants, no_h_occupants, equal_var=True)
print('follow up mean h_occupants:', np.mean(yes_h_occupants))
print('no follow up mean h_occupants:', np.mean(no_h_occupants))
print('h_occupants p-value:', p_value)

follow up mean h_occupants: 0.006488016681188119
no follow up mean h_occupants: 0.010750185798
h_occupants p-value: 0.04369278239249115


In [80]:
# h_novehicles: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_novehicles, no_h_novehicles = (
    group['h_novehicles'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_novehicles, no_h_novehicles, equal_var=True)
print('follow up mean h_novehicles:', np.mean(yes_h_novehicles))
print('no follow up mean h_novehicles:', np.mean(no_h_novehicles))
print('h_novehicles p-value:', p_value)

follow up mean h_novehicles: 0.3649638334366337
no follow up mean h_novehicles: 0.44906783782
h_novehicles p-value: 0.061580477700744714


In [81]:
# h_mdnrent: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_mdnrent, no_h_mdnrent = (
    group['h_mdnrent'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_mdnrent, no_h_mdnrent, equal_var=True)
print('follow up mean h_mdnrent:', np.mean(yes_h_mdnrent))
print('no follow up mean h_mdnrent:', np.mean(no_h_mdnrent))
print('h_mdnrent p-value:', p_value)

follow up mean h_mdnrent: 1848.1347150259066
no follow up mean h_mdnrent: 1752.4583333333333
h_mdnrent p-value: 0.3348777662057969


In [82]:
# h_rentpercent: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_rentpercent, no_h_rentpercent = (
    group['h_rentpercent'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_rentpercent, no_h_rentpercent, equal_var=True)
print('follow up mean h_rentpercent:', np.mean(yes_h_rentpercent))
print('no follow up mean h_rentpercent:', np.mean(no_h_rentpercent))
print('h_rentpercent p-value:', p_value)

follow up mean h_rentpercent: 0.33120102896907216
no follow up mean h_rentpercent: 0.3495599988
h_rentpercent p-value: 0.15395488289968978


In [83]:
# h_houseprice: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_houseprice, no_h_houseprice = (
    group['h_houseprice'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_houseprice, no_h_houseprice, equal_var=True)
print('follow up mean h_houseprice:', np.mean(yes_h_houseprice))
print('no follow up mean h_houseprice:', np.mean(no_h_houseprice))
print('h_houseprice p-value:', p_value)

follow up mean h_houseprice: 727349.241025641
no follow up mean h_houseprice: 683117.0212765958
h_houseprice p-value: 0.42125373788146325


In [84]:
# p_private: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_private, no_p_private = (
    group['p_private'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_private, no_p_private, equal_var=True)
print('follow up mean p_private:', np.mean(yes_p_private))
print('no follow up mean p_private:', np.mean(no_p_private))
print('p_private p-value:', p_value)

follow up mean p_private: 0.4800259750792079
no follow up mean p_private: 0.3948984266
p_private p-value: 0.002543384624900946


In [85]:
# p_medicare: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_medicare, no_p_medicare = (
    group['p_medicare'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_medicare, no_p_medicare, equal_var=True)
print('follow up mean p_medicare:', np.mean(yes_p_medicare))
print('no follow up mean p_medicare:', np.mean(no_p_medicare))
print('p_medicare p-value:', p_value)

follow up mean p_medicare: 0.09564838141534654
no follow up mean p_medicare: 0.07540773412999999
p_medicare p-value: 0.008830538378284819


In [86]:
# p_medicaid: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_medicaid, no_p_medicaid = (
    group['p_medicaid'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_medicaid, no_p_medicaid, equal_var=True)
print('follow up mean p_medicaid:', np.mean(yes_p_medicaid))
print('no follow up mean p_medicaid:', np.mean(no_p_medicaid))
print('p_medicaid p-value:', p_value)

follow up mean p_medicaid: 0.2432236716742574
no follow up mean p_medicaid: 0.33130214194
p_medicaid p-value: 0.0025988268308943657


In [87]:
# p_otherinsur: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_otherinsur, no_p_otherinsur = (
    group['p_otherinsur'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_otherinsur, no_p_otherinsur, equal_var=True)
print('follow up mean p_otherinsur:', np.mean(yes_p_otherinsur))
print('no follow up mean p_otherinsur:', np.mean(no_p_otherinsur))
print('p_otherinsur p-value:', p_value)

follow up mean p_otherinsur: 0.11954375487623763
no follow up mean p_otherinsur: 0.10532024738399999
p_otherinsur p-value: 0.09052259802944301


In [88]:
# p_uninsured: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_uninsured, no_p_uninsured = (
    group['p_uninsured'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_uninsured, no_p_uninsured, equal_var=True)
print('follow up mean p_uninsured:', np.mean(yes_p_uninsured))
print('no follow up mean p_uninsured:', np.mean(no_p_uninsured))
print('p_uninsured p-value:', p_value)

follow up mean p_uninsured: 0.061558216305445536
no follow up mean p_uninsured: 0.09307145026000001
p_uninsured p-value: 4.218348447939471e-05


In [89]:
# h_nointernet: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_nointernet, no_h_nointernet = (
    group['h_nointernet'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_nointernet, no_h_nointernet, equal_var=True)
print('follow up mean h_nointernet:', np.mean(yes_h_nointernet))
print('no follow up mean h_nointernet:', np.mean(no_h_nointernet))
print('h_nointernet p-value:', p_value)

follow up mean h_nointernet: 0.10668857386584159
no follow up mean h_nointernet: 0.11191166768000001
h_nointernet p-value: 0.699228451605655


In [90]:
# h_nocomputer: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_h_nocomputer, no_h_nocomputer = (
    group['h_nocomputer'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_h_nocomputer, no_h_nocomputer, equal_var=True)
print('follow up mean h_nocomputer:', np.mean(yes_h_nocomputer))
print('no follow up mean h_nocomputer:', np.mean(no_h_nocomputer))
print('h_nocomputer p-value:', p_value)

follow up mean h_nocomputer: 0.07499260331881188
no follow up mean h_nocomputer: 0.08093668903600001
h_nocomputer p-value: 0.5882146879254095


In [91]:
# p_foreign: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_foreign, no_p_foreign = (
    group['p_foreign'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_foreign, no_p_foreign, equal_var=True)
print('follow up mean p_foreign:', np.mean(yes_p_foreign))
print('no follow up mean p_foreign:', np.mean(no_p_foreign))
print('p_foreign p-value:', p_value)

follow up mean p_foreign: 0.3148432666188119
no follow up mean p_foreign: 0.39080735240000003
p_foreign p-value: 0.002016375096063836


In [92]:
# p_disabled: two-sample t-test (pooled variance) of follow-up vs. no-follow-up groups.
# Missing values are dropped within each group before testing.
yes_p_disabled, no_p_disabled = (
    group['p_disabled'].dropna().to_numpy() for group in (yes, no)
)
t_stat, p_value = ttest_ind(yes_p_disabled, no_p_disabled, equal_var=True)
print('follow up mean p_disabled:', np.mean(yes_p_disabled))
print('no follow up mean p_disabled:', np.mean(no_p_disabled))
print('p_disabled p-value:', p_value)

follow up mean p_disabled: 0.0920380483341584
no follow up mean p_disabled: 0.09482295820000002
p_disabled p-value: 0.678853112929203
