# Analysis of Week 1 - Week 2 Changes

In [3]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf 
from statsmodels.stats.weightstats import DescrStatsW


# Load the merged survey file and list its columns.
svy = pd.read_csv(
    '../20_analysis_datasets/merged_surveys_w_analysis_vars.csv'
)
svy.columns

Index(['Unnamed: 0', 'uniqueID', 'Date', 'Voter File Match',
       'Registered Voter (of Voter File Matches)', 'weight',
       'Q1. Health Quality', 'age', 'DEMOGRAPHICS - GENDER',
       'Q4. Number of People in HH', 'Q5. Children in HH',
       'Q6. Non-HH Face to Face Count', 'Q7. Six Feet Away? (If Q6 > 0)',
       'Q8. HH Member Going to Work',
       'Q9. Children Interacting with Other Children ',
       'Q10. Times in Group > 20 in Last Week', 'Family', 'Friends',
       'Co-workers', 'Clients, patients, or patrons',
       'Any other type of person not already mentioned',
       'Q12. Handwashing Count',
       'Q13. Currently Practicing Social Distancing?',
       'Q14. Currently Experiencing Symptoms?',
       'Q15. Likelihood of getting Coronavirus',
       'Q16. NC Response to Coronavirus', 'Q17. Changes to Routine ',
       'Q18. College Degree', 'Q19. Latino', 'Q20. Race',
       'Q21. Panel Willingness', 'Q19-20. Race + Ethnicity', 'Survey Mode',
       'DEMOGRAPHICS 

## Week 1 v. Week 2 changes!

In [4]:
# WLS of `ever_in_group` on the `week2` indicator, weighted by `weight`.
smf.wls(
    formula='ever_in_group ~ week2',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,ever_in_group,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,5.859
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.0156
Time:,11:56:36,Log-Likelihood:,-2005.4
No. Observations:,2896,AIC:,4015.0
Df Residuals:,2894,BIC:,4027.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2125,0.011,19.509,0.000,0.191,0.234
week2[T.True],-0.0356,0.015,-2.421,0.016,-0.065,-0.007

0,1,2,3
Omnibus:,990.718,Durbin-Watson:,1.974
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3079.8
Skew:,1.763,Prob(JB):,0.0
Kurtosis:,6.619,Cond. No.,2.74


In [5]:
# WLS of `close_interactions` on the `week2` indicator, weighted by `weight`.
smf.wls(
    formula='close_interactions ~ week2',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,close_interactions,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,3.456
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.0631
Time:,11:56:37,Log-Likelihood:,-4637.2
No. Observations:,2664,AIC:,9278.0
Df Residuals:,2662,BIC:,9290.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5005,0.033,15.253,0.000,0.436,0.565
week2[T.True],-0.0815,0.044,-1.859,0.063,-0.168,0.004

0,1,2,3
Omnibus:,2756.659,Durbin-Watson:,2.086
Prob(Omnibus):,0.0,Jarque-Bera (JB):,202928.427
Skew:,5.034,Prob(JB):,0.0
Kurtosis:,44.555,Cond. No.,2.78


In [6]:
# WLS of `any_close_interactions` on the `week2` indicator, weighted by `weight`.
smf.wls(
    formula='any_close_interactions ~ week2',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,any_close_interactions,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,1.473
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.225
Time:,11:56:37,Log-Likelihood:,-1946.3
No. Observations:,2664,AIC:,3897.0
Df Residuals:,2662,BIC:,3908.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2236,0.012,18.710,0.000,0.200,0.247
week2[T.True],-0.0194,0.016,-1.214,0.225,-0.051,0.012

0,1,2,3
Omnibus:,948.993,Durbin-Watson:,2.057
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2926.135
Skew:,1.849,Prob(JB):,0.0
Kurtosis:,6.562,Cond. No.,2.78


## Variation by sub-population

Variation by racial group (omitted category is White):

In [7]:
# WLS of `ever_in_group` on `week2` interacted with race
# (White is the reference category), weighted by `weight`.
smf.wls(
    formula='ever_in_group ~ week2*C(race, Treatment("White"))',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,ever_in_group,R-squared:,0.012
Model:,WLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,6.832
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,2.4e-06
Time:,11:56:39,Log-Likelihood:,-1991.4
No. Observations:,2896,AIC:,3995.0
Df Residuals:,2890,BIC:,4031.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1963,0.013,15.174,0.000,0.171,0.222
week2[T.True],-0.0450,0.017,-2.583,0.010,-0.079,-0.011
"C(race, Treatment(""White""))[T.Black]",0.0446,0.029,1.526,0.127,-0.013,0.102
"C(race, Treatment(""White""))[T.Other]",0.0685,0.033,2.058,0.040,0.003,0.134
"week2[T.True]:C(race, Treatment(""White""))[T.Black]",0.0782,0.039,2.005,0.045,0.002,0.155
"week2[T.True]:C(race, Treatment(""White""))[T.Other]",-0.0382,0.047,-0.816,0.415,-0.130,0.054

0,1,2,3
Omnibus:,1003.356,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3324.805
Skew:,1.754,Prob(JB):,0.0
Kurtosis:,6.905,Cond. No.,9.24


In [8]:
# WLS of `any_close_interactions` on `week2` interacted with race
# (White is the reference category), weighted by `weight`.
smf.wls(
    formula='any_close_interactions ~ week2*C(race, Treatment("White"))',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,any_close_interactions,R-squared:,0.003
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.418
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.214
Time:,11:56:39,Log-Likelihood:,-1943.5
No. Observations:,2664,AIC:,3899.0
Df Residuals:,2658,BIC:,3934.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2233,0.014,15.565,0.000,0.195,0.251
week2[T.True],-0.0155,0.019,-0.812,0.417,-0.053,0.022
"C(race, Treatment(""White""))[T.Black]",0.0124,0.031,0.400,0.689,-0.048,0.073
"C(race, Treatment(""White""))[T.Other]",-0.0172,0.038,-0.455,0.649,-0.091,0.057
"week2[T.True]:C(race, Treatment(""White""))[T.Black]",-0.0561,0.041,-1.358,0.175,-0.137,0.025
"week2[T.True]:C(race, Treatment(""White""))[T.Other]",0.0646,0.052,1.245,0.213,-0.037,0.166

0,1,2,3
Omnibus:,935.535,Durbin-Watson:,2.057
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2838.321
Skew:,1.828,Prob(JB):,0.0
Kurtosis:,6.493,Cond. No.,9.53


So Black respondents seem to have a higher baseline likelihood of having been in a group in the last week, and that rate actually increased ever so slightly in week 2 (in contrast to White and Other respondents, who fell a little). 

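But Black respondents also report fewer close interactions in which social distancing wasn't maintained in Week 2, and that number decreased more for them than for White respondents (though the week 2 × Black interaction is not statistically significant, p = 0.175). 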

In [9]:
# WLS of `someone_working` on `week2` interacted with race
# (White is the reference category), weighted by `weight`.
smf.wls(
    formula='someone_working ~ week2*C(race, Treatment("White"))',
    data=svy,
    weights=svy['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,someone_working,R-squared:,0.002
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.9925
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.421
Time:,11:56:40,Log-Likelihood:,-2715.5
No. Observations:,2931,AIC:,5443.0
Df Residuals:,2925,BIC:,5479.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5022,0.016,30.753,0.000,0.470,0.534
week2[T.True],-0.0213,0.022,-0.965,0.335,-0.065,0.022
"C(race, Treatment(""White""))[T.Black]",-0.0152,0.036,-0.416,0.678,-0.087,0.056
"C(race, Treatment(""White""))[T.Other]",-0.0524,0.042,-1.248,0.212,-0.135,0.030
"week2[T.True]:C(race, Treatment(""White""))[T.Black]",-0.0288,0.049,-0.589,0.556,-0.125,0.067
"week2[T.True]:C(race, Treatment(""White""))[T.Other]",0.0758,0.060,1.273,0.203,-0.041,0.193

0,1,2,3
Omnibus:,140.432,Durbin-Watson:,1.939
Prob(Omnibus):,0.0,Jarque-Bera (JB):,122.012
Skew:,0.432,Prob(JB):,3.2000000000000003e-27
Kurtosis:,2.497,Cond. No.,9.23


So Black households are working less in general, and decreased the amount they work more than White households. So the group-exposure pattern above is likely not work related, interestingly. 

## Appendix: Double-check that the regression uses weights the same way as the basic tool (not inverted)

In [11]:
# Keep only the rows flagged `week1`; copy so later edits don't touch `svy`.
svy1 = svy.loc[svy['week1']].copy()

# Helper functions
def get_group_mean(data, question):
    """Return the weighted mean of column `question` in `data`.

    Rows where `question` is null are dropped first; the `weight` column
    supplies the weights passed to `DescrStatsW`.
    """
    answered = data[[question, 'weight']].dropna(subset=[question])
    return DescrStatsW(answered[question], answered['weight']).mean

def get_group_sumweights(data, question):
    """Return the sum of `weight` over rows of `data` with a non-null `question`.

    Rows where `question` is null are dropped before the weights are
    totalled via `DescrStatsW.sum_weights`.
    """
    answered = data[[question, 'weight']].dropna(subset=[question])
    return DescrStatsW(answered[question], answered['weight']).sum_weights


# Weighted mean of `ever_in_group` within each race group of the week-1 subset.
r = (
    svy1
    .groupby('race')
    .apply(lambda grp: get_group_mean(grp, 'ever_in_group'))
)
r

race
Black    0.240851
Other    0.264728
White    0.196275
dtype: float64

In [12]:
# Black minus White gap in the weighted group means.
r.at['Black'] - r.at['White']

0.044575436286915404

In [14]:
# WLS of `ever_in_group` on race (White is the reference category) using the
# week-1 subset, for comparison with the weighted group-mean gap from `r`.
smf.wls(
    formula='ever_in_group ~ C(race, Treatment("White"))',
    data=svy1,
    weights=svy1['weight'],
).fit().summary()

0,1,2,3
Dep. Variable:,ever_in_group,R-squared:,0.004
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,2.654
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.0707
Time:,11:57:01,Log-Likelihood:,-915.21
No. Observations:,1334,AIC:,1836.0
Df Residuals:,1331,BIC:,1852.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1963,0.013,14.708,0.000,0.170,0.222
"C(race, Treatment(""White""))[T.Black]",0.0446,0.030,1.479,0.139,-0.015,0.104
"C(race, Treatment(""White""))[T.Other]",0.0685,0.034,1.995,0.046,0.001,0.136

0,1,2,3
Omnibus:,342.239,Durbin-Watson:,2.058
Prob(Omnibus):,0.0,Jarque-Bera (JB):,676.704
Skew:,1.525,Prob(JB):,1.14e-147
Kurtosis:,4.695,Cond. No.,3.32
