Script to estimate change in stay at home devices pre and post covid:

1. Combines data of all months (Jan 1 to April 24).

2. Applies moving average and stratified re-weighting to estimate ratio of at home devices.


3. Finds median at home device ratio for pre covid based on county and day of the week(similar to google dataset)

4. Then estimates change in stay at home after covid.


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import os
import pandas as pd
import numpy as np

In [0]:
# Combine all the month social distancing county data in 1 pandas file
# TODO: Change folder and folder_fips according to drive


folder = '/content/drive/My Drive/Grad Courses/CSE547/COVID19 data analysis/safegraph/social-distancing/2020/'
folder_fips = "/content/drive/My Drive/Grad Courses/CSE547/COVID19 data analysis/safegraph/safegraph_open_census_data/metadata/cbg_fips_codes.csv"
month_1 = pd.read_feather(folder+'01'+'_county_level_device_home')
month_2 = pd.read_feather(folder+'02'+'_county_level_device_home')
month_3 = pd.read_feather(folder+'03'+'_county_level_device_home')
month_4 = pd.read_feather(folder+'04'+'_county_level_device_home')

all_months = pd.concat((month_1,month_2,month_3,month_4),ignore_index=True)
all_months.sort_values(by=['date_range_start'],ignore_index=True, inplace=True)


In [4]:
# Apply 7 day moving average to device count and completely_home_device_count and remove initial 7 values (Jan 1 to Jan 6)
def rolling_mean(sort_gp):
  sort_gp = sort_gp.sort_values(by=['date_range_start'], ignore_index = 'True')
  sort_gp['device_count_avg'] = sort_gp['device_count'].rolling(window=7).mean()
  sort_gp['at_home_avg'] = sort_gp['completely_home_device_count'].rolling(window=7).mean()
  sort_gp = sort_gp.loc[6:]
  return sort_gp

all_months_avg = all_months.groupby(['state','county'],as_index=False).apply(rolling_mean)

# Change from multiindex to level 0
all_months_avg =all_months_avg.reset_index(0).reset_index(drop=True)
all_months_avg = all_months_avg.drop(columns=['level_0'])
all_months_avg 

Unnamed: 0,state,county,date_range_start,device_count,completely_home_device_count,unique,device_count_avg,at_home_avg
0,AK,Aleutians East Borough,2020-01-07,8.666667,2.000000,2013.0,9.642857,3.523810
1,AK,Aleutians East Borough,2020-01-08,10.666667,3.666667,2013.0,9.880952,3.571429
2,AK,Aleutians East Borough,2020-01-09,13.500000,5.000000,2013.0,10.523810,3.714286
3,AK,Aleutians East Borough,2020-01-10,11.000000,3.333333,2013.0,10.380952,3.476190
4,AK,Aleutians East Borough,2020-01-11,9.000000,4.333333,2013.0,10.166667,3.666667
...,...,...,...,...,...,...,...,...
351408,WY,Weston County,2020-04-20,55.600000,22.800000,56045.0,51.828571,18.971429
351409,WY,Weston County,2020-04-21,59.800000,17.000000,56045.0,53.114286,18.371429
351410,WY,Weston County,2020-04-22,60.600000,17.000000,56045.0,54.314286,18.457143
351411,WY,Weston County,2020-04-23,58.000000,18.000000,56045.0,54.771429,18.400000


In [5]:
# Borrowed from safegraph colab file for cleaning data
# Find state wise mean of device_count/total_device
sdm_columns = ['device_count_avg', 'at_home_avg']
geo_cols = ['state']
time_keys = ['date_range_start']
df_state = all_months_avg.groupby(geo_cols + time_keys)[sdm_columns].sum(min_count=1).sort_values(by=geo_cols + time_keys, ascending=True).reset_index()
df_state['mean_est'] = df_state['at_home_avg'] / df_state['device_count_avg'] 
df_state.head()

Unnamed: 0,state,date_range_start,device_count_avg,at_home_avg,mean_est
0,AK,2020-01-07,921.08725,288.106035,0.312789
1,AK,2020-01-08,910.602115,270.165582,0.296689
2,AK,2020-01-09,906.538089,262.801306,0.289895
3,AK,2020-01-10,901.735639,253.966132,0.281641
4,AK,2020-01-11,893.126679,250.634457,0.280626


In [6]:
# Borrowed from safegraph colab file for cleaning data
# Find within state variance of ratio
sdm_columns = ['device_count_avg', 'at_home_avg'] 
geo_groupby= ['state', 'county']
time_keys = ['date_range_start']
df_county = all_months_avg.groupby(geo_groupby + time_keys)[sdm_columns].sum(min_count=1).sort_values(by=geo_groupby + time_keys, ascending=True).reset_index()

# compute new metrics
df_county['ratio_device_home_avg'] = df_county['at_home_avg'] / df_county['device_count_avg']
df_county
var_within_state = df_county.groupby(geo_cols+ time_keys)['ratio_device_home_avg'].var().to_frame().add_suffix("_var").reset_index()
var_within_state

Unnamed: 0,state,date_range_start,ratio_device_home_avg_var
0,AK,2020-01-07,0.005771
1,AK,2020-01-08,0.004996
2,AK,2020-01-09,0.005161
3,AK,2020-01-10,0.004511
4,AK,2020-01-11,0.004738
...,...,...,...
6045,WY,2020-04-20,0.000318
6046,WY,2020-04-21,0.000277
6047,WY,2020-04-22,0.000205
6048,WY,2020-04-23,0.000207


In [7]:
# Borrowed from safegraph colab file for cleaning data
# estimate the beta distribution BEta(alpha, beta) for counties (by state)
# We use the MEAN estimate from our overall earlier state estimates
# We use the variance estimates from the empircal bayes calculation of the variance of counties within each state (i.e., our certainty in the state_mean is >> our certainy in individual county means)

def estimate_alpha_beta(mean_, var_):
  # estimating beta distribution parameters with the method of moments. see BDA 3rd ED Gelman et al., pg. 583
  if(np.isnan(var_)):
    return(1,1)
  alpha_plus_beta = mean_*(1-mean_)/var_ - 1
  alpha_ = alpha_plus_beta * mean_
  beta_ = alpha_plus_beta - alpha_
  return(alpha_,beta_)

def estimate_alpha(mean_, var_):
  alpha, beta = estimate_alpha_beta(mean_, var_)
  return(alpha)

def estimate_beta(mean_, var_):
  alpha, beta = estimate_alpha_beta(mean_, var_)
  return(beta)


state_params = var_within_state.merge(df_state[time_keys + ['state', 'mean_est']].rename({'mean_est' : 'mean_estimate_for_state'}, axis=1), on=['state']+time_keys)
state_params


Unnamed: 0,state,date_range_start,ratio_device_home_avg_var,mean_estimate_for_state
0,AK,2020-01-07,0.005771,0.312789
1,AK,2020-01-08,0.004996,0.296689
2,AK,2020-01-09,0.005161,0.289895
3,AK,2020-01-10,0.004511,0.281641
4,AK,2020-01-11,0.004738,0.280626
...,...,...,...,...
6045,WY,2020-04-20,0.000318,0.364067
6046,WY,2020-04-21,0.000277,0.354382
6047,WY,2020-04-22,0.000205,0.345442
6048,WY,2020-04-23,0.000207,0.339157


In [0]:
# Borrowed from safegraph colab file for cleaning data
state_params['alpha_counties_in_this_state'] = state_params.apply(lambda x: estimate_alpha(x['mean_estimate_for_state'], x['ratio_device_home_avg_var']), axis=1) 
state_params
state_params['beta_counties_in_this_state'] = state_params.apply(lambda x: estimate_beta(x['mean_estimate_for_state'], x['ratio_device_home_avg_var']), axis=1)
state_params
df_county = df_county.merge(state_params, on=['state'] + time_keys)

In [9]:
# Borrowed from safegraph colab file for cleaning data
# To combine the state "prior" with the data observed from the individual county, add the prior alphas, betas with the observed data 
reg_param = 1

df_county['alpha'] = (reg_param * df_county['alpha_counties_in_this_state']  + df_county['at_home_avg'])
df_county['beta'] = (reg_param * df_county['beta_counties_in_this_state'] + df_county['device_count_avg'] - df_county['at_home_avg'])
df_county['model_mean_est'] = df_county['alpha'] / (df_county['alpha'] + df_county['beta'])
df_county['diff'] = np.abs(df_county['model_mean_est'] - df_county['ratio_device_home_avg'])

df_county

Unnamed: 0,state,county,date_range_start,device_count_avg,at_home_avg,ratio_device_home_avg,ratio_device_home_avg_var,mean_estimate_for_state,alpha_counties_in_this_state,beta_counties_in_this_state,alpha,beta,model_mean_est,diff
0,AK,Aleutians East Borough,2020-01-07,9.642857,3.523810,0.365432,0.005771,0.312789,11.336886,24.90762,14.860695,31.026667,0.323852,0.041581
1,AK,Aleutians West Census Area,2020-01-07,19.857143,6.607143,0.332734,0.005771,0.312789,11.336886,24.90762,17.944028,38.157620,0.319849,0.012885
2,AK,Anchorage Municipality,2020-01-07,64.230392,17.811881,0.277312,0.005771,0.312789,11.336886,24.90762,29.148766,71.326131,0.290110,0.012798
3,AK,Bethel Census Area,2020-01-07,25.970274,8.621212,0.331965,0.005771,0.312789,11.336886,24.90762,19.958098,42.256682,0.320794,0.011171
4,AK,Bristol Bay Borough,2020-01-07,16.857143,8.428571,0.500000,0.005771,0.312789,11.336886,24.90762,19.765457,33.336191,0.372219,0.127781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351408,WY,Sweetwater County,2020-04-24,52.781513,16.987395,0.321844,0.000258,0.336181,290.843616,574.29679,307.831011,610.090908,0.335356,0.013513
351409,WY,Teton County,2020-04-24,41.622449,13.775510,0.330963,0.000258,0.336181,290.843616,574.29679,304.619126,602.143729,0.335941,0.004978
351410,WY,Uinta County,2020-04-24,57.794643,19.205357,0.332303,0.000258,0.336181,290.843616,574.29679,310.048973,612.886076,0.335938,0.003635
351411,WY,Washakie County,2020-04-24,37.303571,12.875000,0.345141,0.000258,0.336181,290.843616,574.29679,303.718616,598.725362,0.336551,0.008590


In [11]:
# Add days to the data, data started on tuesday (Jan 7th 2020)
days_total = len(df_county.date_range_start.unique())
days = ['Tues','Wed','Thurs','Fri', 'Sat', 'Sun','Mon']
weekday =[]
count = 0
for val in range(0, days_total):
  if count ==7:
    count=0
  weekday.append(days[count])
  count = count+1


df = pd.DataFrame()
df["days"] = weekday
df["date_range_start"]= df_county.date_range_start.unique()
df_county_days = pd.merge(df_county,df,on='date_range_start', how='inner')
df_county_days

Unnamed: 0,state,county,date_range_start,device_count_avg,at_home_avg,ratio_device_home_avg,ratio_device_home_avg_var,mean_estimate_for_state,alpha_counties_in_this_state,beta_counties_in_this_state,alpha,beta,model_mean_est,diff,days
0,AK,Aleutians East Borough,2020-01-07,9.642857,3.523810,0.365432,0.005771,0.312789,11.336886,24.90762,14.860695,31.026667,0.323852,0.041581,Tues
1,AK,Aleutians West Census Area,2020-01-07,19.857143,6.607143,0.332734,0.005771,0.312789,11.336886,24.90762,17.944028,38.157620,0.319849,0.012885,Tues
2,AK,Anchorage Municipality,2020-01-07,64.230392,17.811881,0.277312,0.005771,0.312789,11.336886,24.90762,29.148766,71.326131,0.290110,0.012798,Tues
3,AK,Bethel Census Area,2020-01-07,25.970274,8.621212,0.331965,0.005771,0.312789,11.336886,24.90762,19.958098,42.256682,0.320794,0.011171,Tues
4,AK,Bristol Bay Borough,2020-01-07,16.857143,8.428571,0.500000,0.005771,0.312789,11.336886,24.90762,19.765457,33.336191,0.372219,0.127781,Tues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351408,WY,Sweetwater County,2020-04-24,52.781513,16.987395,0.321844,0.000258,0.336181,290.843616,574.29679,307.831011,610.090908,0.335356,0.013513,Fri
351409,WY,Teton County,2020-04-24,41.622449,13.775510,0.330963,0.000258,0.336181,290.843616,574.29679,304.619126,602.143729,0.335941,0.004978,Fri
351410,WY,Uinta County,2020-04-24,57.794643,19.205357,0.332303,0.000258,0.336181,290.843616,574.29679,310.048973,612.886076,0.335938,0.003635,Fri
351411,WY,Washakie County,2020-04-24,37.303571,12.875000,0.345141,0.000258,0.336181,290.843616,574.29679,303.718616,598.725362,0.336551,0.008590,Fri


In [12]:
# Seperate data in pre(before Feb 7th 2020) and post groups(from Feb 7th 2020) based on google mobility
df_county_pre = df_county_days[df_county_days['date_range_start'] < '2020-02-07']
df_county_post = df_county_days[df_county_days['date_range_start'] >= '2020-02-07']
df_county_post

Unnamed: 0,state,county,date_range_start,device_count_avg,at_home_avg,ratio_device_home_avg,ratio_device_home_avg_var,mean_estimate_for_state,alpha_counties_in_this_state,beta_counties_in_this_state,alpha,beta,model_mean_est,diff,days
99996,AK,Aleutians East Borough,2020-02-07,23.428571,8.380952,0.357724,0.005955,0.296697,10.099859,23.941168,18.480811,38.988787,0.321575,0.036148,Fri
99997,AK,Aleutians West Census Area,2020-02-07,31.214286,13.928571,0.446224,0.005955,0.296697,10.099859,23.941168,24.028431,41.226882,0.368222,0.078002,Fri
99998,AK,Anchorage Municipality,2020-02-07,58.959088,14.535211,0.246530,0.005955,0.296697,10.099859,23.941168,24.635070,68.365044,0.264893,0.018362,Fri
99999,AK,Bethel Census Area,2020-02-07,26.480556,9.238889,0.348893,0.005955,0.296697,10.099859,23.941168,19.338748,41.182834,0.319535,0.029359,Fri
100000,AK,Bristol Bay Borough,2020-02-07,23.428571,8.142857,0.347561,0.005955,0.296697,10.099859,23.941168,18.242716,39.226882,0.317432,0.030129,Fri
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351408,WY,Sweetwater County,2020-04-24,52.781513,16.987395,0.321844,0.000258,0.336181,290.843616,574.296790,307.831011,610.090908,0.335356,0.013513,Fri
351409,WY,Teton County,2020-04-24,41.622449,13.775510,0.330963,0.000258,0.336181,290.843616,574.296790,304.619126,602.143729,0.335941,0.004978,Fri
351410,WY,Uinta County,2020-04-24,57.794643,19.205357,0.332303,0.000258,0.336181,290.843616,574.296790,310.048973,612.886076,0.335938,0.003635,Fri
351411,WY,Washakie County,2020-04-24,37.303571,12.875000,0.345141,0.000258,0.336181,290.843616,574.296790,303.718616,598.725362,0.336551,0.008590,Fri


In [0]:
# Find median ratio of device at home in pre data for every day of week in a county
def median_est(grp):
  grp['median_v'] = grp['model_mean_est'].median()
  # day = grp.days
  return (grp)

df_county_pre1 =  df_county_pre[['state','county','days','model_mean_est']]
median_ratio = df_county_pre1.groupby(['state','county','days'],as_index=False).apply(median_est)
median_ratio
median_ratio.to_feather(folder+'_calculated_'+'median_weekly_'+'Jan_7_till_Feb_6th')

In [14]:
# Find median ratio of device at home in pre data for every day of week in a county
median_ratio = median_ratio.drop(columns=['model_mean_est'])
df_county_post_med = pd.merge(df_county_post,median_ratio,on=['state','county','days'], how='inner').sort_values(by = ['state','date_range_start'], ignore_index = True)
df_county_post_med = df_county_post_med[['state','county','date_range_start','days','model_mean_est','ratio_device_home_avg','diff','median_v']]
df_county_post_med

Unnamed: 0,state,county,date_range_start,days,model_mean_est,ratio_device_home_avg,diff,median_v
0,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583
1,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583
2,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583
3,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583
4,AK,Aleutians West Census Area,2020-02-07,Fri,0.368222,0.446224,0.078002,0.287141
...,...,...,...,...,...,...,...,...
1111940,WY,Washakie County,2020-04-24,Fri,0.336551,0.345141,0.008590,0.253458
1111941,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467
1111942,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467
1111943,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467


In [15]:
# Calculate post Feb 7th change at stay home devices
# Percentage change is ratio change w.r.t. median
# Difference is raw change from median
df_county_post_med['Percentage Change'] = (df_county_post_med.model_mean_est - df_county_post_med.median_v)*100/df_county_post_med.median_v
df_county_post_med['Change'] = (df_county_post_med.model_mean_est - df_county_post_med.median_v)*100

df_county_post_med

Unnamed: 0,state,county,date_range_start,days,model_mean_est,ratio_device_home_avg,diff,median_v,Percentage Change,Change
0,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583,14.609660,4.099225
1,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583,14.609660,4.099225
2,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583,14.609660,4.099225
3,AK,Aleutians East Borough,2020-02-07,Fri,0.321575,0.357724,0.036148,0.280583,14.609660,4.099225
4,AK,Aleutians West Census Area,2020-02-07,Fri,0.368222,0.446224,0.078002,0.287141,28.237227,8.108070
...,...,...,...,...,...,...,...,...,...,...
1111940,WY,Washakie County,2020-04-24,Fri,0.336551,0.345141,0.008590,0.253458,32.783871,8.309331
1111941,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467,31.689401,8.095588
1111942,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467,31.689401,8.095588
1111943,WY,Weston County,2020-04-24,Fri,0.336423,0.340185,0.003762,0.255467,31.689401,8.095588


In [0]:
df_county_post_med.to_feather(folder+'_calculated_'+'stay_at_home'+'Feb_7_till_April_24')