In [1]:
import os
import pandas as pd, geopandas as gp, matplotlib.pyplot as plt, numpy as np
import requests 
import json
import contextily as ctx
from pandas import ExcelWriter
import censusdata

import sys
import getpass

# Login name of the current OS user; interpolated into the Box path below.
user = getpass.getuser()
# Tell the interpreter not to write .pyc bytecode files for modules imported in this session.
sys.dont_write_bytecode = True

# for DataViz team members
# Put the shared "Utility Code" folder first on the module search path.
# The path is hard-coded in macOS style (/Users/<user>/Box/...).
# NOTE(review): presumably this is where `utils_io` (imported right after) lives -- confirm.
sys.path.insert(0, '/Users/{}/Box/DataViz Projects/Utility Code'.format(user))

from utils_io import *

## Pre-processing steps

In [2]:
def get_file_contents(filename):
    """Read a text file and return its contents with surrounding whitespace removed.

    The file is expected to hold a single line containing the API key.
    When the file does not exist, a message is printed and None is returned.
    """
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        print("'%s' file not found" % filename)
        return None
    with handle:
        contents = handle.read()
    return contents.strip()

In [3]:
# Relative path of the text file holding the Census API key (read later with get_file_contents).
api_file = 'census_api_key.txt'

## Query ACS API for 2013 and 2018 ACS

In [4]:
# Read the selected ACS variables from the lookup table.
# The 'ACS_Table_Variable' column supplies the variable codes that are later
# passed to pull_acs5_tract_data as `variables_list`.
acs_coc_selected_vars = pd.read_csv('Data/ACS_Table_Variables_SPFAM_MoE.csv')
acs_vars_lst = acs_coc_selected_vars['ACS_Table_Variable'].tolist()

In [5]:
def pull_acs5_tract_data(acs_year,variables_list,api_key,timeout=60):
    """
    Pull 5-Year American Community Survey (ACS) data for the Bay Area at the tract level geography.
    Provide the ACS variables as a list. To get a full list of ACS variables, review the table
    variables for the ACS year you are pulling data for.

    American Community Survey Documentation: https://www.census.gov/data/developers/data-sets/acs-5year.html

    Parameters
    ----------
    acs_year : str or int
        ACS 5-year vintage; inserted into the request path (e.g. '2018').
    variables_list : iterable of str
        ACS variable codes to request (e.g. 'B11004_001E'). Any iterable is accepted;
        it is materialised into a list once.
    api_key : str
        Census API key, sent as the `key` query parameter.
    timeout : float, optional
        Seconds to wait for the Census API before giving up (default 60).

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API. The requested variable columns are converted
        to numeric (values that cannot be parsed become NaN). A 'geoid' column is added by
        concatenating the 'state', 'county' and 'tract' columns of the response.

    Raises
    ------
    RuntimeError
        If the API answers with an HTTP status other than 200.

    Author: Joshua Croff
    """
    import requests

    # Materialise once: a tuple or generator would otherwise break the column selection below.
    variables_list = list(variables_list)
    variables = ','.join(variables_list)
    state = '06'
    # County FIPS codes the request is restricted to (within state 06).
    counties = '001,013,041,055,075,081,085,095,097'
    url = '/'.join(['https://api.census.gov',
                   'data',
                   '{acs_year}'.format(acs_year=acs_year),
                   'acs',
                   'acs5?get={variables}&for=tract:*&in=county:{counties}&in=state:{state}&key={api_key}'
                    .format(variables=variables,
                            counties=counties,
                            state=state,
                            api_key=api_key)])
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    rq = requests.get(url, timeout=timeout)
    if rq.status_code != 200:
        # Fail early with a clear message instead of an opaque JSON/indexing error.
        # The URL is deliberately left out of the message because it embeds the API key.
        raise RuntimeError('Census API request for ACS {} failed with HTTP status {}'
                           .format(acs_year, rq.status_code))
    data = rq.json()
    # First element of the payload is the header row; the rest are data rows.
    df = pd.DataFrame(data[1:],columns=data[0])

    #convert variable columns to numeric
    df[variables_list] = df[variables_list].apply(pd.to_numeric, errors='coerce')

    #Create geoid column
    df['geoid'] = df['state'] + df['county'] + df['tract']
    return df

In [35]:
# Set the API key from the key file.
# get_file_contents prints a message and returns None when the file is missing,
# so a missing key file does not stop the notebook at this cell.
api_key = get_file_contents(api_file)

In [90]:
# Pull ACS 2018 5-year tract data for the selected variables (network call to the Census API).
acs2018_df = pull_acs5_tract_data(acs_year='2018',variables_list=acs_vars_lst,api_key=api_key)

In [91]:
# Give the raw ACS variable codes readable, vintage-suffixed names.
# Codes ending in 'M' are mapped to the corresponding '*_moe_*' (margin of error) names.
col_rename_acs18 = dict([
    ('B11004_001E', 'tot_fam_acs18'),
    ('B11004_001M', 'tot_fam_moe_acs18'),
    ('B11004_010E', 'male_hh_child_acs18'),
    ('B11004_010M', 'male_hh_child_moe_acs18'),
    ('B11004_016E', 'female_hh_child_acs18'),
    ('B11004_016M', 'female_hh_child_moe_acs18'),
])
acs2018_df.rename(columns=col_rename_acs18, inplace=True)

In [92]:
# Drop unneeded columns: 'state' and 'tract' are already folded into 'geoid';
# 'county' is kept for the per-county summaries.
# Reassigning with errors='ignore' makes the cell safe to re-run; the in-place
# version raised KeyError on a second run because the columns were already gone.
acs2018_df = acs2018_df.drop(columns=['state','tract'],errors='ignore')

In [93]:
# Add single parent family column: sum of the male- and female-householder
# '*_hh_child' estimate columns for 2018.
acs2018_df['pop_spfam_acs18'] = acs2018_df['male_hh_child_acs18'] + acs2018_df['female_hh_child_acs18']

In [94]:
# Add percentage single parent family column.
# Stored as a fraction of total families (not multiplied by 100). Tracts with
# zero total families give NaN/inf here rather than raising.
acs2018_df['pct_spfam_acs18'] = acs2018_df['pop_spfam_acs18'] / acs2018_df['tot_fam_acs18']

In [95]:
def flag_high_moe(row,pop,pop_moe,male_hh,male_hh_moe,female_hh,female_hh_moe):
    """Return 1 when any margin of error in `row` exceeds its estimate, otherwise 0.

    `row` is indexed by the six names passed in; each `*_moe` argument names the
    margin-of-error value that is compared against the value named by the
    argument immediately before it.
    """
    total_exceeded = row[pop_moe] > row[pop]
    male_exceeded = row[male_hh_moe] > row[male_hh]
    female_exceeded = row[female_hh_moe] > row[female_hh]
    return 1 if (total_exceeded | male_exceeded | female_exceeded) else 0

In [96]:
# Add MoE flag column: 1 where any 2018 margin of error exceeds its estimate, else 0.
# The column names are handed to flag_high_moe positionally, in its argument order.
acs2018_df['moe_over_est_acs18'] = acs2018_df.apply(
    flag_high_moe,
    axis=1,
    args=('tot_fam_acs18',
          'tot_fam_moe_acs18',
          'male_hh_child_acs18',
          'male_hh_child_moe_acs18',
          'female_hh_child_acs18',
          'female_hh_child_moe_acs18'),
)

In [97]:
# Preview the first five 2018 rows with the derived count, share and flag columns.
acs2018_df.head(5)

Unnamed: 0,tot_fam_acs18,tot_fam_moe_acs18,male_hh_child_acs18,male_hh_child_moe_acs18,female_hh_child_acs18,female_hh_child_moe_acs18,county,geoid,pop_spfam_acs18,pct_spfam_acs18,moe_over_est_acs18
0,565,113,49,42,119,83,55,6055200202,168,0.297345,0
1,981,114,16,19,37,25,55,6055200400,53,0.054027,1
2,11,14,0,12,4,6,55,6055200900,4,0.363636,1
3,1841,123,15,24,46,43,85,6085508101,61,0.033134,1
4,1095,111,19,21,74,47,85,6085509202,93,0.084932,1


In [98]:
# Pull ACS 2013 5-year tract data for the same variable list (network call to the Census API).
acs2013_df = pull_acs5_tract_data(acs_year='2013',variables_list=acs_vars_lst,api_key=api_key)

In [99]:
# Preview the raw 2013 pull; columns still carry the ACS variable codes at this point.
acs2013_df.head(5)

Unnamed: 0,B11004_001E,B11004_001M,B11004_010E,B11004_010M,B11004_016E,B11004_016M,state,county,tract,geoid
0,481,91,12,19,68,49,6,1,401200,6001401200
1,609,131,12,23,129,76,6,1,401300,6001401300
2,766,134,62,47,204,84,6,1,401400,6001401400
3,407,112,39,32,109,64,6,1,401500,6001401500
4,306,84,19,21,123,45,6,1,401600,6001401600


In [100]:
# Give the raw ACS variable codes readable, vintage-suffixed names.
# Codes ending in 'M' are mapped to the corresponding '*_moe_*' (margin of error) names.
col_rename_acs13 = dict([
    ('B11004_001E', 'tot_fam_acs13'),
    ('B11004_001M', 'tot_fam_moe_acs13'),
    ('B11004_010E', 'male_hh_child_acs13'),
    ('B11004_010M', 'male_hh_child_moe_acs13'),
    ('B11004_016E', 'female_hh_child_acs13'),
    ('B11004_016M', 'female_hh_child_moe_acs13'),
])
acs2013_df.rename(columns=col_rename_acs13, inplace=True)

In [101]:
# Drop unneeded columns: 'state' and 'tract' are already folded into 'geoid'.
# Reassigning with errors='ignore' makes the cell safe to re-run; the in-place
# version raised KeyError on a second run because the columns were already gone.
acs2013_df = acs2013_df.drop(columns=['state','tract'],errors='ignore')

In [102]:
# Add single parent family column: sum of the male- and female-householder
# '*_hh_child' estimate columns for 2013.
acs2013_df['pop_spfam_acs13'] = acs2013_df['male_hh_child_acs13'] + acs2013_df['female_hh_child_acs13']

In [103]:
# Add percentage single parent family column.
# Stored as a fraction of total families (not multiplied by 100). Tracts with
# zero total families give NaN/inf here rather than raising.
acs2013_df['pct_spfam_acs13'] = acs2013_df['pop_spfam_acs13'] / acs2013_df['tot_fam_acs13']

In [104]:
# Add MoE flag column: 1 where any 2013 margin of error exceeds its estimate, else 0.
# The column names are handed to flag_high_moe positionally, in its argument order.
acs2013_df['moe_over_est_acs13'] = acs2013_df.apply(
    flag_high_moe,
    axis=1,
    args=('tot_fam_acs13',
          'tot_fam_moe_acs13',
          'male_hh_child_acs13',
          'male_hh_child_moe_acs13',
          'female_hh_child_acs13',
          'female_hh_child_moe_acs13'),
)

In [105]:
# Preview the first ten 2013 rows with the derived count, share and flag columns.
acs2013_df.head(10)

Unnamed: 0,tot_fam_acs13,tot_fam_moe_acs13,male_hh_child_acs13,male_hh_child_moe_acs13,female_hh_child_acs13,female_hh_child_moe_acs13,county,geoid,pop_spfam_acs13,pct_spfam_acs13,moe_over_est_acs13
0,481,91,12,19,68,49,1,6001401200,80,0.16632,1
1,609,131,12,23,129,76,1,6001401300,141,0.231527,1
2,766,134,62,47,204,84,1,6001401400,266,0.347258,0
3,407,112,39,32,109,64,1,6001401500,148,0.363636,0
4,306,84,19,21,123,45,1,6001401600,142,0.464052,1
5,420,101,0,12,45,36,1,6001401700,45,0.107143,1
6,402,74,69,53,170,62,1,6001401800,239,0.594527,0
7,505,80,51,44,117,62,1,6001402200,168,0.332673,0
8,716,110,46,50,273,121,1,6001402400,319,0.445531,1
9,491,82,81,49,206,75,1,6001402500,287,0.584521,0


## Summarize Margins of Error for ACS2013 and ACS2018

In [106]:
# Summarize Margin of Error Over Estimate By County ACS2018.
# The flag is 0/1, so the per-county sum is the number of flagged tracts.
# GroupBy.sum() replaces .aggregate(sum): passing the Python builtin is deprecated in newer pandas.
acs2018_df[['county','moe_over_est_acs18']].groupby(['county']).sum()

Unnamed: 0_level_0,moe_over_est_acs18
county,Unnamed: 1_level_1
1,223
13,132
41,35
55,27
75,171
81,108
85,221
95,44
97,48


In [107]:
# Summarize Margin of Error Over Estimate By County ACS2013.
# The flag is 0/1, so the per-county sum is the number of flagged tracts.
# GroupBy.sum() replaces .aggregate(sum): passing the Python builtin is deprecated in newer pandas.
acs2013_df[['county','moe_over_est_acs13']].groupby(['county']).sum()

Unnamed: 0_level_0,moe_over_est_acs13
county,Unnamed: 1_level_1
1,224
13,125
41,37
55,29
75,177
81,113
85,229
95,49
97,52


## Join ACS2013 to ACS2018 for Comparison

In [124]:
# Join the two vintages tract-by-tract on 'geoid'.
# how='inner' keeps only tracts present in both frames. Both frames still carry
# a 'county' column, so pandas suffixes them as 'county_x' (2018) and 'county_y' (2013).
acs_comparison = pd.merge(acs2018_df,
                         acs2013_df,
                         on='geoid',
                         how='inner')

In [125]:
# Preview the merged 2018/2013 table.
acs_comparison.head(5)

Unnamed: 0,tot_fam_acs18,tot_fam_moe_acs18,male_hh_child_acs18,male_hh_child_moe_acs18,female_hh_child_acs18,female_hh_child_moe_acs18,county_x,geoid,pop_spfam_acs18,pct_spfam_acs18,...,tot_fam_acs13,tot_fam_moe_acs13,male_hh_child_acs13,male_hh_child_moe_acs13,female_hh_child_acs13,female_hh_child_moe_acs13,county_y,pop_spfam_acs13,pct_spfam_acs13,moe_over_est_acs13
0,565,113,49,42,119,83,55,6055200202,168,0.297345,...,512,104,15,17,108,53,55,123,0.240234,1
1,981,114,16,19,37,25,55,6055200400,53,0.054027,...,940,117,35,28,73,37,55,108,0.114894,0
2,11,14,0,12,4,6,55,6055200900,4,0.363636,...,0,12,0,12,0,12,55,0,,1
3,1841,123,15,24,46,43,85,6085508101,61,0.033134,...,1800,156,8,16,36,37,85,44,0.024444,1
4,1095,111,19,21,74,47,85,6085509202,93,0.084932,...,1054,111,17,25,59,39,85,76,0.072106,1


In [126]:
# List the merged column names (note the duplicated county columns: county_x / county_y).
acs_comparison.columns.to_list()

['tot_fam_acs18',
 'tot_fam_moe_acs18',
 'male_hh_child_acs18',
 'male_hh_child_moe_acs18',
 'female_hh_child_acs18',
 'female_hh_child_moe_acs18',
 'county_x',
 'geoid',
 'pop_spfam_acs18',
 'pct_spfam_acs18',
 'moe_over_est_acs18',
 'tot_fam_acs13',
 'tot_fam_moe_acs13',
 'male_hh_child_acs13',
 'male_hh_child_moe_acs13',
 'female_hh_child_acs13',
 'female_hh_child_moe_acs13',
 'county_y',
 'pop_spfam_acs13',
 'pct_spfam_acs13',
 'moe_over_est_acs13']

In [127]:
# Keep the 2018-side county code as the canonical 'county_fips'; 'county_y' is left
# untouched here and falls away when the columns are re-selected via cols_arranged.
acs_comparison.rename(columns={'county_x':'county_fips'},inplace=True)

In [128]:
# Lookup from three-digit county FIPS strings to county names. The keys are the
# same nine codes used in the `counties` filter of pull_acs5_tract_data.
county_fips_map = {'001':'Alameda',
                  '013':'Contra Costa',
                  '041':'Marin',
                  '055':'Napa',
                  '075':'San Francisco',
                  '081':'San Mateo',
                  '095':'Solano',
                  '097':'Sonoma',
                  '085':'Santa Clara'}

In [129]:
# Translate county FIPS codes to names; any code missing from county_fips_map becomes NaN.
acs_comparison['county_name'] = acs_comparison['county_fips'].map(county_fips_map)

In [130]:
# Output column order: identifiers first, then each measure with its 2018 and 2013
# values next to each other, then the shares and the MoE flags.
# 'county_y' is intentionally not listed.
cols_arranged = ['county_name',
                 'county_fips',
                 'geoid',
                 'tot_fam_acs18',
                 'tot_fam_moe_acs18',
                 'tot_fam_acs13',
                 'tot_fam_moe_acs13',
                 'pop_spfam_acs18',
                 'pop_spfam_acs13',
                 'male_hh_child_acs18',
                 'male_hh_child_moe_acs18',
                 'male_hh_child_acs13',
                 'male_hh_child_moe_acs13',
                 'female_hh_child_acs18',
                 'female_hh_child_moe_acs18',
                 'female_hh_child_acs13',
                 'female_hh_child_moe_acs13',
                 'pct_spfam_acs18',
                 'pct_spfam_acs13',
                 'moe_over_est_acs18',
                 'moe_over_est_acs13']

In [131]:
# Reorder (and restrict) the columns to cols_arranged; .copy() gives an independent frame
# rather than a view of the merged one.
acs_comparison = acs_comparison[cols_arranged].copy()

In [132]:
# Preview the reordered comparison table.
acs_comparison.head(5)

Unnamed: 0,county_name,county_fips,geoid,tot_fam_acs18,tot_fam_moe_acs18,tot_fam_acs13,tot_fam_moe_acs13,pop_spfam_acs18,pop_spfam_acs13,male_hh_child_acs18,...,male_hh_child_acs13,male_hh_child_moe_acs13,female_hh_child_acs18,female_hh_child_moe_acs18,female_hh_child_acs13,female_hh_child_moe_acs13,pct_spfam_acs18,pct_spfam_acs13,moe_over_est_acs18,moe_over_est_acs13
0,Napa,55,6055200202,565,113,512,104,168,123,49,...,15,17,119,83,108,53,0.297345,0.240234,0,1
1,Napa,55,6055200400,981,114,940,117,53,108,16,...,35,28,37,25,73,37,0.054027,0.114894,1,0
2,Napa,55,6055200900,11,14,0,12,4,0,0,...,0,12,4,6,0,12,0.363636,,1,1
3,Santa Clara,85,6085508101,1841,123,1800,156,61,44,15,...,8,16,46,43,36,37,0.033134,0.024444,1,1
4,Santa Clara,85,6085509202,1095,111,1054,111,93,76,19,...,17,25,74,47,59,39,0.084932,0.072106,1,1


In [134]:
# Number of flagged tracts per county for both vintages, side by side.
# GroupBy.sum() replaces .aggregate(sum): passing the Python builtin is deprecated in newer pandas.
(acs_comparison[['county_name','moe_over_est_acs18','moe_over_est_acs13']]
 .groupby('county_name')
 .sum()
 .reset_index())

Unnamed: 0,county_name,moe_over_est_acs18,moe_over_est_acs13
0,Alameda,223,224
1,Contra Costa,132,125
2,Marin,35,37
3,Napa,27,29
4,San Francisco,171,177
5,San Mateo,108,113
6,Santa Clara,221,229
7,Solano,44,49
8,Sonoma,48,52


In [135]:
# IPython shell escape: list the notebook's working directory. The output is only
# displayed, not captured, so nothing downstream depends on this cell.
!ls

COC Diff ACS 2014 and ACS 2018.ipynb
COCs_2018.Rmd
COCs_2018.nb.html
Communities of Concern 2020 (ACS 2014-2018).ipynb
[34mData[m[m
Investigate Discrepancy between SPFAM ACS2014 and ACS2018 Data .ipynb
README.md
README_COC_ACS2016.md
[34mREADME_Images[m[m
census_api_key.txt
field_alias_script.py


In [137]:
# Write the comparison table to CSV under Data/, without the DataFrame index column.
acs_comparison.to_csv('Data/compare_acs13_acs18_spfam_moe.csv',index=False)