# Safegraph Census Data

This notebook downloads the raw SafeGraph census data from the AWS COVID-19 public data lake and performs minor cleanup on it:
* https://aws.amazon.com/blogs/big-data/a-public-data-lake-for-analysis-of-covid-19-data/
* See covid_virtual_env.sh for environment setup
* Note that thousands of census data fields are available, and they are reported at the census-block-group level (more granular than county).  The 'fields' list in this file will need to be edited to select a different set of columns from the available census data.

## Import Libraries

In [1]:
import boto3, pandas as pd, pickle

ModuleNotFoundError: No module named 'boto3'

## Download Data

This section uses the boto3 library to download the raw data files

In [None]:
def _readS3Csv(client, bucket, key):
    """Fetch a single CSV object from S3 and parse it into a DataFrame."""
    obj = client.get_object(Bucket=bucket, Key=key)
    return pd.read_csv(obj['Body'])


def _pickleTo(obj, f_name):
    """Pickle obj to f_name, overwriting any existing file."""
    with open(f_name, 'wb') as f:
        pickle.dump(obj, f)


def getRawData(keys, f_name1, f_name2, f_name3, f_name4, bucket='covid19-lake'):
    """Download the raw census tables and their metadata from S3 and pickle them.

    Args:
        keys: S3 object keys of the census data CSVs to download. The tables
            are concatenated side by side (column-wise) in the order given.
        f_name1: Output path for the pickled concatenated census data.
        f_name2: Output path for the pickled FIPS-code metadata.
        f_name3: Output path for the pickled geographic metadata.
        f_name4: Output path for the pickled field-description metadata.
        bucket: Name of the S3 bucket holding the data. Defaults to the
            bucket used by this notebook.

    Returns:
        A tuple (census_data, data_fips, data_geo, data_fields) of DataFrames.

    Raises:
        ValueError: If keys is empty. This is checked up front so that no
            download happens for a request that cannot be concatenated.
    """
    keys = list(keys)
    if not keys:
        raise ValueError('keys must contain at least one census data object key')

    metadata_prefix = 'safegraph-open-census-data/csv/metadata/'
    client = boto3.client('s3')

    data_fips = _readS3Csv(client, bucket, metadata_prefix + 'cbg_fips_codes/cbg_fips_codes.csv')
    data_geo = _readS3Csv(client, bucket, metadata_prefix + 'cbg_geographic_data/cbg_geographic_data.csv')
    data_fields = _readS3Csv(client, bucket, metadata_prefix + 'cbg_field_descriptions/cbg_field_descriptions.csv')

    data_list = []
    for key in keys:
        data_list.append(_readS3Csv(client, bucket, key))
        print('Added: ', key)

    # Tables are joined positionally (row i of each file side by side), so each
    # table's own 'census_block_group' column ends up duplicated in the result.
    # NOTE(review): this assumes every table lists the block groups in the same
    # row order -- TODO confirm, or join on 'census_block_group' instead.
    census_data = pd.concat(data_list, axis=1)

    _pickleTo(census_data, f_name1)
    _pickleTo(data_fips, f_name2)
    _pickleTo(data_geo, f_name3)
    _pickleTo(data_fields, f_name4)

    print("Raw census data saved to: " + f_name1)

    return census_data, data_fips, data_geo, data_fields

In [None]:
# Census tables to download, identified by their table prefix.
keys = [
    'safegraph-open-census-data/csv/data/cbg_' + table + '.csv'
    for table in (
        'b01',  # Age/Gender
        'b02',  # Race
        'b15',  # Education
        'b19',  # Income
        'b27',  # Insurance
        'c17',  # Poverty
    )
]

# Download everything and keep pickled copies of the raw frames on disk.
census_data, data_fips, data_geo, data_fields = getRawData(
    keys=keys,
    f_name1='safegraph_census_raw.p',
    f_name2='safegraph_census_fips.p',
    f_name3='safegraph_census_geo.p',
    f_name4='safegraph_census_fields.p',
)

In [None]:
# Preview the first three rows of the concatenated raw census tables.
census_data.head(3)

## Create List of Select Columns
There are a large number of columns available, so this section creates a list of only those columns of interest.
* List was created by inspecting the 'data_fields' table pulled from the raw data.

In [None]:
# Each entry pairs a raw census field ID with the short, human-readable column
# name that processRawData renames it to. The trailing comment on each row is
# the field's description.
# processRawData refers to 'B01001e1' (total population) and 'B19301e1'
# (per-capita income) by ID in order to compute the population-weighted PCI.
fields = [
    ['B01001e1','pop_total'], #SEX BY AGE: Total: Total population -- (Estimate)

    ['B01001e2','m_total'], #SEX BY AGE: Male: Total population -- (Estimate)
    # NOTE(review): the '0_5' label covers "Under 5 years" (same for 'f_0_5' below).
    ['B01001e3','m_0_5'], #SEX BY AGE: Male: Under 5 years: Total population -- (Estimate)
    ['B01001e4','m_5_9'], #SEX BY AGE: Male: 5 to 9 years: Total population -- (Estimate)
    ['B01001e5','m_10_14'], #SEX BY AGE: Male: 10 to 14 years: Total population -- (Estimate)
    ['B01001e6','m_15_17'], #SEX BY AGE: Male: 15 to 17 years: Total population -- (Estimate)
    ['B01001e7','m_18_19'], #SEX BY AGE: Male: 18 and 19 years: Total population -- (Estimate)
    ['B01001e8','m_20_20'], #SEX BY AGE: Male: 20 years: Total population -- (Estimate)
    ['B01001e9','m_21_21'], #SEX BY AGE: Male: 21 years: Total population -- (Estimate)
    ['B01001e10','m_22_24'], #SEX BY AGE: Male: 22 to 24 years: Total population -- (Estimate)
    ['B01001e11','m_25_29'], #SEX BY AGE: Male: 25 to 29 years: Total population -- (Estimate)
    ['B01001e12','m_30_34'], #SEX BY AGE: Male: 30 to 34 years: Total population -- (Estimate)
    ['B01001e13','m_35_39'], #SEX BY AGE: Male: 35 to 39 years: Total population -- (Estimate)
    ['B01001e14','m_40_44'], #SEX BY AGE: Male: 40 to 44 years: Total population -- (Estimate)
    ['B01001e15','m_45_49'], #SEX BY AGE: Male: 45 to 49 years: Total population -- (Estimate)
    ['B01001e16','m_50_54'], #SEX BY AGE: Male: 50 to 54 years: Total population -- (Estimate)
    ['B01001e17','m_55_59'], #SEX BY AGE: Male: 55 to 59 years: Total population -- (Estimate)
    ['B01001e18','m_60_61'], #SEX BY AGE: Male: 60 and 61 years: Total population -- (Estimate)
    ['B01001e19','m_62_64'], #SEX BY AGE: Male: 62 to 64 years: Total population -- (Estimate)
    ['B01001e20','m_65_66'], #SEX BY AGE: Male: 65 and 66 years: Total population -- (Estimate)
    ['B01001e21','m_67_69'], #SEX BY AGE: Male: 67 to 69 years: Total population -- (Estimate)
    ['B01001e22','m_70_74'], #SEX BY AGE: Male: 70 to 74 years: Total population -- (Estimate)
    ['B01001e23','m_75_79'], #SEX BY AGE: Male: 75 to 79 years: Total population -- (Estimate)
    ['B01001e24','m_80_84'], #SEX BY AGE: Male: 80 to 84 years: Total population -- (Estimate)
    ['B01001e25','m_85_110'], #SEX BY AGE: Male: 85 years and over: Total population -- (Estimate)

    ['B01001e26','f_total'], #SEX BY AGE: Female: Total population -- (Estimate)
    ['B01001e27','f_0_5'], #SEX BY AGE: Female: Under 5 years: Total population -- (Estimate)
    ['B01001e28','f_5_9'], #SEX BY AGE: Female: 5 to 9 years: Total population -- (Estimate)
    ['B01001e29','f_10_14'], #SEX BY AGE: Female: 10 to 14 years: Total population -- (Estimate)
    ['B01001e30','f_15_17'], #SEX BY AGE: Female: 15 to 17 years: Total population -- (Estimate)
    ['B01001e31','f_18_19'], #SEX BY AGE: Female: 18 and 19 years: Total population -- (Estimate)
    ['B01001e32','f_20_20'], #SEX BY AGE: Female: 20 years: Total population -- (Estimate)
    ['B01001e33','f_21_21'], #SEX BY AGE: Female: 21 years: Total population -- (Estimate)
    ['B01001e34','f_22_24'], #SEX BY AGE: Female: 22 to 24 years: Total population -- (Estimate)
    ['B01001e35','f_25_29'], #SEX BY AGE: Female: 25 to 29 years: Total population -- (Estimate)
    ['B01001e36','f_30_34'], #SEX BY AGE: Female: 30 to 34 years: Total population -- (Estimate)
    ['B01001e37','f_35_39'], #SEX BY AGE: Female: 35 to 39 years: Total population -- (Estimate)
    ['B01001e38','f_40_44'], #SEX BY AGE: Female: 40 to 44 years: Total population -- (Estimate)
    ['B01001e39','f_45_49'], #SEX BY AGE: Female: 45 to 49 years: Total population -- (Estimate)
    ['B01001e40','f_50_54'], #SEX BY AGE: Female: 50 to 54 years: Total population -- (Estimate)
    ['B01001e41','f_55_59'], #SEX BY AGE: Female: 55 to 59 years: Total population -- (Estimate)
    ['B01001e42','f_60_61'], #SEX BY AGE: Female: 60 and 61 years: Total population -- (Estimate)
    ['B01001e43','f_62_64'], #SEX BY AGE: Female: 62 to 64 years: Total population -- (Estimate)
    ['B01001e44','f_65_66'], #SEX BY AGE: Female: 65 and 66 years: Total population -- (Estimate)
    ['B01001e45','f_67_69'], #SEX BY AGE: Female: 67 to 69 years: Total population -- (Estimate)
    ['B01001e46','f_70_74'], #SEX BY AGE: Female: 70 to 74 years: Total population -- (Estimate)
    ['B01001e47','f_75_79'], #SEX BY AGE: Female: 75 to 79 years: Total population -- (Estimate)
    ['B01001e48','f_80_84'], #SEX BY AGE: Female: 80 to 84 years: Total population -- (Estimate)
    ['B01001e49','f_85_110'], #SEX BY AGE: Female: 85 years and over: Total population -- (Estimate)

    ['B02001e1','r_total'],  #RACE: Total: Total population -- (Estimate)
    ['B02001e2','r_white'], #RACE: White alone: Total population -- (Estimate)
    ['B02001e3','r_black'], #RACE: Black or African American alone: Total population -- (Estimate)
    ['B02001e4','r_native'], #RACE: American Indian and Alaska Native alone: Total population -- (Estimate)
    ['B02001e5','r_asian'], #RACE: Asian alone: Total population -- (Estimate)
    ['B02001e6','r_pacific'], #RACE: Native Hawaiian and Other Pacific Islander alone: Total population -- (Estimate)
    ['B02001e7','r_other'], #RACE: Some other race alone: Total population -- (Estimate)
    ['B02001e8','r_mix1'], #RACE: Two or more races: Total population -- (Estimate)
    ['B02001e9','r_mix2'], #RACE: Two or more races: Two races including Some other race: Total population -- (Estimate)
    ['B02001e10','r_mix3'], #"RACE: Two or more races: Two races excluding Some other race, and three or more races: Total population -- (Estimate)"
    ['B02008e1','r_mix4'], #WHITE ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: White alone or in combination with one or more other races -- (Estimate)
    ['B02009e1','r_mix5'], #BLACK OR AFRICAN AMERICAN ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: Black or African American alone or in combination with one or more other races -- (Estimate)
    ['B02010e1','r_mix6'], #AMERICAN INDIAN AND ALASKA NATIVE ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: People who are American Indian or Alaska Native alone or in combination with one or more other races -- (Estimate)
    ['B02011e1','r_mix7'], #ASIAN ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: Asian alone or in combination with one or more other races -- (Estimate)
    ['B02012e1','r_mix8'], #NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: Native Hawaiian and Other Pacific Islander alone or in combination with one or more other races -- (Estimate)
    ['B02013e1','r_mix9'], #SOME OTHER RACE ALONE OR IN COMBINATION WITH ONE OR MORE OTHER RACES: Total: Some other race alone or in combination with one or more other races -- (Estimate)

    ['B15003e1','e_total'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Total: Population 25 years and over -- (Estimate)
    ['B15003e2','e_none'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: No schooling completed: Population 25 years and over -- (Estimate)
    ['B15003e3','e_nursery'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Nursery school: Population 25 years and over -- (Estimate)
    ['B15003e4','e_k'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Kindergarten: Population 25 years and over -- (Estimate)
    ['B15003e5','e_1'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 1st grade: Population 25 years and over -- (Estimate)
    ['B15003e6','e_2'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 2nd grade: Population 25 years and over -- (Estimate)
    ['B15003e7','e_3'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 3rd grade: Population 25 years and over -- (Estimate)
    ['B15003e8','e_4'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 4th grade: Population 25 years and over -- (Estimate)
    ['B15003e9','e_5'],  #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 5th grade: Population 25 years and over -- (Estimate)
    ['B15003e10','e_6'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 6th grade: Population 25 years and over -- (Estimate)
    ['B15003e11','e_7'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 7th grade: Population 25 years and over -- (Estimate)
    ['B15003e12','e_8'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 8th grade: Population 25 years and over -- (Estimate)
    ['B15003e13','e_9'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 9th grade: Population 25 years and over -- (Estimate)
    ['B15003e14','e_10'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 10th grade: Population 25 years and over -- (Estimate)
    ['B15003e15','e_11'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 11th grade: Population 25 years and over -- (Estimate)
    ['B15003e16','e_12_no_diploma'], #"EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: 12th grade, no diploma: Population 25 years and over -- (Estimate)"
    ['B15003e17','e_hs'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Regular high school diploma: Population 25 years and over -- (Estimate)
    ['B15003e18','e_ged'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: GED or alternative credential: Population 25 years and over -- (Estimate)
    ['B15003e19','e_col1'], #"EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Some college, less than 1 year: Population 25 years and over -- (Estimate)"
    ['B15003e20','e_col1_no_deg'], #"EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Some college, 1 or more years, no degree: Population 25 years and over -- (Estimate)"
    ['B15003e21','e_asso'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Associate's degree: Population 25 years and over -- (Estimate)
    ['B15003e22','e_bach'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Bachelor's degree: Population 25 years and over -- (Estimate)
    ['B15003e23','e_mast'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Master's degree: Population 25 years and over -- (Estimate)
    ['B15003e24','e_prof'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Professional school degree: Population 25 years and over -- (Estimate)
    ['B15003e25','e_doct'], #EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER: Doctorate degree: Population 25 years and over -- (Estimate)

    ['B19001e1','hi_total'],  #HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): Total: Households -- (Estimate)
    ['B19001e2','hi_0_9'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): Less than $10,000: Households -- (Estimate)"
    ['B19001e3','hi_10_14'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $10,000 to $14,999: Households -- (Estimate)"
    ['B19001e4','hi_15_19'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $15,000 to $19,999: Households -- (Estimate)"
    ['B19001e5','hi_20_24'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $20,000 to $24,999: Households -- (Estimate)"
    ['B19001e6','hi_25_29'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $25,000 to $29,999: Households -- (Estimate)"
    ['B19001e7','hi_30_34'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $30,000 to $34,999: Households -- (Estimate)"
    ['B19001e8','hi_35_39'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $35,000 to $39,999: Households -- (Estimate)"
    # NOTE(review): label 'hi_40_45' covers $40,000 to $44,999; the family-income counterpart below is named 'fi_40_44'.
    ['B19001e9','hi_40_45'],  #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $40,000 to $44,999: Households -- (Estimate)"
    ['B19001e10','hi_45_49'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $45,000 to $49,999: Households -- (Estimate)"
    ['B19001e11','hi_50_59'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $50,000 to $59,999: Households -- (Estimate)"
    ['B19001e12','hi_60_74'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $60,000 to $74,999: Households -- (Estimate)"
    ['B19001e13','hi_75_99'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $75,000 to $99,999: Households -- (Estimate)"
    ['B19001e14','hi_100_124'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $100,000 to $124,999: Households -- (Estimate)"
    ['B19001e15','hi_125_149'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $125,000 to $149,999: Households -- (Estimate)"
    ['B19001e16','hi_150_199'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $150,000 to $199,999: Households -- (Estimate)"
    ['B19001e17','hi_200_plus'], #"HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $200,000 or more: Households -- (Estimate)"

    ['B19101e1','fi_total'], #FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): Total: Families -- (Estimate)
    ['B19101e2','fi_0_9'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): Less than $10,000: Families -- (Estimate)"
    ['B19101e3','fi_10_14'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $10,000 to $14,999: Families -- (Estimate)"
    ['B19101e4','fi_15_19'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $15,000 to $19,999: Families -- (Estimate)"
    ['B19101e5','fi_20_24'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $20,000 to $24,999: Families -- (Estimate)"
    ['B19101e6','fi_25_29'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $25,000 to $29,999: Families -- (Estimate)"
    ['B19101e7','fi_30_34'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $30,000 to $34,999: Families -- (Estimate)"
    ['B19101e8','fi_35_39'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $35,000 to $39,999: Families -- (Estimate)"
    ['B19101e9','fi_40_44'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $40,000 to $44,999: Families -- (Estimate)"
    ['B19101e10','fi_45_49'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $45,000 to $49,999: Families -- (Estimate)"
    ['B19101e11','fi_50_59'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $50,000 to $59,999: Families -- (Estimate)"
    ['B19101e12','fi_60_74'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $60,000 to $74,999: Families -- (Estimate)"
    ['B19101e13','fi_75_99'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $75,000 to $99,999: Families -- (Estimate)"
    ['B19101e14','fi_100_124'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $100,000 to $124,999: Families -- (Estimate)"
    ['B19101e15','fi_125_149'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $125,000 to $149,999: Families -- (Estimate)"
    ['B19101e16','fi_150_199'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $150,000 to $199,999: Families -- (Estimate)"
    ['B19101e17','fi_200_plus'], #"FAMILY INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): $200,000 or more: Families -- (Estimate)"

    ['B19301e1','p_c_i'], #PER CAPITA INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS): Total: Total population -- (Estimate)

    ['C17002e1','pir_total'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: Total: Population for whom poverty status is determined -- (Estimate)
    ['C17002e2','pir_000_049'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: Under .50: Population for whom poverty status is determined -- (Estimate)
    ['C17002e3','pir_050_099'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: .50 to .99: Population for whom poverty status is determined -- (Estimate)
    ['C17002e4','pir_100_124'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: 1.00 to 1.24: Population for whom poverty status is determined -- (Estimate)
    ['C17002e5','pir_125_149'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: 1.25 to 1.49: Population for whom poverty status is determined -- (Estimate)
    ['C17002e6','pir_150_184'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: 1.50 to 1.84: Population for whom poverty status is determined -- (Estimate)
    ['C17002e7','pir_185_199'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: 1.85 to 1.99: Population for whom poverty status is determined -- (Estimate)
    ['C17002e8','pir_200_plus'], #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: 2.00 and over: Population for whom poverty status is determined -- (Estimate)

    # NOTE(review): per its description, B27010e1 is the whole civilian noninstitutionalized
    # population (the denominator), not an uninsured count, despite the 'unins_' prefix.
    ['B27010e1','unins_total'], #TYPES OF HEALTH INSURANCE COVERAGE BY AGE: Total: Civilian noninstitutionalized population -- (Estimate)
    ['B27010e17','unins_0_18'], #TYPES OF HEALTH INSURANCE COVERAGE BY AGE: Under 18 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate)
    ['B27010e33','unins_18_34'], #TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 18 to 34 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate)
    ['B27010e50','unins_35_64'], #TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 to 64 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate)
    ['B27010e66','unins_65_plus'], #TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 65 years and over: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate)

]

# Create the Processed Census Data

In [None]:
def processRawData(df, fields, f_name):
    """Aggregate block-group census data to 5-digit FIPS level and pickle it.

    Args:
        df: Raw census DataFrame. Must contain a 'census_block_group' column
            (duplicate copies of that column are tolerated) plus every raw
            field ID listed in fields. The caller's frame is not modified.
        fields: Sequence of [raw_field_id, readable_name] pairs selecting the
            columns to keep and the names to give them in the output.
        f_name: Path the processed DataFrame is pickled to.

    Returns:
        A DataFrame with one row per 'fips_code' (the first five characters of
        the zero-padded block group ID). Every selected field is summed within
        the FIPS code, except per-capita income ('B19301e1'), which -- when it
        is selected -- becomes the population-weighted average using
        'B01001e1' as the weight.

    Raises:
        KeyError: If a requested field is missing from df, or if per-capita
            income is requested without the total-population field.
    """
    pci_id = 'B19301e1'   # per-capita income
    pop_id = 'B01001e1'   # total population, used as the PCI weight
    weight_col = '_pci_weight_pop'  # temporary helper column, dropped before returning

    # Raw field IDs in the order given, and the raw -> readable rename map
    # (the first label wins if an ID is listed twice).
    field_ids = [field[0] for field in fields]
    rename_map = {}
    for field in fields:
        rename_map.setdefault(field[0], field[1])

    # Select the wanted columns, drop the repeated 'census_block_group'
    # columns left over from the side-by-side concat, and take an explicit
    # copy so the assignments below never write into a view of the input.
    df = df[['census_block_group'] + field_ids]
    df = df.loc[:, ~df.columns.duplicated()].copy()

    # Restore leading zeroes lost when the ID was parsed as a number, then
    # derive the FIPS code from its first five characters.
    df['census_block_group'] = df['census_block_group'].astype('str').str.zfill(12)
    df['fips_code'] = df['census_block_group'].str[:5]

    sum_cols = list(dict.fromkeys(field_ids))
    if pci_id in field_ids:
        # Turn PCI into income mass (PCI * population) so it can be summed.
        # Only population from block groups that actually report a PCI goes
        # into the weight; otherwise rows with a missing PCI would count as
        # zero income and drag the average down.
        has_pci = df[pci_id].notna()
        df[pci_id] = df[pci_id] * df[pop_id]
        df[weight_col] = df[pop_id].where(has_pci)
        sum_cols.append(weight_col)

    # Group by the new FIPS code and sum the values.
    df = df.groupby(['fips_code'])[sum_cols].sum().reset_index()

    if pci_id in field_ids:
        # Back from summed income mass to the population-weighted average.
        df[pci_id] = df[pci_id] / df[weight_col]
        df = df.drop(columns=weight_col)

    # Rename all the columns to the human-readable versions.
    df = df.rename(columns=rename_map)

    with open(f_name, 'wb') as f:
        pickle.dump(df, f)

    return df

In [None]:
# Aggregate the selected fields per 5-digit FIPS code and save the result to 'safegraph_census.p'.
# This rebinds census_data: the raw table is replaced by the processed one.
census_data = processRawData(census_data, fields, 'safegraph_census.p')

In [None]:
# Preview the first three rows of the processed census table.
census_data.head(3)

# FIPS Metadata

In [None]:
def processFips(df, f_name):
    """Normalize the FIPS metadata table in place and pickle it.

    The state and county codes are converted to zero-padded strings (2 and 3
    characters), then two derived columns are added: 'fips' (state code
    followed by county code) and 'state_county' (the 'state' and 'county'
    values concatenated).

    Args:
        df: FIPS metadata DataFrame with 'state_fips', 'county_fips', 'state'
            and 'county' columns. It is modified in place.
        f_name: Path the updated DataFrame is pickled to.

    Returns:
        The same DataFrame object that was passed in.
    """
    def left_pad(width):
        # Build a per-value function that pads a string with leading zeroes.
        return lambda code: code.zfill(width)

    state_codes = df.state_fips.astype(str).map(left_pad(2))
    df['state_fips'] = state_codes

    county_codes = df.county_fips.astype(str).map(left_pad(3))
    df['county_fips'] = county_codes

    # Derived identifiers built from the cleaned columns.
    df['fips'] = df['state_fips'] + df['county_fips']
    df['state_county'] = df['state'] + df['county']

    with open(f_name, 'wb') as out_file:
        pickle.dump(df, out_file)

    return df

In [None]:
# Zero-pad the FIPS codes, add the combined 'fips' and 'state_county' columns,
# and save the result to 'safegraph_ref_data_fips.p'.
data_fips = processFips(data_fips, 'safegraph_ref_data_fips.p')

In [None]:
# Preview the first three rows of the processed FIPS metadata.
data_fips.head(3)

# Geographic Metadata

In [None]:
def processGeo(df, f_name):
    """Total the land and water amounts per 5-digit FIPS code and pickle them.

    Args:
        df: Geographic metadata DataFrame with 'census_block_group',
            'amount_land' and 'amount_water' columns. It is modified in place:
            'census_block_group' becomes a 12-character zero-padded string and
            a 'fips' column (its first five characters) is added.
        f_name: Path the aggregated DataFrame is pickled to.

    Returns:
        A new DataFrame with columns 'fips_code', 'amount_land' (cast to
        int64) and 'amount_water', one row per FIPS code.
    """
    # Restore the leading zeroes of the block group ID, then cut out the prefix.
    block_groups = df.census_block_group.astype(str).map(lambda code: code.zfill(12))
    df['census_block_group'] = block_groups
    df['fips'] = df['census_block_group'].str[:5]

    totals = df.groupby(['fips'])[['amount_land', 'amount_water']].sum()
    totals['amount_land'] = totals.amount_land.astype('int64')

    # Move the group key back into a regular column under its final name.
    totals = totals.reset_index(level=0).rename(columns={'fips': 'fips_code'})

    with open(f_name, 'wb') as out_file:
        pickle.dump(totals, out_file)

    return totals

In [None]:
# Sum the land and water amounts per 5-digit FIPS code and save the result to 'safegraph_land.p'.
data_land = processGeo(data_geo, 'safegraph_land.p')

In [None]:
# Preview the first three rows of the per-FIPS land/water totals.
data_land.head(3)