In [1]:
import pandas as pd
import os
import sys

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [6]:
# Pull the food access csv from the project's AWS bucket into the working directory.
# The helper's output below reports the file saved locally as 'food_access_subset.csv'.
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/society_economy/vulnerable_populations/usda/food_access/'

# search_zipped=False: presumably skips looking inside zip archives in the directory -- TODO confirm against file_helpers
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'food_access_subset.csv'


In [7]:
# Load the food access subset (already restricted to California) into memory,
# report how many rows it holds, then delete the local csv since the data now lives in the frame.
food_access_data = pd.read_csv('food_access_subset.csv')
print(food_access_data.shape[0])
os.remove('food_access_subset.csv')

8024


In [8]:
# Preview the first five rows of the raw food access data (head() defaults to 5 rows)
food_access_data.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractSeniors,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP
0,6001400100,California,Alameda County,1,2937,1314,0,2.0,0.07,0,...,634,2161,142,456,9,4,165,117,28,0
1,6001400200,California,Alameda County,1,1974,856,0,70.0,3.55,0,...,323,1659,33,147,0,4,131,151,59,0
2,6001400300,California,Alameda County,1,4865,2418,0,40.0,0.82,0,...,493,3468,518,423,5,17,434,399,423,38
3,6001400400,California,Alameda County,1,3703,1779,0,33.0,0.89,0,...,353,2628,456,278,6,14,321,332,157,115
4,6001400500,California,Alameda County,1,3517,1578,0,126.0,3.58,0,...,314,1905,955,209,5,27,416,340,188,51


In [11]:
# Keep only the columns needed for the metric: tract id, 2010 population,
# the low-access flag (LA1and10) and the low-access population estimate (LAPOP1_10).
# .copy() makes the subset an independent frame, so the column assignments in the
# following cells do not write into a slice of the original (SettingWithCopyWarning).
food_access_data = food_access_data[['CensusTract', 'Pop2010', 'LA1and10', 'LAPOP1_10']].copy()

In [19]:
# Percent of each tract's population with low food access:
# low-access population divided by total population, scaled to 0-100.
# Pop2010 is the denominator because the estimated low-access population values in this
# dataset are believed to be based on the 2010 population -- TODO confirm with the USDA documentation.
food_access_data['percent_1miurban_10mirural'] = (
    food_access_data['LAPOP1_10']
    .div(food_access_data['Pop2010'])
    .mul(100)
)

In [25]:
# Give the columns self-describing names. rename() returns a new frame, which is
# rebound to the same name instead of mutating the existing frame in place.
food_access_data = food_access_data.rename(columns={
    'Pop2010': '2010_population',
    'LA1and10': 'flag_over_1mileurban_10milerural_store',
    'LAPOP1_10': 'population_over_1mileurban_10milerural_store',
})

# Rows without a low-access population estimate produce NaN percentages; report them as 0.
# The result is assigned back to the column: calling fillna(inplace=True) on a selected
# column is chained assignment, which emits a FutureWarning in recent pandas and leaves
# the frame unchanged when Copy-on-Write is enabled.
food_access_data['percent_1miurban_10mirural'] = food_access_data['percent_1miurban_10mirural'].fillna(0)

In [26]:
# Display the processed frame (renamed columns plus the percent column) as the cell output
food_access_data

Unnamed: 0,CensusTract,2010_population,flag_over_1mileurban_10milerural,population_over_1mileurban_10milerural,percent_1miurban_10mirural
0,6001400100,2937,1,1711.0,58.256725
1,6001400200,1974,0,,0.000000
2,6001400300,4865,0,,0.000000
3,6001400400,3703,0,,0.000000
4,6001400500,3517,0,,0.000000
...,...,...,...,...,...
8019,6115040800,4233,0,,0.000000
8020,6115040901,2783,0,0.0,0.000000
8021,6115040902,1737,0,0.0,0.000000
8022,6115041000,7357,0,,0.000000


In [39]:
@append_metadata
def calc_unhealthy_days(df, export=False, export_filename=None, varname = ''):
    '''
    Calculates the percentage of people living >1 mile from a grocery store in urban areas and >10 miles in rural areas. Data is sourced from USDA's food access atlas: https://www.ers.usda.gov/data-products/food-access-research-atlas/
    
    Methods
    -------
    Relevant columns within the original data were renamed and isolated to California for our purposes. Percentage of population with low access to grocery stores was calculated by dividing estimated population with low access by the total population (2010 data)

    Script
    ------
    society_vulnerable_food_access.ipynb
    
    Parameters
    ----------
    df: pandas DataFrame
        the dataframe containing the food access data; must hold the columns
        'CensusTract', 'Pop2010', 'LA1and10' and 'LAPOP1_10'
    export: True/False boolean
        False = will not upload resulting df containing the food access metric to AWS
        True = will upload resulting df containing the food access metric to AWS
    export_filename: string
        name of the csv file to be uploaded to AWS (required when export is True)
    varname: string
        not used inside this function; presumably consumed by the append_metadata decorator (TODO confirm)

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # NOTE(review): the function name does not match what it computes (food access, not
    # unhealthy days); it is kept unchanged so existing callers and metadata keep working.
    # NOTE(review): the docstring sections and the print messages below are presumably read
    # by append_metadata -- confirm before restructuring or rewording them.

    # Fail early with a clear message instead of a confusing error from to_csv/os.remove later on.
    if export and not export_filename:
        raise ValueError('export_filename must be provided when export=True')

    print('Data transformation: select relevant columns to calculate metric')
    # .copy() so the caller's dataframe is never modified by the steps below
    food_access = df[['CensusTract', 'Pop2010', 'LA1and10', 'LAPOP1_10']].copy()

    # calculating percent metric: low-access population as a share of the 2010 population
    food_access['percent_1miurban_10mirural_store'] = (food_access['LAPOP1_10'] / food_access['Pop2010']) * 100

    print('Data transformation: rename columns for increased transparency')
    food_access = food_access.rename(columns={
        'Pop2010': '2010_population',
        'LA1and10': 'flag_over_1mileurban_10milerural_store',
        'LAPOP1_10': 'population_over_1mileurban_10milerural_store',
    })

    # Rows with a missing low-access population give NaN percentages; report them as 0.
    # Assign the result back: fillna(inplace=True) on a selected column is chained
    # assignment, which warns in recent pandas and is a no-op under Copy-on-Write.
    food_access['percent_1miurban_10mirural_store'] = food_access['percent_1miurban_10mirural_store'].fillna(0)

    # export to csv and upload to AWS
    if export:
        food_access.to_csv(export_filename)
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is called with a list of file names
        upload_csv_aws([export_filename], bucket_name, directory)

        # remove local files to clear up the directory; the input csv may already have
        # been deleted by the caller, so only remove what is actually present
        for local_file in ('food_access_subset.csv', export_filename):
            if os.path.exists(local_file):
                os.remove(local_file)

    return food_access # returns df

In [40]:
# Download the food access csv from AWS again and load it into a dataframe
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/society_economy/vulnerable_populations/usda/food_access/'
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

food_access_data = pd.read_csv('food_access_subset.csv')

# Compute the metric and upload it to AWS; the returned dataframe is shown as the cell output
calc_unhealthy_days(
    food_access_data,
    export=True,
    export_filename='society_food_access_metric.csv',
    varname='society_usda_food_accessibility',
)

Saved DataFrame as 'food_access_subset.csv'


Unnamed: 0,CensusTract,2010_population,flag_over_1mileurban_10milerural_store,population_over_1mileurban_10milerural_store,percent_1miurban_10mirural_store
0,6001400100,2937,1,1711.0,58.256725
1,6001400200,1974,0,,0.000000
2,6001400300,4865,0,,0.000000
3,6001400400,3703,0,,0.000000
4,6001400500,3517,0,,0.000000
...,...,...,...,...,...
8019,6115040800,4233,0,,0.000000
8020,6115040901,2783,0,0.0,0.000000
8021,6115040902,1737,0,0.0,0.000000
8022,6115041000,7357,0,,0.000000
