In [1]:
import pandas as pd
import os
import sys
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [2]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/society_economy/vulnerable_populations/usda/food_access/'
output_folder = 'vulnerable_food_data'

pull_csv_from_directory(bucket_name, aws_dir, output_folder, search_zipped=False)

Saved DataFrame as 'vulnerable_food_data\food_access_subset.csv'


In [3]:
# read in food access data (already subsetted for CA)
food_access_data = pd.read_csv(r'vulnerable_food_data/food_access_subset.csv')
print(len(food_access_data))
#os.remove('food_access_subset.csv')

8024


In [4]:
food_access_data = food_access_data[['CensusTract', 'County', 'Pop2010', 'LA1and10', 'LAPOP1_10']]
food_access_data['percent_1miurban_10mirural'] = (food_access_data['LAPOP1_10'] / food_access_data['Pop2010']) * 100

#### Use the 2010 population from this dataset as it is used for the estimated low access population values

In [5]:
food_access_data.rename(columns={'Pop2010': '2010_population', 'LA1and10': 'flag_over_1mileurban_10milerural_store', 'LAPOP1_10': 'population_over_1mileurban_10milerural_store'}, inplace=True)
food_access_data = food_access_data.rename(columns={'CensusTract':'census_tract'})
food_access_data.tail(10)

Unnamed: 0,census_tract,County,2010_population,flag_over_1mileurban_10milerural_store,population_over_1mileurban_10milerural_store,percent_1miurban_10mirural
8014,6115040303,Yuba County,6555,1,2382.0,36.338673
8015,6115040400,Yuba County,5538,1,1209.0,21.830986
8016,6115040500,Yuba County,4111,1,3450.0,83.921187
8017,6115040600,Yuba County,6130,1,6130.0,100.0
8018,6115040700,Yuba County,10879,0,0.0,0.0
8019,6115040800,Yuba County,4233,0,,
8020,6115040901,Yuba County,2783,0,0.0,0.0
8021,6115040902,Yuba County,1737,0,0.0,0.0
8022,6115041000,Yuba County,7357,0,,
8023,6115041100,Yuba County,4941,0,281.0,5.687108


## Calculate average percent population >1mi urban and >10mi rural from a store per county
* used to fact check after merging this dataset with 2021 ACS tracts and filling in extra tracts with average values per county

In [6]:
# Prompt the user to input the county name
county_name = input("Enter the name of the county: ")

columns_to_fill = [
                    'percent_1miurban_10mirural'
]

# Filter the dataframe for the specified county
county_data = food_access_data[food_access_data['County'] == county_name]

# Print out the average values for the specified county
print(f"Average values for {county_name}:")
for column in columns_to_fill:
    avg_value = county_data[column].mean()
    print(f"{column}: {avg_value}")

Average values for Yuba:
percent_1miurban_10mirural: nan


## Import and merge 2021 ACS tract data as this dataset has around 8,000 tracts

In [7]:
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_county_tract = pd.read_csv(county_tract)
ca_county_tract = ca_county_tract.rename(columns={'TRACT': 'census_tract'})
ca_county_tract = ca_county_tract.rename(columns={'County': 'county'})

ca_county_tract = ca_county_tract.drop(columns='Unnamed: 0')

ca_county_tract

Unnamed: 0,census_tract,COUNTYFP,county
0,6085504321,85,Santa Clara
1,6085504410,85,Santa Clara
2,6085507003,85,Santa Clara
3,6085507004,85,Santa Clara
4,6085502204,85,Santa Clara
...,...,...,...
9124,6059001303,59,Orange
9125,6059001304,59,Orange
9126,6059001401,59,Orange
9127,6013367200,13,Contra Costa


In [8]:
merged_food_access = pd.merge(ca_county_tract, food_access_data, on='census_tract', how='left')
print(len(merged_food_access))
merged_food_access.tail()

9129


Unnamed: 0,census_tract,COUNTYFP,county,County,2010_population,flag_over_1mileurban_10milerural_store,population_over_1mileurban_10milerural_store,percent_1miurban_10mirural
9124,6059001303,59,Orange,Orange County,5752.0,0.0,,
9125,6059001304,59,Orange,Orange County,3803.0,0.0,,
9126,6059001401,59,Orange,Orange County,5013.0,0.0,,
9127,6013367200,13,Contra Costa,Contra Costa County,5171.0,0.0,,
9128,6037578100,37,Los Angeles,,,,,


In [9]:
# Columns to fill NaN values
columns_to_fill = ['percent_1miurban_10mirural'
                    ]

# Add a new column indicating whether a value was originally NaN
original_na_flag_column = 'Original_NA_Flag'
merged_food_access[original_na_flag_column] = np.where(merged_food_access[columns_to_fill].isna().any(axis=1), 1, 0)

# Compute average values for each column grouped by 'County'
average_values_by_county = merged_food_access.groupby('county')[columns_to_fill].transform('mean')

# Fill NaN values in each column with the corresponding average value of that column for the respective 'County'
for column in columns_to_fill:
    na_mask = merged_food_access[column].isna()
    merged_food_access.loc[na_mask, column] = average_values_by_county.loc[na_mask, column]

print(len(merged_food_access))
merged_food_access.tail()

9129


Unnamed: 0,census_tract,COUNTYFP,county,County,2010_population,flag_over_1mileurban_10milerural_store,population_over_1mileurban_10milerural_store,percent_1miurban_10mirural,Original_NA_Flag
9124,6059001303,59,Orange,Orange County,5752.0,0.0,,25.604689,1
9125,6059001304,59,Orange,Orange County,3803.0,0.0,,25.604689,1
9126,6059001401,59,Orange,Orange County,5013.0,0.0,,25.604689,1
9127,6013367200,13,Contra Costa,Contra Costa County,5171.0,0.0,,36.159475,1
9128,6037578100,37,Los Angeles,,,,,29.74732,1


In [10]:
merged_food_access_metric = merged_food_access[['census_tract', 'county', 'percent_1miurban_10mirural']]
merged_food_access_metric

Unnamed: 0,census_tract,county,percent_1miurban_10mirural
0,6085504321,Santa Clara,27.547393
1,6085504410,Santa Clara,27.547393
2,6085507003,Santa Clara,27.547393
3,6085507004,Santa Clara,27.547393
4,6085502204,Santa Clara,27.547393
...,...,...,...
9124,6059001303,Orange,25.604689
9125,6059001304,Orange,25.604689
9126,6059001401,Orange,25.604689
9127,6013367200,Contra Costa,36.159475


In [11]:
merged_food_access_metric.to_csv('society_food_access_metric.csv')

In [12]:
@append_metadata
def calc_food_access(input_csv, export=False, varname = ''):
    '''
    Calculates the percentage of people living >1 mile from a grocery store in urban areas and 
    >10 miles in rural areas. Data is sourced from USDA's food access atlas: 
    https://www.ers.usda.gov/data-products/food-access-research-atlas/
    
    Methods
    -------
    Relevant columns within the original data were renamed and isolated to California for our 
    purposes. Percentage of population with low access to grocery stores was calculated by dividing 
    estimated population with low access by the total population (2010 data), as the data was based 
    on specific population estimates. Note this differs from other Cal-CRAI population-based estimates 
    using the 2020 population values. 
    
    Parameters
    ----------
    df: string
        the dataframe containing the food access data
    export: True/False boolean
        False = will not upload resulting df containing the food access metric to AWS
        True = will upload resulting df containing the food access metric to AWS
    export_filename: string
        name of the csv file to be uploaded to AWS

    Script
    ------
    society_vulnerable_food_access.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    if export == False:
        print('Data transformation: select relevant columns to calculate metric.')
        print('Data transformation: rename columns for increased transparency/readability.')
        print('Data transformation: import 2021 American Community Survey tract data.')
        print('Data transformation: New, empty census tracts were in-filled with the average metric value for the county that tract resides within.')
        return None
    
    # export to csv and upload to AWS
    if export == True:
        # pull csv from aws
        bucket_name = 'ca-climate-index'
        upload_csv_aws([input_csv], bucket_name, '3_fair_data/index_data')

        os.remove('food_access_subset.csv') # remove from local to clear up directory
        
    return merged_food_access_metric # returns df

In [14]:
food_access_data = pd.read_csv('society_food_access_metric.csv')

calc_food_access(food_access_data, export=False,
                    varname = 'society_usda_food_accessibility')