In [1]:
import csv
import boto3
import os
import sys

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [3]:
@append_metadata
def subset_to_CA(input_file_name, output_file_name, directory, export=False, varname=''):
    '''
    Subsets .csv files that contain 'state' header to California data, then uploads to AWS subset bucket
    
    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    
    Parameters
    ----------
    input_file_name: string
        Name of original .csv file
    output_file_name: string
        Final output as a .csv file
    directory: string
        Directory path for output to upload to in AWS
    export: bool
        If True, runs code and uploads output file to S3
        If False, just generates metadata
    varname: string
        Metric variable name used for the metadata call; not used by the subsetting code itself

    Script
    ------
    csv_california_subset.ipynb
    '''

    # (column, value) pairs that identify a California row. The source files
    # name and encode their state column differently, so every known variant
    # is checked; a row is kept if any one pair matches.
    california_markers = (
        ('state', 'CA'),
        ('state', 'California'),
        ('STATE', 'CA'),
        ('St Name', 'California'),
        ('State', 'California'),
        ('QNSD12_1', 'California'),
    )

    # run code if export is true; if false, only a message is printed (useful when appending metadata)
    if export:
        ## set-up for AWS (only needed when actually exporting)
        s3_client = boto3.client('s3')
        bucket_name = 'ca-climate-index'

        with open(input_file_name, 'r', newline='') as input_file:
            # Create a CSV reader
            csv_reader = csv.DictReader(input_file)

            # Extract the header from the reader
            header = csv_reader.fieldnames

            # Keep only the rows matching one of the California markers
            filtered_rows = [
                row for row in csv_reader
                if any(row.get(column) == value for column, value in california_markers)
            ]

        # Open the output CSV file for writing
        with open(output_file_name, 'w', newline='') as output_file:
            # Create a CSV writer
            csv_writer = csv.DictWriter(output_file, fieldnames=header, delimiter=',')

            # Write the header to the output file
            csv_writer.writeheader()

            # Write the filtered rows to the output file
            csv_writer.writerows(filtered_rows)

        # Save the file to AWS S3 using the client.
        # This must happen after the write handle above is closed: uploading while
        # it is still open can send a file whose buffered rows are not yet on disk.
        with open(output_file_name, 'rb') as data:
            s3_client.upload_fileobj(data, bucket_name, f"{directory}/{output_file_name}")
        print(f"Merged and sorted files written to {output_file_name}")
    else:
        # NOTE(review): nothing is uploaded on this path; the message text is kept
        # unchanged for now but is misleading -- consider rewording.
        print(f'{output_file_name} uploaded to AWS.')


# Metadata call for all metrics that use this function to subset to California
* Setting `export` to False skips the subsetting and upload code, so the first three arguments can be any placeholder value.
* This lets us focus on just uploading the function's metadata to the necessary metrics.

In [4]:
# Metric names passed as `varname` to the metadata-only calls below.
# NOTE(review): 'governance_fema_communitiy_flood' looks misspelled ("communitiy");
# confirm it matches the metric name used elsewhere before changing it.
list_vars = [
    'governance_fema_household_water_resilience',
    'governance_fema_household_power_resilience',
    'governance_fema_household_preparedness',
    'climate_fema_nfip_claims',
    'climate_fema_nfip_claim_cost',
    'society_usda_food_accessibility',
    'society_bls_hachman',
    'governance_usgs_fire_stations',
    'governance_fema_hazard_mitigation',
    'governance_fema_communitiy_flood',
    'governance_fema_emergency_grants'
]

# The loop variable is deliberately not called `vars`: that name would shadow
# the builtin vars() for every later cell in the notebook session.
# export=False, so the placeholder file arguments ('na') are never opened.
for metric_name in list_vars:
    subset_to_CA('na', 'na', 'na', export=False, varname=metric_name)

Running various metrics below:

In [6]:
# California subset of the FEMA emergency management performance grants file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
# NOTE(review): varname='test' looks like a leftover placeholder -- confirm it is intended.
input_csv = 'emergency_management_grants.csv'
output_csv = 'fema_emergency_management_grants_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/emergency_management_performance_grants'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='test')

In [8]:
# California subset of the NFIP community status file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'nfip_community_status.csv'
output_csv = 'fema_community_status_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/nfip_community_status'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [9]:
# California subset of the FEMA hazard mitigation funding file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'hazard_mitigation_funding.csv'
output_csv = 'hazard_mitigation_funding_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/hazard_mitigation_funding'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [10]:
# California subset of the fire stations file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'Fire_Stations.csv'
output_csv = 'fire_station_subset.csv'
aws_directory = '2a_subset/governance/emergency_response/usgs/fire_stations'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [12]:
# California subset of the employment data file used for the Hachman index
# (per the S3 path: bureau_labor_statistics/employment_hachman_index).
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'allhlcn22.csv'
output_csv = 'employment_data_hachman_subset.csv'
aws_directory = '2a_subset/society_economy/economic_health/bureau_labor_statistics/employment_hachman_index'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [14]:
# California subset of the USDA food access file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'foodaccess2019.csv'
output_csv = 'food_access_subset.csv'
aws_directory = '2a_subset/society_economy/vulnerable_populations/usda/food_access'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [13]:
# California subset of the FEMA national household survey file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
# NOTE(review): the output name spells "prepareness"; it is part of the S3 key,
# so confirm downstream readers before renaming.
input_csv = 'fema_national_household_general_data.csv'
output_csv = 'fema_prepareness_survey_subset.csv'
aws_directory = '2a_subset/governance/personal_preparedness/fema/fema_national_household_survey'
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

In [36]:
# California subset of the FEMA NFIP flood claims file.
# With export=True the output would be uploaded under `aws_directory` in S3;
# export=False here, so the filtering/upload branch of subset_to_CA is skipped.
input_csv = 'FimaNfipClaims.csv'
output_csv = 'fema_claims_CA_subset.csv'
aws_directory = '2a_subset/climate_risk/flood/loss/fema/flood_claims_ca'
# The original call was missing its closing parenthesis (SyntaxError on a fresh run).
subset_to_CA(input_csv, output_csv, aws_directory, export=False, varname='')

Merged and sorted files written to fema_claims_CA_subset.csv
