In [1]:
import csv
import os

import boto3

In [35]:
# Header names known to carry the state, mapped to the exact (case-sensitive)
# cell values that identify California in that column.
_CA_VALUES_BY_COLUMN = {
    'state': ('CA', 'California'),
    'STATE': ('CA',),
    'St Name': ('California',),
    'State': ('California',),
    'QNSD12_1': ('California',),
}


def _is_california_row(row):
    '''Return True if any recognised state column in `row` holds a California value.'''
    return any(
        row.get(column) in ca_values
        for column, ca_values in _CA_VALUES_BY_COLUMN.items()
    )


def subset_to_CA(input_file_name, output_file_name, directory):
    '''
    Filters a .csv file down to its California rows, then uploads the result to the AWS bucket.

    A row is kept when one of the recognised state columns ('state', 'STATE', 'St Name',
    'State', 'QNSD12_1') holds a California value; matching is exact and case-sensitive.
    The filtered file is written locally to `output_file_name` and then uploaded to the
    'ca-climate-index' bucket under '<directory>/<base name of output_file_name>'.

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.

    Parameters
    ----------
    input_file_name: string
        Name of original .csv file
    output_file_name: string
        Final output as a .csv file
    directory: string
        Directory path for output to upload to in AWS

    Raises
    ------
    ValueError
        If the input file has no header row, or if its header contains none of the
        recognised state columns. Nothing is written or uploaded in either case.
    '''

    ## set-up for AWS
    bucket_name = 'ca-climate-index'

    with open(input_file_name, 'r', newline='') as input_file:
        csv_reader = csv.DictReader(input_file)

        # fieldnames is None for an empty file and [] when the first line is blank;
        # DictWriter.writeheader() would otherwise fail later with an opaque TypeError.
        header = csv_reader.fieldnames
        if not header:
            raise ValueError(f"{input_file_name} is empty or has no header row")

        # Without a recognised state column every row would be dropped and a
        # header-only file would be uploaded silently, so fail loudly instead.
        if not any(column in _CA_VALUES_BY_COLUMN for column in header):
            raise ValueError(
                f"{input_file_name} has none of the recognised state columns "
                f"{sorted(_CA_VALUES_BY_COLUMN)}; found header {header}"
            )

        # Read the whole subset before opening the output, so the output path
        # may safely be the same file as the input.
        filtered_rows = [row for row in csv_reader if _is_california_row(row)]

    # Write the header followed by the California rows
    with open(output_file_name, 'w', newline='') as output_file:
        csv_writer = csv.DictWriter(output_file, fieldnames=header, delimiter=',')
        csv_writer.writeheader()
        csv_writer.writerows(filtered_rows)

    # Build the object key from the base file name only, and drop trailing
    # slashes from the prefix: S3 treats 'a//b' and 'a/b' as different keys.
    key_prefix = directory.rstrip('/')
    object_name = os.path.basename(output_file_name)
    object_key = f"{key_prefix}/{object_name}" if key_prefix else object_name

    # Save the file to AWS S3 using the client
    s3_client = boto3.client('s3')
    with open(output_file_name, 'rb') as data:
        s3_client.upload_fileobj(data, bucket_name, object_key)
    print(
        f"California subset ({len(filtered_rows)} rows) written to {output_file_name} "
        f"and uploaded to s3://{bucket_name}/{object_key}"
    )

In [6]:
# Example usage: filter 'emergency_management_grants.csv' to California rows and upload the
# subset under the FEMA emergency_management_performance_grants prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'emergency_management_grants.csv'
output_csv = 'fema_emergency_management_grants_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/emergency_management_performance_grants'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to fema_emergency_management_grants_subset.csv


In [3]:
# Example usage: filter 'nfip_community_status.csv' to California rows and upload the
# subset under the FEMA nfip_community_status prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'nfip_community_status.csv'
output_csv = 'fema_community_status_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/nfip_community_status'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to fema_community_status_subset.csv


In [4]:
# Example usage: filter 'hazard_mitigation_funding.csv' to California rows and upload the
# subset under the FEMA hazard_mitigation_funding prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'hazard_mitigation_funding.csv'
output_csv = 'hazard_mitigation_funding_subset.csv'
aws_directory = '2a_subset/governance/community_preparedness/fema/hazard_mitigation_funding'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to hazard_mitigation_funding_subset.csv


In [12]:
# Example usage: filter 'Fire_Stations.csv' to California rows and upload the
# subset under the usgs/fire_stations prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'Fire_Stations.csv'
output_csv = 'fire_station_subset.csv'
aws_directory = '2a_subset/governance/emergency_response/usgs/fire_stations'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to fire_station_subset.csv


In [21]:
# Example usage: filter 'allhlcn22.csv' to California rows and upload the
# subset under the bureau_labor_statistics/employment_hachman_index prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'allhlcn22.csv'
output_csv = 'employment_data_hachman_subset.csv'
aws_directory = '2a_subset/society_economy/economic_health/bureau_labor_statistics/employment_hachman_index'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to employment_data_hachman_subset.csv


In [25]:
# Example usage: filter 'foodaccess2019.csv' to California rows and upload the
# subset under the usda/food_access prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'foodaccess2019.csv'
output_csv = 'food_access_subset.csv'
aws_directory = '2a_subset/society_economy/vulnerable_populations/usda/food_access'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to food_access_subset.csv


In [33]:
# Example usage: filter 'fema_national_household_general_data.csv' to California rows and
# upload the subset under the FEMA fema_national_household_survey prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'fema_national_household_general_data.csv'
output_csv = 'fema_prepareness_survey_subset.csv'
aws_directory = '2a_subset/governance/personal_preparedness/fema/fema_national_household_survey'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to fema_prepareness_survey_subset.csv


In [36]:
# Example usage: filter 'FimaNfipClaims.csv' to California rows and upload the
# subset under the FEMA flood_claims_ca prefix.
# The call is wrapped in a triple-quoted string, so running this cell does not execute it.
'''
input_csv = 'FimaNfipClaims.csv'
output_csv = 'fema_claims_CA_subset.csv'
aws_directory = '2a_subset/climate_risk/flood/loss/fema/flood_claims_ca'
subset_to_CA(input_csv, output_csv, aws_directory)
'''

Merged and sorted files written to fema_claims_CA_subset.csv
