## This script uploads manually downloaded data to the AWS bucket for the California Climate Risk Index

In [1]:
# Import libraries
import boto3
import pandas as pd
import os
import sys

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import append_metadata

In [2]:
# Create the S3 handles and bucket constants shared by the functions below.
# No credentials are set here; boto3 is called with its default configuration.
s3 = boto3.resource('s3')
s3_cl = boto3.client('s3') # for lower-level processes
bucket_name = 'ca-climate-index'
raw_path = '1_pull_data/' # path to raw datafiles in AWS bucket

In [4]:
def aws_datasource_dirs(domain, datasource):
    """
    Creates a dir in the respective domain dir, if not already available.

    Parameters
    ----------
    domain: string
        Domain folder located under the raw data path in the bucket
    datasource: string
        Organization of datasource; used as the sub-folder name

    Returns
    -------
    string
        Key of the datasource folder, i.e. '<raw_path><domain>/<datasource>/'
    """
    bucket = s3.Bucket(bucket_name)

    # path to folder in aws
    datasource_dir = '{0}{1}/{2}/'.format(raw_path, domain, datasource)

    # Only keys that start with the folder path can equal the folder key, so
    # list that prefix (not the whole domain) and stop at the first match.
    folder_exists = any(
        item.key == datasource_dir
        for item in bucket.objects.filter(Prefix=datasource_dir)
    )

    if not folder_exists:
        print('Creating folder for {}'.format(datasource_dir))
        # an empty object whose key ends in '/' acts as the folder placeholder
        bucket.put_object(Key=datasource_dir)

    return datasource_dir

@append_metadata
def manual_to_aws(domain, datasource, loc, export=False, varname=''):
    """
    Uploads data that was manually downloaded to AWS bucket.

    Parameters
    ----------
    domain: string
        built_environment, governance, natural_systems, society_economy, climate_risk
    datasource: string
        Organization of datasource
    loc: string
        Local path to filename to upload
    export: bool
        If True, exports file to specified AWS bucket
    varname: string
        Variable name; not used in this function body (presumably consumed
        by the append_metadata decorator -- confirm in write_metadata)

    Script
    ------
    manual_pull_upload.ipynb
    """
    # nothing is uploaded unless export is requested
    if not export:
        return

    # make sure the destination folder exists and get its key
    path_to_save = aws_datasource_dirs(domain, datasource)

    # extract the filename from path (Windows separators are normalised first)
    loc = loc.replace('\\', '/')
    fname = loc.split('/')[-1]

    # point to location of file locally and upload to aws; reuse path_to_save
    # rather than calling aws_datasource_dirs again, which would list the
    # bucket a second time for the same answer
    try:
        s3_cl.upload_file(loc, bucket_name, path_to_save + fname)
    except Exception as e:
        # best-effort upload: report the failure and carry on
        print(e)
    else:
        print('{0} saved to {1}'.format(fname, path_to_save))

### Pulling data pipeline file to obtain all variable names for metadata generation

In [3]:
# Locate the pipeline notes file under the repository root. The path is built
# from the same '../../' root that was appended to sys.path during imports,
# instead of reading sys.path[-1], which points somewhere else as soon as
# anything appends another entry to sys.path.
repo_root = os.path.expanduser('../../')
ref_file = os.path.join(repo_root, 'metadata', 'Full Data Pipeline Notes - 1_ Pull.csv')
df = pd.read_csv(ref_file)
# keep only the columns whose header label is not missing
df = df.loc[:, df.columns.notna()]

# drop columns that are not needed here
df = df.drop(columns=['Link','Unnamed: 15'])
# ref_df is a copy of df with missing cells replaced by the string 'N/A'
ref_df = df.fillna('N/A')

In [4]:
# Remove the row limit so a dataframe is displayed in full.
pd.options.display.max_rows = None
# Evaluate `ref_df` in this cell to inspect the whole reference table.

### Enter the metrics that were scraped and adjusted before uploading to AWS, as they have their own metadata

In [6]:
# Build list of file names excluding 'N/A'.
# Read from ref_df, where missing cells were filled with 'N/A'; df itself
# still holds NaN for those cells, so filtering df on 'N/A' removes nothing.
variable_names = [name for name in ref_df['Variable'] if name != 'N/A']

# Define problematic files which we are still investigating
# NOTE(review): 'parametics' may be a typo for 'paramedics' -- confirm against
# the 'Variable' column before changing it, since the match is by exact string.
skip_vars = ['natural_epa_air_quality',
             'governance_edd_responder_firefighter',
             'governance_edd_responder_nurse',
             'governance_edd_responder_parametics',
             'governance_edd_responder_police',
             'climate_noaa_flood_fatalities',
             'climate_usda_heat_crop_loss',
             'climate_usda_heat_crop_cost']

# Exclude files from the list
included_vars = [name for name in variable_names if name not in skip_vars]

### Generating metadata for all metrics that were manually downloaded and uploaded to AWS
* Loop through each variable name that was not excluded and generate its metadata

In [7]:
# Metadata-only pass: export stays False so nothing is uploaded, which means
# the three positional arguments are placeholders and can be anything.
for var in included_vars:
    print(var)
    manual_to_aws('all', 'all', 'any', export=False, varname=var)

built_cpuc_internet
built_hifld_radio_towers
built_hifld_cellular_towers
built_hifld_microwave_towers
built_hifld_paging_towers
built_hifld_tv_contour
built_hifld_mobile_towers
built_acs_housing_vacancy
built_acs_housing_age
built_acs_housing_quality
built_acs_mobile_homes
built_caltrans_airports
built_caltrans_road_bottlenecks
built_caltrans_highways
built_caltrans_rails
built_caltrans_bridges
built_cec_power_plants
built_cec_transmission_lines
built_pse_power_shutoff
built_swcrb_wastewater_facilities
climate_unl_drought_severity
climate_unl_drought_coverage
climate_unl_drought_duration
climate_caladapt_drought_probability
climate_usda_drought_crop_loss
climate_usda_drought_crop_cost
climate_koordinates_floodplain
climate_caladapt_flood_exposure_precipitation
climate_caladapt_flood_exposure_runoff
climate_fema_nfip_claims
climate_fema_nfip_claim_cost
climate_noaa_flood_crop_cost
climate_caladapt_heatday_warmnight
climate_caladapt_heat_loss_chillhours
climate_caladapt_heat_loss_season_