In [8]:
import os
import sys
import pandas as pd
import io
import numpy as np
import geopandas as gpd

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)
from scripts.utils.file_helpers import (
    pull_csv_from_directory, upload_csv_aws
) 

In [9]:
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/sea_level_rise/loss/climate_central/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'RCP_wetland_data.csv'


In [10]:
wetland_data = pd.read_csv('RCP_wetland_data.csv')


In [24]:
wetland_data = pd.read_csv('RCP_wetland_data.csv')

# Adjust the data
adjusted_wetland_data = wetland_data[22:]

# Set the first row as the header and reset index
adjusted_wetland_data.columns = adjusted_wetland_data.iloc[0]
adjusted_wetland_data = adjusted_wetland_data[1:].reset_index(drop=True)

# Drop the index column if it has been set as a column
adjusted_wetland_data.reset_index(drop=True, inplace=True)

# Rename columns to ensure no extra index is included
adjusted_wetland_data.columns.name = None

# Filter columns explicitly
columns_to_keep = [col for col in adjusted_wetland_data.columns 
                    if 'County' in col or '2020' in col or '2100' in col]
adjusted_wetland_data = adjusted_wetland_data[columns_to_keep]

adjusted_wetland_data.columns

Index(['County', 'RCP_2.6__5th_percentile__2020',
       'RCP_2.6__50th_percentile__2020', 'RCP_2.6__95th_percentile__2020',
       'RCP_4.5__5th_percentile__2020', 'RCP_4.5__50th_percentile__2020',
       'RCP_4.5__95th_percentile__2020', 'RCP_8.5__5th_percentile__2020',
       'RCP_8.5__50th_percentile__2020', 'RCP_8.5__95th_percentile__2020',
       'RCP_2.6__5th_percentile__2100', 'RCP_2.6__50th_percentile__2100',
       'RCP_2.6__95th_percentile__2100', 'RCP_4.5__5th_percentile__2100',
       'RCP_4.5__50th_percentile__2100', 'RCP_4.5__95th_percentile__2100',
       'RCP_8.5__5th_percentile__2100', 'RCP_8.5__50th_percentile__2100',
       'RCP_8.5__95th_percentile__2100'],
      dtype='object')

In [25]:
adjusted_wetland_data.head()

Unnamed: 0,County,RCP_2.6__5th_percentile__2020,RCP_2.6__50th_percentile__2020,RCP_2.6__95th_percentile__2020,RCP_4.5__5th_percentile__2020,RCP_4.5__50th_percentile__2020,RCP_4.5__95th_percentile__2020,RCP_8.5__5th_percentile__2020,RCP_8.5__50th_percentile__2020,RCP_8.5__95th_percentile__2020,RCP_2.6__5th_percentile__2100,RCP_2.6__50th_percentile__2100,RCP_2.6__95th_percentile__2100,RCP_4.5__5th_percentile__2100,RCP_4.5__50th_percentile__2100,RCP_4.5__95th_percentile__2100,RCP_8.5__5th_percentile__2100,RCP_8.5__50th_percentile__2100,RCP_8.5__95th_percentile__2100
0,Alameda,40.1,40.1,39.8,40.1,40.0,39.8,40.1,40.0,39.8,40.1,39.9,33.4,40.1,38.6,31.3,39.9,35.8,24.5
1,Alpine,,,,,,,,,,,,,,,,,,
2,Amador,,,,,,,,,,,,,,,,,,
3,Butte,,,,,,,,,,,,,,,,,,
4,Calaveras,,,,,,,,,,,,,,,,,,


In [40]:
# Function to calculate percent change between 2020 and 2100 columns
def calculate_percent_change(data, leave_alone=[]):
    # Convert columns to numeric, forcing non-numeric to NaN (skip columns in leave_alone)
    numeric_data = data.copy()
    for col in data.columns:
        if col not in leave_alone:
            numeric_data[col] = pd.to_numeric(data[col], errors='coerce')
    
    # Define columns for 2020 and 2100
    cols_2020 = [col for col in numeric_data.columns if '2020' in col]
    cols_2100 = [col for col in numeric_data.columns if '2100' in col]
    
    # Calculate percent change
    percent_change = pd.DataFrame()

    for col_2020 in cols_2020:
        # Find the matching 2100 column
        col_2100 = col_2020.replace('2020', '2100')

        if col_2100 in cols_2100:
            # Calculate percent change, handling NaN values
            percent_change[col_2020 + '_to_' + col_2100] = (
                (numeric_data[col_2100] - numeric_data[col_2020]) / numeric_data[col_2020]
            ) * 100
    
    # Concatenate the percent change DataFrame with the original numeric data
    result = pd.concat([numeric_data, percent_change], axis=1)
    
    return result

# Function to rename columns, allowing some to be left unchanged
def rename_columns(data, leave_alone=[]):
    def rename_column(col):
        if col in leave_alone:
            return col
        words = col.split('_')
        return '_'.join(words[:4]) + '_percent_change'
    
    # Apply renaming function to columns
    data.columns = [rename_column(col) for col in data.columns]
    return data

# List of columns to leave unchanged
column_leave_alone = ['County']

# Run the calculation and renaming
adjusted_wetland_metric = calculate_percent_change(adjusted_wetland_data, leave_alone=column_leave_alone)
adjusted_wetland_metric = rename_columns(adjusted_wetland_metric, leave_alone=column_leave_alone)

# Combine the results with the original data
wetland_metric = pd.concat([adjusted_wetland_data, adjusted_wetland_metric], axis=1)

# Display the resulting DataFrame
wetland_metric.head()

Unnamed: 0,County,RCP_2.6__5th_percentile__2020,RCP_2.6__50th_percentile__2020,RCP_2.6__95th_percentile__2020,RCP_4.5__5th_percentile__2020,RCP_4.5__50th_percentile__2020,RCP_4.5__95th_percentile__2020,RCP_8.5__5th_percentile__2020,RCP_8.5__50th_percentile__2020,RCP_8.5__95th_percentile__2020,...,RCP_8.5__95th_percent_change,RCP_2.6__5th_percent_change,RCP_2.6__50th_percent_change,RCP_2.6__95th_percent_change,RCP_4.5__5th_percent_change,RCP_4.5__50th_percent_change,RCP_4.5__95th_percent_change,RCP_8.5__5th_percent_change,RCP_8.5__50th_percent_change,RCP_8.5__95th_percent_change.1
0,Alameda,40.1,40.1,39.8,40.1,40.0,39.8,40.1,40.0,39.8,...,24.5,0.0,-0.498753,-16.080402,0.0,-3.5,-21.356784,-0.498753,-10.5,-38.442211
1,Alpine,,,,,,,,,,...,,,,,,,,,,
2,Amador,,,,,,,,,,...,,,,,,,,,,
3,Butte,,,,,,,,,,...,,,,,,,,,,
4,Calaveras,,,,,,,,,,...,,,,,,,,,,


In [31]:
print(wetland_metric.columns)

Index(['County', 'RCP_2.6__5th_percentile__2020',
       'RCP_2.6__50th_percentile__2020', 'RCP_2.6__95th_percentile__2020',
       'RCP_4.5__5th_percentile__2020', 'RCP_4.5__50th_percentile__2020',
       'RCP_4.5__95th_percentile__2020', 'RCP_8.5__5th_percentile__2020',
       'RCP_8.5__50th_percentile__2020', 'RCP_8.5__95th_percentile__2020',
       'RCP_2.6__5th_percentile__2100', 'RCP_2.6__50th_percentile__2100',
       'RCP_2.6__95th_percentile__2100', 'RCP_4.5__5th_percentile__2100',
       'RCP_4.5__50th_percentile__2100', 'RCP_4.5__95th_percentile__2100',
       'RCP_8.5__5th_percentile__2100', 'RCP_8.5__50th_percentile__2100',
       'RCP_8.5__95th_percentile__2100', 'County',
       'RCP_2.6__5th_percent_change', 'RCP_2.6__50th_percent_change',
       'RCP_2.6__95th_percent_change', 'RCP_4.5__5th_percent_change',
       'RCP_4.5__50th_percent_change', 'RCP_4.5__95th_percent_change',
       'RCP_8.5__5th_percent_change', 'RCP_8.5__50th_percent_change',
       'RCP_8.5__95th_p

In [32]:
wetland_metric.head()

Unnamed: 0,County,RCP_2.6__5th_percentile__2020,RCP_2.6__50th_percentile__2020,RCP_2.6__95th_percentile__2020,RCP_4.5__5th_percentile__2020,RCP_4.5__50th_percentile__2020,RCP_4.5__95th_percentile__2020,RCP_8.5__5th_percentile__2020,RCP_8.5__50th_percentile__2020,RCP_8.5__95th_percentile__2020,...,RCP_8.5__95th_percent_change,RCP_2.6__5th_percent_change,RCP_2.6__50th_percent_change,RCP_2.6__95th_percent_change,RCP_4.5__5th_percent_change,RCP_4.5__50th_percent_change,RCP_4.5__95th_percent_change,RCP_8.5__5th_percent_change,RCP_8.5__50th_percent_change,RCP_8.5__95th_percent_change.1
0,Alameda,40.1,40.1,39.8,40.1,40.0,39.8,40.1,40.0,39.8,...,24.5,0.0,-0.498753,-16.080402,0.0,-3.5,-21.356784,-0.498753,-10.5,-38.442211
1,Alpine,,,,,,,,,,...,,,,,,,,,,
2,Amador,,,,,,,,,,...,,,,,,,,,,
3,Butte,,,,,,,,,,...,,,,,,,,,,
4,Calaveras,,,,,,,,,,...,,,,,,,,,,


In [42]:
# Filter columns that include '4.5__50th_percentile'
matching_columns = [col for col in wetland_metric.columns if '4.5__50th_percentile' in col or '4.5__50th_percent' in col]

# Display the matching columns
print("Columns including '4.5__50th_percentile':")
print(matching_columns)

filtered_data = wetland_metric[matching_columns]
filtered_data.head(5)

Columns including '4.5__50th_percentile':
['RCP_4.5__50th_percentile__2020', 'RCP_4.5__50th_percentile__2100', 'RCP_4.5__50th_percent_change', 'RCP_4.5__50th_percent_change', 'RCP_4.5__50th_percent_change']


Unnamed: 0,RCP_4.5__50th_percentile__2020,RCP_4.5__50th_percentile__2100,RCP_4.5__50th_percent_change,RCP_4.5__50th_percent_change.1,RCP_4.5__50th_percent_change.2,RCP_4.5__50th_percent_change.3,RCP_4.5__50th_percent_change.4,RCP_4.5__50th_percent_change.5,RCP_4.5__50th_percent_change.6,RCP_4.5__50th_percent_change.7,RCP_4.5__50th_percent_change.8
0,40.0,38.6,40.0,38.6,-3.5,40.0,38.6,-3.5,40.0,38.6,-3.5
1,,,,,,,,,,,
2,,,,,,,,,,,
3,,,,,,,,,,,
4,,,,,,,,,,,


In [44]:
# Filter for columns that contain 'County' or 'to'
filtered_columns = [col for col in wetland_metric.columns if 'County' in col or 'percent' in col]

# Create a new DataFrame with only the filtered columns
filtered_wetland_metric = wetland_metric[filtered_columns]
# Remove duplicate columns
filtered_wetland_metric = filtered_wetland_metric.loc[:, ~filtered_wetland_metric.columns.duplicated()]

# Display the resulting DataFrame
filtered_wetland_metric.head()

Unnamed: 0,County,RCP_2.6__5th_percentile__2020,RCP_2.6__50th_percentile__2020,RCP_2.6__95th_percentile__2020,RCP_4.5__5th_percentile__2020,RCP_4.5__50th_percentile__2020,RCP_4.5__95th_percentile__2020,RCP_8.5__5th_percentile__2020,RCP_8.5__50th_percentile__2020,RCP_8.5__95th_percentile__2020,...,RCP_8.5__95th_percentile__2100,RCP_2.6__5th_percent_change,RCP_2.6__50th_percent_change,RCP_2.6__95th_percent_change,RCP_4.5__5th_percent_change,RCP_4.5__50th_percent_change,RCP_4.5__95th_percent_change,RCP_8.5__5th_percent_change,RCP_8.5__50th_percent_change,RCP_8.5__95th_percent_change
0,Alameda,40.1,40.1,39.8,40.1,40.0,39.8,40.1,40.0,39.8,...,24.5,40.1,40.1,39.8,40.1,40.0,39.8,40.1,40.0,39.8
1,Alpine,,,,,,,,,,...,,,,,,,,,,
2,Amador,,,,,,,,,,...,,,,,,,,,,
3,Butte,,,,,,,,,,...,,,,,,,,,,
4,Calaveras,,,,,,,,,,...,,,,,,,,,,


In [38]:
# Function to rename columns with an exception list
def rename_columns(col, leave_alone=[]):
    # Check if the column should be left alone
    if col in leave_alone:
        return col
    
    # Split the column name into words based on underscores
    words = col.split('_')
    
    # Get the first four words (or fewer if the column name has less than four words)
    first_four_words = '_'.join(words[:4])
    
    # Return the new name with '_percent_change' appended
    return first_four_words + '_percent_change'

# Define the list of columns to leave alone
leave_alone_columns = ['County']

# Rename the columns using the defined function
renamed_wetland_columns = filtered_wetland_metric
renamed_wetland_columns.columns = [rename_columns(col) for col in renamed_wetland_columns.columns]
renamed_wetland_columns = renamed_wetland_columns.rename(columns={'County_percent_change':'county'})
renamed_wetland_columns.columns = renamed_wetland_columns.columns.str.lower()
renamed_wetland_columns = renamed_wetland_columns.applymap(lambda s: s.lower() if type(s) == str else s)


# Display the resulting DataFrame
renamed_wetland_columns.head()

  renamed_wetland_columns = renamed_wetland_columns.applymap(lambda s: s.lower() if type(s) == str else s)


Unnamed: 0,county
0,alameda
1,alpine
2,amador
3,butte
4,calaveras


In [36]:
# read in CA census tiger file
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns={'field_1', 'geometry'})
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)

ca_tract_county

  ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)


Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [39]:
wetland_metric_merge = pd.merge(ca_tract_county, renamed_wetland_columns, on='county', how='left')
wetland_metric_merge

Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [None]:
wetland_metric_merge.to_csv('climate_wetland_loss_metric.csv')

NameError: name 'wetland_metric_merge' is not defined

Function Call

In [None]:
@append_metadata
def climate_slr_wetland_loss(input_csv, export=False, varname=''):
    '''
    Uploads the crop loss metrics to S3 bucket. The metrics are:
    * Drought/crop loss: average # of acres lost from drought per year
    * Drought/crop loss: average cost of crop loss from drought per year
    * Heat/crop loss: average # of acres lost from extreme heat per year
    * Heat/crop loss: average cost of crop loss from extreme heat per year
    
    Data for this metric was sourced from USDA Risk Management database:
    https://legacy.rma.usda.gov/data/cause.html

    Methods
    -------
    Relevant columns to our data metrics were isolated.
    Data was isolated to include relevant events.
    Data were grouped by county and summed to calculate final metrics.
    
    Parameters
    ----------
    input_csv: string
        csv crop loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI crop loss metrics to AWS
        True = will upload resulting df containing CAL CRAI crop loss metrics to AWS

    Script
    ------
    climate_slr_wetland_loss.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data was grouped by county and averaged by year.')
    print('Data transformation: data was merged with California census tracts.') 
 
    if export == True:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    if export == False:
        print(f'{input_csv} uploaded to AWS.')
 
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [None]:
input_csvs = [
            'climate_heat_crop_loss_acres_metric.csv',
            'climate_drought_crop_loss_acres_metric.csv',
            'climate_heat_crop_loss_cost_metric.csv',
            'climate_drought_crop_loss_cost_metric.csv',
            ]

varnames = [
            'climate_usda_heat_acres_lost',
            'climate_usda_drought_acres_lost',
            'climate_usda_heat_crop_cost',
            'climate_usda_drought_crop_cost'
            ]

# Process the data and export
for input_csv, varname in zip(input_csvs, varnames):
    print(f'Processing {input_csv} with varname {varname}')
    climate_slr_wetland_loss(input_csv, export=False, varname='test')
    print(f'Completed uploading {input_csv} with varname {varname}!')