## Cal-CRAI metric calculation: Climate Risk Wetland Loss
* projected percent change in wetland habitat under 4.5 RCP at median model sea level rise per county

In [1]:
import os
import sys
import pandas as pd
import io
import numpy as np
import geopandas as gpd

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)
from scripts.utils.file_helpers import (
    pull_csv_from_directory, upload_csv_aws
) 

In [2]:
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/sea_level_rise/loss/climate_central/'
folder = 'csv_folder'

pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

Saved DataFrame as 'csv_folder\RCP_wetland_data.csv'


In [3]:
wetland_data = pd.read_csv(r'csv_folder/RCP_wetland_data.csv')

In [4]:
# Adjust the data
adjusted_wetland_data = wetland_data[22:]

# Set the first row as the header and reset index
adjusted_wetland_data.columns = adjusted_wetland_data.iloc[0]
adjusted_wetland_data = adjusted_wetland_data[1:].reset_index(drop=True)

# Drop the index column if it has been set as a column
adjusted_wetland_data.reset_index(drop=True, inplace=True)

# Rename columns to ensure no extra index is included
adjusted_wetland_data.columns.name = None

# Filter columns explicitly
columns_to_keep = [col for col in adjusted_wetland_data.columns 
                    if 'County' in col or '2000' in col or '2100' in col]
adjusted_wetland_data = adjusted_wetland_data[columns_to_keep]

adjusted_wetland_data.columns

Index(['County', 'RCP_2.6__5th_percentile__2000',
       'RCP_2.6__50th_percentile__2000', 'RCP_2.6__95th_percentile__2000',
       'RCP_4.5__5th_percentile__2000', 'RCP_4.5__50th_percentile__2000',
       'RCP_4.5__95th_percentile__2000', 'RCP_8.5__5th_percentile__2000',
       'RCP_8.5__50th_percentile__2000', 'RCP_8.5__95th_percentile__2000',
       'RCP_2.6__5th_percentile__2100', 'RCP_2.6__50th_percentile__2100',
       'RCP_2.6__95th_percentile__2100', 'RCP_4.5__5th_percentile__2100',
       'RCP_4.5__50th_percentile__2100', 'RCP_4.5__95th_percentile__2100',
       'RCP_8.5__5th_percentile__2100', 'RCP_8.5__50th_percentile__2100',
       'RCP_8.5__95th_percentile__2100'],
      dtype='object')

In [5]:
adjusted_wetland_data.head()

Unnamed: 0,County,RCP_2.6__5th_percentile__2000,RCP_2.6__50th_percentile__2000,RCP_2.6__95th_percentile__2000,RCP_4.5__5th_percentile__2000,RCP_4.5__50th_percentile__2000,RCP_4.5__95th_percentile__2000,RCP_8.5__5th_percentile__2000,RCP_8.5__50th_percentile__2000,RCP_8.5__95th_percentile__2000,RCP_2.6__5th_percentile__2100,RCP_2.6__50th_percentile__2100,RCP_2.6__95th_percentile__2100,RCP_4.5__5th_percentile__2100,RCP_4.5__50th_percentile__2100,RCP_4.5__95th_percentile__2100,RCP_8.5__5th_percentile__2100,RCP_8.5__50th_percentile__2100,RCP_8.5__95th_percentile__2100
0,Alameda,40.1,40.1,40.1,40.1,40.1,40.1,40.1,40.1,40.1,40.1,39.9,33.4,40.1,38.6,31.3,39.9,35.8,24.5
1,Alpine,,,,,,,,,,,,,,,,,,
2,Amador,,,,,,,,,,,,,,,,,,
3,Butte,,,,,,,,,,,,,,,,,,
4,Calaveras,,,,,,,,,,,,,,,,,,


In [6]:
# Function to calculate percent change between 2000 and 2100 columns
def calculate_percent_change(data, leave_alone=[]):
    # Convert columns to numeric, forcing non-numeric to NaN (skip columns in leave_alone)
    numeric_data = data.copy()
    for col in data.columns:
        if col not in leave_alone:
            numeric_data[col] = pd.to_numeric(data[col], errors='coerce')
    
    # Define columns for 2000 and 2100
    cols_2000 = [col for col in numeric_data.columns if '2000' in col]
    cols_2100 = [col for col in numeric_data.columns if '2100' in col]
    
    # Calculate percent change
    percent_change = pd.DataFrame()

    for cols_2000 in cols_2000:
        # Find the matching 2100 column
        col_2100 = cols_2000.replace('2000', '2100')

        if col_2100 in cols_2100:
            # Calculate percent change, handling NaN values
            percent_change[cols_2000 + '_to_' + col_2100] = (
                (numeric_data[col_2100] - numeric_data[cols_2000]) / numeric_data[cols_2000]
            ) * 100
    
    # Concatenate the percent change DataFrame with the original numeric data
    result = pd.concat([numeric_data, percent_change], axis=1)
    
    return result

# Function to rename columns, allowing some to be left unchanged
def rename_columns(data, leave_alone=[]):
    def rename_column(col):
        if col in leave_alone:
            return col
        words = col.split('_')
        return '_'.join(words[:4]) + '_percent_change'
    
    # Apply renaming function to columns
    data.columns = [rename_column(col) for col in data.columns]
    return data

# List of columns to leave unchanged
column_leave_alone = ['County']

In [7]:
# Run the calculation and renaming
adjusted_wetland_metric = calculate_percent_change(adjusted_wetland_data, leave_alone=column_leave_alone)

# Filter for columns that contain 'County' or 'to'
filtered_columns = [col for col in adjusted_wetland_metric.columns if 'County' in col or 'to' in col]

# Create a new DataFrame with only the filtered columns
filtered_wetland_metric = adjusted_wetland_metric[filtered_columns]
# Remove duplicate columns
filtered_wetland_metric = filtered_wetland_metric.loc[:, ~filtered_wetland_metric.columns.duplicated()]

wetland_metric_percent_change = rename_columns(filtered_wetland_metric, leave_alone=column_leave_alone)

wetland_metric_percent_change.columns = wetland_metric_percent_change.columns.str.lower()
wetland_metric_percent_change = wetland_metric_percent_change.applymap(lambda s: s.lower() if type(s) == str else s)

# Display the resulting DataFrame
wetland_metric_percent_change

  wetland_metric_percent_change = wetland_metric_percent_change.applymap(lambda s: s.lower() if type(s) == str else s)


Unnamed: 0,county,rcp_2.6__5th_percent_change,rcp_2.6__50th_percent_change,rcp_2.6__95th_percent_change,rcp_4.5__5th_percent_change,rcp_4.5__50th_percent_change,rcp_4.5__95th_percent_change,rcp_8.5__5th_percent_change,rcp_8.5__50th_percent_change,rcp_8.5__95th_percent_change
0,alameda,0.0,-0.498753,-16.708229,0.0,-3.740648,-21.945137,-0.498753,-10.723192,-38.902743
1,alpine,,,,,,,,,
2,amador,,,,,,,,,
3,butte,,,,,,,,,
4,calaveras,,,,,,,,,
5,colusa,,,,,,,,,
6,contra costa,0.0,-0.857143,-28.571429,0.0,-7.142857,-35.142857,-0.857143,-20.285714,-58.285714
7,del norte,0.0,0.0,-10.714286,0.0,0.0,-14.285714,0.0,-7.142857,-28.571429
8,el dorado,,,,,,,,,
9,fresno,,,,,,,,,


In [8]:
# read in CA census tiger file
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns={'field_1', 'geometry'})
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)

ca_tract_county

  ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)


Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [9]:
wetland_metric_merge = pd.merge(ca_tract_county, wetland_metric_percent_change, on='county', how='left')
wetland_metric_merge

Unnamed: 0,tract,countyfp,county,rcp_2.6__5th_percent_change,rcp_2.6__50th_percent_change,rcp_2.6__95th_percent_change,rcp_4.5__5th_percent_change,rcp_4.5__50th_percent_change,rcp_4.5__95th_percent_change,rcp_8.5__5th_percent_change,rcp_8.5__50th_percent_change,rcp_8.5__95th_percent_change
0,06085504321,085,santa clara,0.0,-0.970874,-14.563107,0.0,-4.854369,-16.504854,-0.970874,-10.679612,-24.271845
1,06085504410,085,santa clara,0.0,-0.970874,-14.563107,0.0,-4.854369,-16.504854,-0.970874,-10.679612,-24.271845
2,06085507003,085,santa clara,0.0,-0.970874,-14.563107,0.0,-4.854369,-16.504854,-0.970874,-10.679612,-24.271845
3,06085507004,085,santa clara,0.0,-0.970874,-14.563107,0.0,-4.854369,-16.504854,-0.970874,-10.679612,-24.271845
4,06085502204,085,santa clara,0.0,-0.970874,-14.563107,0.0,-4.854369,-16.504854,-0.970874,-10.679612,-24.271845
...,...,...,...,...,...,...,...,...,...,...,...,...
9124,06059001303,059,orange,0.0,0.000000,-21.428571,0.0,-5.952381,-28.571429,0.000000,-15.476190,-55.952381
9125,06059001304,059,orange,0.0,0.000000,-21.428571,0.0,-5.952381,-28.571429,0.000000,-15.476190,-55.952381
9126,06059001401,059,orange,0.0,0.000000,-21.428571,0.0,-5.952381,-28.571429,0.000000,-15.476190,-55.952381
9127,06013367200,013,contra costa,0.0,-0.857143,-28.571429,0.0,-7.142857,-35.142857,-0.857143,-20.285714,-58.285714


## Decided to use RCP 4.5 and 50th percentile for our final metric calulation

In [10]:
final_columns = ['tract', 'county', 'rcp_4.5__50th_percent_change']
wetland_metric_final = wetland_metric_merge[final_columns]
wetland_metric_final

Unnamed: 0,tract,county,rcp_4.5__50th_percent_change
0,06085504321,santa clara,-4.854369
1,06085504410,santa clara,-4.854369
2,06085507003,santa clara,-4.854369
3,06085507004,santa clara,-4.854369
4,06085502204,santa clara,-4.854369
...,...,...,...
9124,06059001303,orange,-5.952381
9125,06059001304,orange,-5.952381
9126,06059001401,orange,-5.952381
9127,06013367200,contra costa,-7.142857


In [11]:
# checking with Alameda, after hand calculating it should be -3.74 percent change
alameda = wetland_metric_final[wetland_metric_final['county'] == 'alameda']
alameda

Unnamed: 0,tract,county,rcp_4.5__50th_percent_change
80,06001428301,alameda,-3.740648
81,06001428302,alameda,-3.740648
159,06001428400,alameda,-3.740648
160,06001430900,alameda,-3.740648
174,06001431000,alameda,-3.740648
...,...,...,...
7511,06001435104,alameda,-3.740648
7512,06001405301,alameda,-3.740648
7516,06001982000,alameda,-3.740648
7517,06001450102,alameda,-3.740648


In [12]:
wetland_metric_final.to_csv('climate_wetland_loss_metric.csv')

Function Call

In [13]:
@append_metadata
def climate_slr_wetland_loss(input_csv, export=False, varname=''):
    '''
    Uploads the crop loss metrics to S3 bucket. The metric is:
    * projected percent change in wetland habitat under 4.5 RCP at median model sea level rise per county
    
    Data for this metric was sourced from Climate Central at:
    https://coastal.climatecentral.org/map/13/-117.0358/32.5017/?theme=wetlands&map_type=annual_accretion

    Methods
    -------
    Relevant columns to our data metrics were isolated.
    Percent change between estimated wetland area between 2000 and 2100 projections were calculated.
    Metric columns were named, isolated, and merged to CA census tracts based on shared county.
    
    Parameters
    ----------
    input_csv: string
        csv wetland loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI wetland loss metrics to AWS
        True = will upload resulting df containing CAL CRAI wetland loss metrics to AWS

    Script
    ------
    climate_slr_wetland_loss.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated (percentile data from 2000 and 2100)')
    print('Data transformation: percent change was calculated, isolated, and renamed.')
    print('Data transformation: data was merged with California census tracts.') 
 
    if export == True:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    if export == False:
        print(f'{input_csv} uploaded to AWS.')
 
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [14]:
input_csvs = [
            'climate_wetland_loss_metric.csv'
            ]

varnames = [
            'climate_climate_central_wetland_loss'
            ]

# Process the data and export
for input_csv, varname in zip(input_csvs, varnames):
    print(f'Processing {input_csv} with varname {varname}')
    climate_slr_wetland_loss(input_csv, export=False, varname=varname)
    print(f'Completed uploading {input_csv} with varname {varname}!')

Processing climate_wetland_loss_metric.csv with varname climate_climate_central_wetland_loss
Completed uploading climate_wetland_loss_metric.csv with varname climate_climate_central_wetland_loss!
