## Cal-CRAI Metric Calculation
Domain: Climate Risks \
Indicator: Sea Level Rise Exposure

This notebook calculates 8 metrics, sourced from Pacific Institute:
* Metric 1: Delta percent change in SLR tract vulnerability between 2000 and 2100
* Metric 2: Number of exposed buildings
* Metric 3: Monetary value of replacing buildings & contents
* Metric 4: Delta change in number of fire stations vulnerable to sea level rise between baseline and 1.4m rise conditions
* Metric 5: Delta change in number of police stations vulnerable to sea level rise between baseline and 1.4m rise conditions
* Metric 6: Delta change in number of schools vulnerable to sea level rise between baseline and 1.4m rise conditions
* Metric 7: Delta change in number of hospitals vulnerable to sea level rise between baseline and 1.4m rise conditions
* Metric 8: Number of wastewater treatment plants vulnerable to sea level rise under 1.4m rise conditions

Note: All layers will have an SLR mask applied in the 'climate hazard score' notebook so that the metrics focus on coastal impacts. More information on the SLR mask can be found in `climate_slr_mask_layer.ipynb`.

In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata
from scripts.utils.calculate_index import add_census_tracts

In [None]:
# pull the reprojected (2b_reproject) Pacific Institute SLR data from aws:
# both the geopackage layers and the csv files stored under `aws_dir`
# NOTE(review): the .gpkg files are later read from the working directory and the csv
# files from `output_folder`, so the helpers presumably download there -- confirm
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/climate_risk/sea_level_rise/exposure/projections/pacific_institute/'
output_folder = 'csv_folder'

pull_gpkg_from_directory(bucket_name, aws_dir)
pull_csv_from_directory(bucket_name, aws_dir, output_folder, search_zipped=False)

In [None]:
# pull the csv files stored under the raw (1_pull_data) Pacific Institute directory on aws,
# using the same local `output_folder` as the reprojected csv files
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/sea_level_rise/exposure/projections/pacific_institute/'
output_folder = 'csv_folder'

pull_csv_from_directory(bucket_name, aws_dir, output_folder, search_zipped=False)

### Metric 1: delta in % tract vulnerable to sea level rise between baseline and 1.4m rise conditions
    * the data is at the census block level, so there are multiple entries for census tracts
    * group by census tract and take the average percentage per tract

In [None]:
census_vulnerable_slr_tracts = pd.read_csv(r'csv_folder/climate_pacific_institute_slr_exposure_census_vulnerability.csv')
#os.remove('climate_pacific_institute_slr_exposure_census_vulnerability.csv')
census_vulnerable_slr_tracts

In [None]:
# keep only the tract id and the two percentage columns, renamed to the
# lower-case names used by the rest of the notebook
column_renames = {
    'CensusTract': 'tract',
    'Perc_2000': 'percent_2000',
    'Perc_2100': 'percent_2100',
}
slr_tracts_columns = (
    census_vulnerable_slr_tracts[list(column_renames)]
    .rename(columns=column_renames)
)
slr_tracts_columns

In [None]:
# this data is at the census block scale so there are multiple repeat census tracts
# census tracts are grouped together and have their percentage columns averaged
slr_tracts_grouped = slr_tracts_columns.groupby('tract')[['percent_2000', 'percent_2100']].mean().reset_index()
slr_tracts_grouped

In [None]:
# looking at how many unique tracts there are
unique = slr_tracts_grouped.tract.unique()
print(len(unique))

In [None]:
# read in CA census tiger file (tract/county lookup table)
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)

# adjust the columns and entries within so merging with slr data is easier
ca_tract_county.columns = ca_tract_county.columns.str.lower()


def _lower_if_str(value):
    """Lower-case string cells and leave every other value untouched."""
    return value.lower() if isinstance(value, str) else value


# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# fall back to applymap only when running on an older pandas
if hasattr(pd.DataFrame, 'map'):
    ca_tract_county = ca_tract_county.map(_lower_if_str)
else:
    ca_tract_county = ca_tract_county.applymap(_lower_if_str)

# 'countyfp' is not needed for the tract-level merges in metrics 1-3
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'countyfp'])

# drop the geometry column if the reader attached one
ca_tract_county = ca_tract_county.drop(columns=['geometry'], errors='ignore')

# Remove leading zeros from the 'tract' column so the ids match the tract ids
# of the slr tables once those are cast to strings
ca_tract_county['tract'] = ca_tract_county['tract'].str.lstrip('0')
ca_tract_county

In [None]:
# the join key must be a string on both sides before merging
for frame in (ca_tract_county, slr_tracts_grouped):
    frame['tract'] = frame['tract'].astype(str)

# left-join the slr percentages onto the full list of CA tracts
merged_slr_tracts = pd.merge(
    ca_tract_county,
    slr_tracts_grouped,
    how='left',
    on='tract',
)
merged_slr_tracts

In [None]:
# work on a copy so that adding the delta column does not also modify `merged_slr_tracts`
metric_slr_tracts = merged_slr_tracts.copy()

# calculate delta percent change between 2000 and 2100 columns
metric_slr_tracts['delta_percentage_change'] = (
    metric_slr_tracts['percent_2100'] - metric_slr_tracts['percent_2000']
) * 100
metric_slr_tracts

In [None]:
# Find the index of the maximum value in the delta column
max_index = metric_slr_tracts['delta_percentage_change'].idxmax()

# Retrieve the row corresponding to the maximum value
max_row = metric_slr_tracts.loc[max_index]

# Display the entire row
print(max_row)

In [12]:
# county FIPS codes kept by the SLR mask (see utils/slr_mask_layer.csv)
slr_coast_counties = [
    '001', '013', '015', '023', '037', '041',
    '045', '053', '055', '059', '067', '073',
    '075', '077', '079', '081', '083', '085',
    '087', '095', '097', '111', '113',
]

In [None]:
# add geoid col for mask: re-attach the leading zero that was stripped from 'tract'
# when the tract table was loaded
metric_slr_tracts['GEOID'] = '0' + metric_slr_tracts['tract']
metric_slr_tracts

In [None]:
# merged_slr_tracts['GEOID'] = '0' + merged_slr_tracts['tract']
gdf = add_census_tracts(metric_slr_tracts)
gdf.head(5)

In [15]:
# set any county not in slr_coast_counties to nan
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'delta_percentage_change'] = np.nan

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties)]['delta_percentage_change'].unique() # confirmed applied!

In [None]:
gdf['delta_percentage_change'].min(), gdf['delta_percentage_change'].max()

In [None]:
gdf.plot('delta_percentage_change', legend=True) # visual check

In [19]:
# clean up before export
# GEOID is kept next to the (zero-stripped) tract id so this file carries the same
# key columns as the other SLR metric exports
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'delta_percentage_change']]
gdf_to_save = gdf_to_save.rename(columns={'delta_percentage_change':'slr_vulnerability_delta_percentage_change'})

In [None]:
gdf_to_save

In [21]:
# save as csv
gdf_to_save.to_csv('climate_slr_vulnerable_metric.csv', index=False)

## Metric 2: # of exposed buildings

In [22]:
building_data = pd.read_csv(r'csv_folder/building_count.csv')

In [None]:
# building-use categories that make up the total exposed building count
columns_to_sum = [
    'Residential', 'Commercial', 'Industrial', 'Agricultural',
    'Religious', 'Government', 'Education',
]

# missing counts are treated as zero buildings
filled_counts = building_data[columns_to_sum].fillna(0)
building_data[columns_to_sum] = filled_counts

# row-wise total across all building-use categories
building_data['total'] = filled_counts.sum(axis=1)
building_data

In [None]:
building_columns = building_data[['Tract', 'CountyFIPS', 'total']]
building_columns

In [None]:
grouped_building_data = building_columns.groupby('Tract')['total'].sum().reset_index(name='building_count')
grouped_building_data = grouped_building_data.rename(columns={'Tract':'tract'})
grouped_building_data

In [None]:
# make sure columns are the same datatype
grouped_building_data['tract'] = grouped_building_data['tract'].astype(str)

# merge slr data with census tract data
merged_vulnerable_buildings = pd.merge(ca_tract_county, grouped_building_data, on='tract', how='left')
merged_vulnerable_buildings['GEOID'] = '0' + merged_vulnerable_buildings['tract']
merged_vulnerable_buildings

In [None]:
gdf = add_census_tracts(merged_vulnerable_buildings)
gdf.head(5)

In [28]:
# set any county not in slr_coast_counties to nan
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'building_count'] = np.nan

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties)]['building_count'].unique() # confirmed applied!

In [None]:
gdf['building_count'].min(), gdf['building_count'].max()

In [None]:
gdf.plot('building_count', legend=True)

In [32]:
# clean up before export
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'building_count']]
gdf_to_save = gdf_to_save.rename(columns={'building_count':'building_exposed_slr_count'})

In [33]:
gdf_to_save.to_csv('climate_slr_vulnerable_buildings_metric.csv', index=False)

## Metric #3: value of replacing buildings & contents from potential SLR

In [34]:
building_cost_data = pd.read_csv(r'csv_folder/total_value_building_contents.csv')
building_cost_columns = building_cost_data[['Tract', 'CountyFIPS', 'Total']]

In [None]:
# to check totals calculated in the cell below are accurate
check_tract = building_cost_columns[building_cost_columns['Tract'] == 6001400100]
total_sum = check_tract['Total'].sum()
print(total_sum)

In [None]:
grouped_building_cost = building_cost_columns.groupby('Tract')['Total'].sum().reset_index(name='building_content_cost')
grouped_building_cost = grouped_building_cost.rename(columns={'Tract':'tract'})
grouped_building_cost

In [None]:
# make sure columns are the same datatype
grouped_building_cost['tract'] = grouped_building_cost['tract'].astype(str)

# merge slr data with census tract data
merged_building_content_cost = pd.merge(ca_tract_county, grouped_building_cost, on='tract', how='left')
merged_building_content_cost

In [None]:
# how many non nan entries do we have in our final metric for building and content cost
# (Series.count() tallies non-NaN rows; len(...unique()) counted distinct values, NaN included)
print(merged_building_content_cost['building_content_cost'].count())

In [None]:
# apply mask
merged_building_content_cost['GEOID'] = '0' + merged_building_content_cost['tract']
gdf = add_census_tracts(merged_building_content_cost)
gdf.head(5)

In [40]:
# set any county not in slr_coast_counties to nan
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'building_content_cost'] = np.nan

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties)]['building_content_cost'].unique() # confirmed applied!

In [None]:
gdf['building_content_cost'].min(), gdf['building_content_cost'].max()

In [None]:
gdf.plot('building_content_cost', legend=True)

In [44]:
# clean up before export
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'building_content_cost']]
gdf_to_save = gdf_to_save.rename(columns={'building_content_cost':'slr_vulnerable_building_content_cost'})

In [45]:
gdf_to_save.to_csv('climate_slr_buildings_contents_cost_metric.csv', index=False)

## Now for metrics that do not have both baseline and 1.4m rise conditions
* number of wastewater treatment plants vulnerable to sea level rise under 1.4m rise conditions

In [46]:
# first read in all facility layers (geopackage files in the working directory):
# 2000 and 2100 versions for fire stations, hospitals, police stations and schools
slr_fire_stations_2000 = gpd.read_file('climate_pacific_institute_slr_exposure_fire_stations_2000.gpkg')
slr_fire_stations_2100 = gpd.read_file('climate_pacific_institute_slr_exposure_fire_stations_2100.gpkg')
slr_hospitals_2000 = gpd.read_file('climate_pacific_institute_slr_exposure_hospitals_2000.gpkg')
slr_hospitals_2100 = gpd.read_file('climate_pacific_institute_slr_exposure_hospitals_2100.gpkg')
slr_police_stations_2000 = gpd.read_file('climate_pacific_institute_slr_exposure_police_stations_2000.gpkg')
slr_police_stations_2100 = gpd.read_file('climate_pacific_institute_slr_exposure_police_stations_2100.gpkg')
slr_schools_2000 = gpd.read_file('climate_pacific_institute_slr_exposure_schools_2000.gpkg')
slr_schools_2100 = gpd.read_file('climate_pacific_institute_slr_exposure_schools_2100.gpkg')

# wastewater treatment plants: only a 2100 layer is read here
slr_wastewater_2100 = gpd.read_file('climate_pacific_institute_slr_exposure_wastewater_treatment_2100.gpkg')

In [None]:
county_slr_wastewater_2100 = slr_wastewater_2100[['USCB_COUNTYFP', 'USCB_NAME']]
county_slr_wastewater_2100.head()

In [None]:
county_slr_wastewater_2100.groupby('USCB_COUNTYFP').count().reset_index()

In [None]:
county_slr_wastewater_2100.USCB_COUNTYFP.unique()

## Looking at shared columns between all of the shape files
* all share countyfp columns which is what we want

In [None]:
# every facility layer read in above, in a fixed order
gdfs = [
    slr_fire_stations_2000,
    slr_fire_stations_2100,
    slr_hospitals_2000,
    slr_hospitals_2100,
    slr_police_stations_2000,
    slr_police_stations_2100,
    slr_schools_2000,
    slr_schools_2100,
    slr_wastewater_2100,
]

# column names of each layer, as sets
column_sets = [set(frame.columns) for frame in gdfs]

# columns present in every layer
shared_columns = set.intersection(*column_sets)

# columns each layer has beyond the shared ones
unique_columns = [layer_columns.difference(shared_columns) for layer_columns in column_sets]

print("Shared columns among all GeoDataFrames:", shared_columns)

# layers are numbered from 1 in the report
for position, extra_columns in enumerate(unique_columns, start=1):
    print(f"Unique columns in GeoDataFrame {position}: {extra_columns}")

In [51]:
# all geodataframes share a common county fp column, so we have a function to count number
# of rows for a specified county fp number
# this will help fact check our new dfs that contain county counts per metric
def count_entries_by_county_fp(df, county_fp_number, width=3):
    """
    Returns the count of entries in the 'USCB_COUNTYFP' column that match the given county_fp_number.

    Both the column values and the requested code are converted to strings and
    left-padded with zeros to `width` characters before comparing, so that
    1, '1' and '001' all refer to the same county.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data; must have a 'USCB_COUNTYFP' column.
    county_fp_number (str or int): The county FP number to match.
    width (int, optional): Zero-padded length of a county FP code. Defaults to 3.

    Returns:
    int: The count of matching entries.
    """
    # normalise both sides the same way so padding differences cannot hide a match
    county_codes = df['USCB_COUNTYFP'].astype(str).str.zfill(width)
    target_code = str(county_fp_number).zfill(width)
    return int(county_codes.eq(target_code).sum())

In [52]:
slr_wastewater_count = county_slr_wastewater_2100.groupby('USCB_COUNTYFP').size().reset_index(name='count')

In [None]:
# Rename the count column
slr_wastewater_count = slr_wastewater_count.rename(columns={'USCB_COUNTYFP':'countyfp', 'count': 'wastewater_count'})

slr_wastewater_metric = slr_wastewater_count
slr_wastewater_metric

In [None]:
# read in CA census tiger file (tract/county lookup table)
# NOTE: this rebinds `ca_tract_county`; unlike the earlier load it keeps 'countyfp'
# (the key for the county-level merges below) and leaves 'tract' unmodified
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns=['field_1'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()


def _lower_if_str(value):
    """Lower-case string cells and leave every other value untouched."""
    return value.lower() if isinstance(value, str) else value


# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# fall back to applymap only when running on an older pandas
if hasattr(pd.DataFrame, 'map'):
    ca_tract_county = ca_tract_county.map(_lower_if_str)
else:
    ca_tract_county = ca_tract_county.applymap(_lower_if_str)

# drop the geometry column if the reader attached one
ca_tract_county = ca_tract_county.drop(columns=['geometry'], errors='ignore')

ca_tract_county

In [55]:
# attach the county-level wastewater plant count to every tract in that county
merged_slr_wastewater_metric = pd.merge(ca_tract_county, slr_wastewater_metric, on='countyfp', how='left')

# counties absent from the wastewater layer have no vulnerable plants: record 0 instead of NaN,
# as the facility delta metrics do; inland counties are set to NaN afterwards by the SLR mask
merged_slr_wastewater_metric['wastewater_count'] = merged_slr_wastewater_metric['wastewater_count'].fillna(0)

In [None]:
# apply mask
merged_slr_wastewater_metric['GEOID'] = merged_slr_wastewater_metric['tract']
gdf = add_census_tracts(merged_slr_wastewater_metric)
gdf.head(5)

In [57]:
# set any county not in slr_coast_counties to nan
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'wastewater_count'] = np.nan

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties)]['wastewater_count'].unique() # confirmed applied!

In [None]:
gdf['wastewater_count'].min(), gdf['wastewater_count'].max()

In [None]:
gdf.plot('wastewater_count', legend=True)

In [61]:
# clean up before export
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'wastewater_count']]

In [62]:
gdf_to_save = gdf_to_save.drop(columns=['tract'])  # using a list
gdf_to_save = gdf_to_save.rename(columns={'wastewater_count':'slr_vulnerable_wastewater_treatment_count'})

In [64]:
gdf_to_save.to_csv('climate_slr_wastewater_metric.csv', index=False)

## Now for the rest of the metrics, metrics that find the change in number of at risk facilities between baseline conditions and their 1.4m SLR future counterparts
* delta number of fire stations vulnerable to sea level rise
* delta number of police stations vulnerable to sea level rise
* delta number of schools vulnerable to sea level rise
* delta number of hospitals vulnerable to sea level rise

In [None]:
# facility layers that exist for both baseline (2000) and 1.4m SLR (2100) conditions,
# keyed by the name used to look up their county counts; pairing each name with its
# layer explicitly avoids relying on the order/length of two separate lists
facility_layers = {
    'slr_fire_stations_2000': slr_fire_stations_2000,
    'slr_fire_stations_2100': slr_fire_stations_2100,
    'slr_hospitals_2000': slr_hospitals_2000,
    'slr_hospitals_2100': slr_hospitals_2100,
    'slr_police_stations_2000': slr_police_stations_2000,
    'slr_police_stations_2100': slr_police_stations_2100,
    'slr_schools_2000': slr_schools_2000,
    'slr_schools_2100': slr_schools_2100,
}
gdf_names = list(facility_layers)

# Dictionary to store the per-county count DataFrames for 2000 and 2100
count_dfs = {}

for layer_name, layer in facility_layers.items():
    # number of facilities per county FIPS code
    county_counts = (
        layer[['USCB_NAME', 'USCB_COUNTYFP']]
        .groupby('USCB_COUNTYFP')
        .size()
        .reset_index(name='count')
        .rename(columns={'USCB_COUNTYFP': 'countyfp'})
    )
    count_dfs[layer_name] = county_counts

# Dictionary to store the new DataFrames with differences
difference_dfs = {}

# Loop through the 2000 and 2100 pairs
for phrasing in ['fire_stations', 'hospitals', 'police_stations', 'schools']:
    df_2000 = count_dfs[f'slr_{phrasing}_2000']
    df_2100 = count_dfs[f'slr_{phrasing}_2100']

    # outer merge so counties that appear in only one of the two years are kept
    merged_df = df_2000.merge(df_2100, on='countyfp', how='outer', suffixes=('_2000', '_2100'))

    # a county missing from one year has zero vulnerable facilities in that year
    count_columns = ['count_2000', 'count_2100']
    merged_df[count_columns] = merged_df[count_columns].fillna(0)

    # change in the number of vulnerable facilities between baseline and 1.4m SLR
    diff_column = f'{phrasing}_count_diff'
    merged_df[diff_column] = merged_df['count_2100'] - merged_df['count_2000']

    # keep only the county key and the difference
    difference_dfs[f'slr_{phrasing}_count_diff'] = merged_df[['countyfp', diff_column]]

# Access the new DataFrames using difference_dfs dictionary
for name, df in difference_dfs.items():
    print(f"\n{name}:\n", df)

**Schools**

In [None]:
new_name = 'slr_schools_count_metric'
diff_column = 'schools_count_diff'

# attach the county-level delta to every tract of that county;
# tracts in counties without an entry get a delta of 0
merged_df = pd.merge(
    ca_tract_county,
    difference_dfs['slr_schools_count_diff'],
    how='left',
    on='countyfp',
)
merged_df[diff_column] = merged_df[diff_column].fillna(0)
merged_df['GEOID'] = merged_df['tract']

gdf = add_census_tracts(merged_df)
gdf

In [None]:
slr_coast_counties = ['001', '013', '015', '023', '037', '041', '045', '053', '055', '059', '067', '073', '075', '077', '079', '081', '083', '085', '087', '095', '097', '111', '113']
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'schools_count_diff'] = np.nan
print(gdf['schools_count_diff'].min(), gdf['schools_count_diff'].max())

In [None]:
gdf.plot('schools_count_diff', legend=True)

In [None]:
# export
csv_file_path = f"climate_{new_name}.csv"
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'schools_count_diff']]
gdf_to_save.to_csv(csv_file_path, index=False)
print(f"{csv_file_path}")

**Police stations**

In [None]:
new_name = 'slr_police_stations_count_metric'
diff_column = 'police_stations_count_diff'

# attach the county-level delta to every tract of that county;
# tracts in counties without an entry get a delta of 0
merged_df = pd.merge(
    ca_tract_county,
    difference_dfs['slr_police_stations_count_diff'],
    how='left',
    on='countyfp',
)
merged_df[diff_column] = merged_df[diff_column].fillna(0)
merged_df['GEOID'] = merged_df['tract']

gdf = add_census_tracts(merged_df)
gdf

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'police_stations_count_diff'] = np.nan
print(gdf['police_stations_count_diff'].min(), gdf['police_stations_count_diff'].max())

In [None]:
gdf.plot('police_stations_count_diff', legend=True)

In [None]:
# export
csv_file_path = f"climate_{new_name}.csv"
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'police_stations_count_diff']]
gdf_to_save.to_csv(csv_file_path, index=False)
print(f"{csv_file_path}")

**Fire Stations**

In [None]:
new_name = 'slr_fire_stations_count_metric'
diff_column = 'fire_stations_count_diff'

# attach the county-level delta to every tract of that county;
# tracts in counties without an entry get a delta of 0
merged_df = pd.merge(
    ca_tract_county,
    difference_dfs['slr_fire_stations_count_diff'],
    how='left',
    on='countyfp',
)
merged_df[diff_column] = merged_df[diff_column].fillna(0)
merged_df['GEOID'] = merged_df['tract']

gdf = add_census_tracts(merged_df)
gdf

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'fire_stations_count_diff'] = np.nan
print(gdf['fire_stations_count_diff'].min(), gdf['fire_stations_count_diff'].max())

In [None]:
gdf.plot('fire_stations_count_diff', legend=True)

In [None]:
# export
csv_file_path = f"climate_{new_name}.csv"
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'fire_stations_count_diff']]
gdf_to_save.to_csv(csv_file_path, index=False)
print(f"{csv_file_path}")

**Hospitals**

In [None]:
new_name = 'slr_hospitals_count_metric'
diff_column = 'hospitals_count_diff'

# attach the county-level delta to every tract of that county;
# tracts in counties without an entry get a delta of 0
merged_df = pd.merge(
    ca_tract_county,
    difference_dfs['slr_hospitals_count_diff'],
    how='left',
    on='countyfp',
)
merged_df[diff_column] = merged_df[diff_column].fillna(0)
merged_df['GEOID'] = merged_df['tract']

gdf = add_census_tracts(merged_df)
gdf

In [None]:
gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), 'hospitals_count_diff'] = np.nan
print(gdf['hospitals_count_diff'].min(), gdf['hospitals_count_diff'].max())

In [None]:
gdf.plot('hospitals_count_diff', legend=True)

In [None]:
# export
csv_file_path = f"climate_{new_name}.csv"
gdf_to_save = gdf[['tract', 'GEOID', 'county', 'hospitals_count_diff']]
gdf_to_save.to_csv(csv_file_path, index=False)
print(f"{csv_file_path}")

In [None]:
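## OLD CODE -- APPEARED TO BE OVERWRITING OLD DF FILES AND PRODUCING NAN VALUES IN ALL COASTAL COUNTIES
# NOTE(review): the loop below masks and exports the column `new_name` (e.g. 'slr_schools_count_metric'),
# but the merged data column is named '<facility>_count_diff'. Assigning NaN to a column that does not
# exist creates a new all-NaN column, which is the likely source of the NaN output described above.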
# # Dictionary to store the merged DataFrames
# merged_dfs = {}

# # Loop through each output DataFrame in new_dfs
# for name, df in difference_dfs.items():
#     # Create a new name for the DataFrame with '_metric' suffix
#     new_name = name.replace('_diff', '_metric')

#     # Merge with ca_tract_county
#     merged_df = pd.merge(ca_tract_county, df, on='countyfp', how='left')
#     merged_df.iloc[:, -1] = merged_df.iloc[:, -1].fillna(0)
#     merged_df['GEOID'] = merged_df['tract']
    
#     # Store the merged DataFrame in the dictionary
#     # merged_dfs[new_name] = merged_df

#     # apply mask
#     gdf = add_census_tracts(merged_df)
    
#     # set any county not in slr_coast_counties to nan
#     gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties), new_name] = np.nan
#     print(f'Values in masked counties for {new_name}:', gdf.loc[~gdf['COUNTYFP'].isin(slr_coast_counties)][new_name].unique()) # confirmed applied!
#     print(f'Values in unmasked counties for {new_name}:', gdf.loc[gdf['COUNTYFP'].isin(slr_coast_counties)][new_name].unique()) # confirmed applied!
    

#     # Save the merged DataFrame as a CSV file
#     csv_file_path = f"climate_{new_name}.csv"
#     gdf_to_save = gdf[['tract', 'GEOID', 'county', new_name]]
#     gdf_to_save.to_csv(csv_file_path, index=False)
#     print(f"{csv_file_path}")

# # # Access the merged DataFrames using merged_dfs dictionary
# # for name, df in merged_dfs.items():
# #     print(f"\n{name}:\n", df)

## Function Call

In [82]:
@append_metadata
def slr_pacific_metric_upload(input_csv, export=False, varname=""):
    '''
    The function uploads calculated metrics for sea level rise sourced from Pacific Institute at:
    https://pacinst.org/califonia-sea-level-rise-gis-data-downloads/
    
    The metrics include:
    * delta percent change in slr tract vulnerability between 2000 and 2100
    * number of exposed buildings
    * $ value of replacing buildings & contents
    * delta change in number of fire stations vulnerable to sea level rise between baseline and 1.4m rise conditions
    * delta change in number of police stations vulnerable to sea level rise between baseline and 1.4m rise conditions
    * delta change in number of schools vulnerable to sea level rise between baseline and 1.4m rise conditions
    * delta change in number of hospitals vulnerable to sea level rise between baseline and 1.4m rise conditions
    * number of wastewater treatment plants vulnerable to sea level rise under 1.4m rise conditions

    Parameters
    ----------
    input_csv: string
        path of a single calculated metric csv
    export: bool, optional
        True to upload the csv to AWS and remove the local copy, False to leave
        the local csv in place without uploading.
    varname: string, optional
        Not used inside the function body; presumably consumed by the
        append_metadata decorator (confirm against scripts.utils.write_metadata).

    Methods
    --------
    Relevant columns for Cal-CRAI SLR metrics were isolated from the original dataset.
    For 'number of' metrics, data were grouped and summed by county.
    For 'percent of' metrics, data were grouped by tract and averaged.
    Most datasets contained data for both baseline and 1.4m SLR, in those cases, the delta change was found and used as the metric
    Data were then merged with California census data so 'number of' metrics represent total vulnerable facilities for each county while 'percent of' metrics represent the vulnerability at the tract level.

    Script
    ------
    climate_slr_pacific.ipynb

    Note
    ------
    This function assumes users have configured the AWS CLI such that their access key / 
    secret key pair are stored in ~/.aws/credentials. 
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    ''' 
    print('Data transformation: eliminate excess headers and columns not relevant to metric calculation.')
    print('Data transformation: for number of metrics, data were grouped and summed by county.')
    print('Data transformation: for percentage of metrics, data were grouped by tract and averaged as there were multiple entries per tract.')
    print('Data transformation: data were merged to California census tracts so each metric value were attributed to CA tracts.')
    print('Data transformation: SLR mask applied to in-land counties to highlight coastal vulnerability and ensure no in-land tract skews results for coastal at-risk regions.')
    
    if export:
        # upload the metric csv to aws
        bucket_name = 'ca-climate-index'
        upload_csv_aws([input_csv], bucket_name, '3_fair_data/index_data')

        # remove the local file only once it has actually been uploaded
        if os.path.exists(input_csv):
            os.remove(input_csv)
    else:
        # nothing was uploaded, so say so and keep the local file for inspection
        print(f'{input_csv} not uploaded to AWS (export=False); local file kept.')

In [83]:
# metric csv files written earlier in this notebook, in the same order as `varnames`
input_csvs = [
    'climate_slr_vulnerable_metric.csv',
    'climate_slr_vulnerable_buildings_metric.csv',
    'climate_slr_buildings_contents_cost_metric.csv',
    'climate_slr_fire_stations_count_metric.csv',
    'climate_slr_police_stations_count_metric.csv',
    'climate_slr_schools_count_metric.csv',
    'climate_slr_hospitals_count_metric.csv',
    'climate_slr_wastewater_metric.csv',
]

# variable name associated with each csv above (paired by position)
varnames = [
    'climate_pacific_institute_slr_exposure_tracts_delta',
    'climate_pacific_institute_slr_exposure_building_count',
    'climate_pacific_institute_slr_exposure_building_cost',
    'climate_pacific_institute_slr_exposure_fire_stations_delta',
    'climate_pacific_institute_slr_exposure_police_stations_delta',
    'climate_pacific_institute_slr_exposure_schools_delta',
    'climate_pacific_institute_slr_exposure_hospitals_delta',
    'climate_pacific_institute_slr_exposure_wastewater_treatment',
]

# zip() would silently drop unpaired entries, so fail loudly on a mismatch
if len(input_csvs) != len(varnames):
    raise ValueError(
        f'input_csvs ({len(input_csvs)}) and varnames ({len(varnames)}) must have the same length'
    )

# Process the data and export, passing each file its own variable name
# (use varname='test' only for a dry run of the metadata step)
for metric_csv, var in zip(input_csvs, varnames):
    slr_pacific_metric_upload(metric_csv, export=True, varname=var)