# Cal-CRAI Index: Weighting Climate Risk Scenarios

**Order of operations**:
1) Metric handling \
   a) Retrieve & clean data \
   b) Merge data domains together \
   c) Calculate domain medians and adjusted scores \
   d) Pull climate domain and clean & merge

2) Custom weight per scenario
   * save each scenario as csv
   
3) Bin data \
   a) Merge index scores per scenario together \
   b) Bin values into 5 sections per scenario \
   c) Visualize the binned weighted scenarios

4) Upload each scenario index .csv to AWS

In [1]:
import pandas as pd
import numpy as np
import os
import sys

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, delete_items
from scripts.utils.cal_crai_calculations import calculate_weighted_index, weight_domains, print_index_summary
from scripts.utils.cal_crai_plotting import index_plot, plot_hazard_score

## Step 1: Metric level
### 1a) Retrieve metric files and process

In [None]:
# Pull the index CSVs from AWS into the local output folder
output_folder = 'aws_csvs'
aws_dir = '3_fair_data/index_data'
bucket_name = 'ca-climate-index'

pull_csv_from_directory(
    bucket_name,
    aws_dir,
    output_folder,
    search_zipped=False,
    print_name=False,
)

In [3]:
# Load the four core domain score tables and the climate hazard table,
# in that order, from the locally pulled CSVs.
(
    built_domain,
    governance_domain,
    natural_domain,
    society_domain,
    climate_domain,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'aws_csvs/built_environment_domain_score.csv',
        'aws_csvs/governance_domain_score.csv',
        'aws_csvs/natural_systems_domain_score.csv',
        'aws_csvs/society_economy_domain_score.csv',
        'aws_csvs/climate_hazard_scores.csv',
    )
)

Clean data, isolate relevant columns

In [4]:
# Keep only the first two columns of each core domain table.
# Later cells merge these frames on 'GEOID', so that column must be one
# of the two that are kept.
built_domain_cleaned = built_domain.iloc[:, :2]
governance_domain_cleaned = governance_domain.iloc[:, :2]
natural_domain_cleaned = natural_domain.iloc[:, :2]
society_domain_cleaned = society_domain.iloc[:, :2]

# For climate domain, keep first seven columns.
# .copy() makes this an independent frame: a later cell assigns to its
# 'GEOID' column, and doing that on a bare slice of climate_domain
# triggers pandas' SettingWithCopyWarning.
climate_domain_cleaned = climate_domain.iloc[:, :7].copy()

### 1b) Merge data domains together

In [5]:
# Left-join the built, natural and society tables onto the governance
# table, matching rows by GEOID.
merged_domains = (
    governance_domain_cleaned
    .merge(built_domain_cleaned, on='GEOID', how='left')
    .merge(natural_domain_cleaned, on='GEOID', how='left')
    .merge(society_domain_cleaned, on='GEOID', how='left')
)

In [None]:
# Width of a zero-padded GEOID string.
# NOTE(review): 11 is the census-tract GEOID width (2 state + 3 county +
# 6 tract digits) -- confirm these tables are tract-level.
GEOID_WIDTH = 11

# Ensure all values in GEOID are strings, then restore the leading zero
# that is lost when the CSV column is parsed as a number.
# Padding to a fixed width (instead of "longest value + 1") keeps this
# cell idempotent: re-running it no longer prepends an extra '0' each time.
merged_domains['GEOID'] = merged_domains['GEOID'].astype(str).str.zfill(GEOID_WIDTH)

# Short names for the long min-max standardized column names.
# NOTE(review): the next section reads '<domain>_domain_score' columns
# from this frame. rename() leaves labels that are not present untouched,
# so confirm these source names still exist in the pulled CSVs.
domain_index_names = {
    'summed_indicators_governance_domain_min_max_standardized': 'governance_domain_index',
    'summed_indicators_built_domain_min_max_standardized': 'built_domain_index',
    'summed_indicators_natural_domain_min_max_standardized': 'natural_systems_domain_index',
    'summed_indicators_society_economy_domain_min_max_standardized': 'society_economy_domain_index',
}
merged_domains = merged_domains.rename(columns=domain_index_names)

# Show the result
merged_domains

### 1c) Calculate data domain median and adjusted columns

In [None]:
# Work on a copy and add, for each of the three domains below, a constant
# column holding the median of that domain's score column.
all_domains = merged_domains.copy()
for domain in ('built_environment', 'natural_systems', 'society_economy'):
    all_domains[f'{domain}_median'] = merged_domains[f'{domain}_domain_score'].median()

all_domains

In [None]:
def calculate_adjusted_value(row, category):
    """Return a domain score's relative deviation from its median.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (a dict, a pandas Series for
        one row, or a whole DataFrame). It must provide the keys
        ``'{category}_domain_score'`` and ``'{category}_median'``.
    category : str
        Domain prefix used to build the two keys, e.g.
        ``'natural_systems'``.

    Returns
    -------
    ``(score - median) / median`` for the given category. A zero median
    is not guarded against.
    """
    score = row[f'{category}_domain_score']
    median = row[f'{category}_median']
    return (score - median) / median

# Compute the adjusted column for each category in one vectorized pass.
# calculate_adjusted_value only does column lookups and arithmetic, so
# passing it the whole DataFrame returns a Series with one value per row.
# These are the same numbers the former iterrows()/.at loop produced,
# without the per-row Python overhead.
for category in ['natural_systems', 'society_economy', 'built_environment']:
    all_domains[f'{category}_tract_adjusted'] = calculate_adjusted_value(all_domains, category)

# Display the DataFrame
all_domains

### 1d) Pull climate domain and clean & merge

In [None]:
# Preview the climate hazard table before its GEOID column is cleaned
climate_domain_cleaned

Clean climate dataframe

In [None]:
# Cast GEOID to text and left-pad with zeros to the width of the longest
# GEOID in merged_domains (which was already padded in an earlier cell).
geoid_width = merged_domains['GEOID'].str.len().max()
climate_domain_cleaned['GEOID'] = (
    climate_domain_cleaned['GEOID'].astype(str).str.zfill(geoid_width)
)

# Show the cleaned table
climate_domain_cleaned

Merge core and climate dataframes together

In [None]:
# Attach the climate hazard columns to the domain table (left join on GEOID)
community_capacity_climate_hazards = all_domains.merge(
    climate_domain_cleaned,
    on='GEOID',
    how='left',
)
community_capacity_climate_hazards.head()

## Step 2) Custom Weighting to calculate Cal-CRAI score for specific climate risks
* utilize weight domains function to adjust domain coefficients to desired weighting factor

## Wildfire

In [None]:
# Wildfire scenario: apply the scenario's domain weights, compute the
# index against the wildfire hazard score, and rename the index column so
# it identifies this scenario.
fire_score_col = 'wildfire_scenario_calcrai_score'
df_fire = calculate_weighted_index(
    weight_domains(community_capacity_climate_hazards, society=0.89, built=0.87, natural=0.42),
    'wildfire_hazard_score',
).rename(columns={'calcrai_score': fire_score_col})

# Save the scenario table, then print its summary
df_fire.to_csv('calcrai_wildfire_scenario_index.csv', index=False)
print_index_summary(df_fire, fire_score_col)

In [None]:
# Plot the wildfire scenario index on a 0-3 color scale
index_plot(
    df_fire,
    'wildfire_scenario_calcrai_score',
    scenario='wildfire',
    vmin=0,
    vmax=3,
)

## Extreme Heat

In [None]:
# Extreme heat scenario: apply the scenario's domain weights, compute the
# index against the extreme heat hazard score, and rename the index column
# so it identifies this scenario.
heat_score_col = 'extreme_heat_scenario_calcrai_score'
df_heat = calculate_weighted_index(
    weight_domains(community_capacity_climate_hazards, society=0.87, built=0.81, natural=0.45),
    'extreme_heat_hazard_score',
).rename(columns={'calcrai_score': heat_score_col})

# Save the scenario table, then print its summary
df_heat.to_csv('calcrai_heat_scenario_index.csv', index=False)
print_index_summary(df_heat, heat_score_col)

In [None]:
# Plot the extreme heat scenario index on a 0-3 color scale
index_plot(
    df_heat,
    'extreme_heat_scenario_calcrai_score',
    scenario='extreme heat',
    vmin=0,
    vmax=3,
)

## Inland Flooding

In [None]:
# Inland flooding scenario: apply the scenario's domain weights, compute
# the index against the inland flooding hazard score, and rename the index
# column so it identifies this scenario.
inland_flooding_score_col = 'inland_flooding_scenario_calcrai_score'
df_inland_flooding = calculate_weighted_index(
    weight_domains(community_capacity_climate_hazards, society=0.92, built=0.83, natural=0.21),
    'inland_flooding_hazard_score',
).rename(columns={'calcrai_score': inland_flooding_score_col})

# Save the scenario table, then print its summary
df_inland_flooding.to_csv('calcrai_inland_flooding_scenario_index.csv', index=False)
print_index_summary(df_inland_flooding, inland_flooding_score_col)

In [None]:
# Plot the inland flooding scenario index on a 0-3 color scale
index_plot(
    df_inland_flooding,
    'inland_flooding_scenario_calcrai_score',
    scenario='inland flooding',
    vmin=0,
    vmax=3,
)

## Sea Level Rise

In [None]:
# Sea level rise scenario: apply the scenario's domain weights, compute
# the index against the sea level rise hazard score, and rename the index
# column so it identifies this scenario.
sea_level_rise_score_col = 'sea_level_rise_scenario_calcrai_score'
df_sea_level_rise = calculate_weighted_index(
    weight_domains(community_capacity_climate_hazards, society=0.92, built=0.75, natural=0.56),
    'sea_level_rise_hazard_score',
).rename(columns={'calcrai_score': sea_level_rise_score_col})

# Save the scenario table, then print its summary
df_sea_level_rise.to_csv('calcrai_sea_level_rise_scenario_index.csv', index=False)
print_index_summary(df_sea_level_rise, sea_level_rise_score_col)

In [None]:
# Plot the sea level rise scenario index on a 0-3 color scale
index_plot(
    df_sea_level_rise,
    'sea_level_rise_scenario_calcrai_score',
    scenario='sea level rise',
    vmin=0,
    vmax=3,
)

## Drought

In [None]:
# Drought scenario: apply the scenario's domain weights, compute the index
# against the drought hazard score, and rename the index column so it
# identifies this scenario.
drought_score_col = 'drought_scenario_calcrai_score'
df_drought = calculate_weighted_index(
    weight_domains(community_capacity_climate_hazards, society=0.85, built=0.95, natural=0.21),
    'drought_hazard_score',
).rename(columns={'calcrai_score': drought_score_col})

# Save the scenario table, then print its summary
df_drought.to_csv('calcrai_drought_scenario_index.csv', index=False)
print_index_summary(df_drought, drought_score_col)

In [None]:
# Plot the drought scenario index on a 0-3 color scale
index_plot(
    df_drought,
    'drought_scenario_calcrai_score',
    scenario='drought',
    vmin=0,
    vmax=3,
)

## Step 3: Bin Data
### 3a) Merge index scores per scenario together

In [None]:
from functools import reduce


def _outer_join_on_geoid(left, right):
    """Outer-join two scenario tables on their shared 'GEOID' column."""
    return pd.merge(left, right, on='GEOID', how='outer')


# Scenario tables to combine
dfs = [df_drought, df_fire, df_heat, df_sea_level_rise, df_inland_flooding]

# Reduce each table to 'GEOID' plus its column(s) ending in '_calcrai_score'
dfs = [
    frame[['GEOID'] + [name for name in frame.columns if name.endswith('_calcrai_score')]]
    for frame in dfs
]

# Fold the tables together pairwise, left to right
merged_df = reduce(_outer_join_on_geoid, dfs)

# View the resulting merged dataframe
merged_df

### 3b) Bin values into 5 sections per scenario

In [None]:
# Bin each scenario score into five equal-frequency groups (quintiles).
# The output columns keep the '_quartile' suffix because later cells
# reference those exact names.
binned_df = merged_df.copy()

# Score columns to bin
columns_to_bin = [
    'drought_scenario_calcrai_score',
    'wildfire_scenario_calcrai_score',
    'extreme_heat_scenario_calcrai_score',
    'sea_level_rise_scenario_calcrai_score',
    'inland_flooding_scenario_calcrai_score',
]

# Quantile boundaries: 20% steps from 0 to 100%
quantile_edges = [0, 0.2, 0.4, 0.6, 0.8, 1.0]

for column in columns_to_bin:
    # labels=False yields integer bin codes instead of interval labels.
    # duplicates='drop' discards repeated bin edges, so a column with many
    # tied values can end up with fewer than five bins.
    bin_codes, bin_edges = pd.qcut(
        binned_df[column],
        q=quantile_edges,
        labels=False,
        duplicates='drop',
        retbins=True,
    )
    binned_df[column + '_quartile'] = bin_codes
    print(f'Bin edges for {column}: {bin_edges}')

# Keep only GEOID and the binned columns, then show the result
binned_df = binned_df.drop(columns=columns_to_bin)
binned_df

In [25]:
# Write the binned scenario table to a CSV file, without the row index
binned_csv_name = 'binned_weighted_data.csv'
binned_df.to_csv(binned_csv_name, index=False)

In [None]:
# Build a copy of the binned table in which the integer bin codes are
# replaced by percentile-range labels for plotting.
binned_df_viz = binned_df.copy()

# Default labels: five bins of 20% each
quintile_labels = {0: '0-20%', 1: '20-40%', 2: '40-60%', 3: '60-80%', 4: '80+%'}

# SLR is slightly different since 0-40% is bin 0, which shifts every later
# code by one. Code 4 is not expected for this column; it is mapped to
# '80+%' only to match what the previous replace-based code produced.
slr_column = 'sea_level_rise_scenario_calcrai_score_quartile'
slr_labels = {0: '0-40%', 1: '40-60%', 2: '60-80%', 3: '80+%', 4: '80+%'}

# Map one column at a time. Writing strings into a numeric column through
# .loc is deprecated in recent pandas (incompatible-dtype FutureWarning),
# and a DataFrame-wide replace() would also touch columns that are not bin
# codes. Series.map returns a new object column; missing values stay missing.
for column in binned_df_viz.columns:
    if not column.endswith('_quartile'):
        continue
    labels = slr_labels if column == slr_column else quintile_labels
    binned_df_viz[column] = binned_df_viz[column].map(labels)

In [None]:
# Inspect the labelled table that the binned plots below are drawn from
binned_df_viz

### 3c) Visualize the binned weighted scenarios

In [None]:
# Binned columns to plot, one figure per scenario
binned_columns = [
    'drought_scenario_calcrai_score_quartile',
    'wildfire_scenario_calcrai_score_quartile',
    'extreme_heat_scenario_calcrai_score_quartile',
    'sea_level_rise_scenario_calcrai_score_quartile',
    'inland_flooding_scenario_calcrai_score_quartile',
]

for column in binned_columns:
    # e.g. 'sea_level_rise_scenario_calcrai_score_quartile' -> 'sea level rise'
    scenario_name = column.partition('_scenario')[0].replace('_', ' ')

    index_plot(
        binned_df_viz,
        column,
        scenario=scenario_name,
        plot_title=True,
        vmin=0,
        vmax=5,
        plot_type='discrete',
        save_name=f'binned {scenario_name}',
    )

## Step 4) Upload each scenario index .csv to AWS

In [None]:
# Destination bucket and directory for the scenario index files
bucket_name = 'ca-climate-index'
directory = '3_fair_data/index_data'

# Scenario CSVs written earlier in this notebook
files = [
    'calcrai_wildfire_scenario_index.csv',
    'calcrai_heat_scenario_index.csv',
    'calcrai_inland_flooding_scenario_index.csv',
    'calcrai_sea_level_rise_scenario_index.csv',
    'calcrai_drought_scenario_index.csv',
]

# upload_csv_aws is called once per file, each wrapped in a one-item list
for file in files:
    upload_csv_aws([file], bucket_name, directory)

## Delete desired csv files
* by default, this removes every file and folder generated by this notebook

In [None]:
# Local folder that the pulled CSVs were saved into
folders_to_delete = ["aws_csvs"]

# CSVs written by this notebook: one per scenario plus the binned table
csv_files_to_delete = [
    "calcrai_wildfire_scenario_index.csv",
    "calcrai_heat_scenario_index.csv",
    "calcrai_inland_flooding_scenario_index.csv",
    "calcrai_sea_level_rise_scenario_index.csv",
    "calcrai_drought_scenario_index.csv",
    "binned_weighted_data.csv",
]

# Names matching the save_name values passed to the binned plots
png_files_to_delete = [
    "binned drought.png",
    "binned extreme heat.png",
    "binned inland flooding.png",
    "binned sea level rise.png",
    "binned wildfire.png",
]

delete_items(
    folders_to_delete,
    csv_files_to_delete,
    png_files=png_files_to_delete,
)