# Cal-CRAI Index: Equal-Weighting Climate Risk

**Order of operations**:
1) Metric handling \
   a) Retrieve & clean data \
   b) Merge data domains together \
   c) Calculate domain medians and adjusted scores \
   d) Pull climate domain and clean & merge

2) Calculate index

3) Visualize index results

4) Bin values into 5 sections based on equal percentiles
   - Visualize
   
5) Export data as a GeoDataBase and upload to AWS
   - Add metadata to GeoDataBase

In [28]:
import pandas as pd
import os
import sys
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import shutil
from osgeo import ogr
from collections import OrderedDict

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.cal_crai_calculations import calculate_equal_weighted_index, add_census_tracts
from scripts.utils.cal_crai_plotting import index_plot, plot_region_domain

## Step 1: Metric level
### 1a) Retrieve metric files and process

In [None]:
# Pull the index input CSVs from AWS using the project helper
# pull_csv_from_directory (scripts.utils.file_helpers).
# Later cells read the domain score files from the 'aws_csvs' folder.
bucket_name = 'ca-climate-index'
aws_dir = '3_fair_data/index_data'
output_folder = 'aws_csvs'

# NOTE(review): search_zipped / print_name semantics live in the helper — confirm there
pull_csv_from_directory(bucket_name, aws_dir, output_folder, search_zipped=False, print_name=False)

In [30]:
# Load the four domain score tables and the climate hazard table from the
# local 'aws_csvs' folder, in the same order as the names they are bound to.
(
    built_domain,
    governance_domain,
    natural_domain,
    society_domain,
    climate_domain,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        r'aws_csvs/built_environment_domain_score.csv',
        r'aws_csvs/governance_domain_score.csv',
        r'aws_csvs/natural_systems_domain_score.csv',
        r'aws_csvs/society_economy_domain_score.csv',
        r'aws_csvs/climate_hazard_scores.csv',
    )
]

In [31]:
# Keep only the first two columns of each table (later cells use them as
# 'GEOID' plus the table's score column).
# Explicit copies are taken because a later cell assigns to the GEOID column of
# the trimmed climate table; writing into a bare slice of the raw frame
# triggers pandas' SettingWithCopyWarning.
built_domain_cleaned = built_domain.iloc[:, :2].copy()
governance_domain_cleaned = governance_domain.iloc[:, :2].copy()
natural_domain_cleaned = natural_domain.iloc[:, :2].copy()
society_domain_cleaned = society_domain.iloc[:, :2].copy()
climate_domain_cleaned = climate_domain.iloc[:, :2].copy()

### 1b) Merge data domains together

In [32]:
# Start from the governance table and left-join every other domain onto it by
# GEOID, so the governance rows define which tracts are kept.
merged_domains = governance_domain_cleaned
for domain_table in (built_domain_cleaned, natural_domain_cleaned, society_domain_cleaned):
    merged_domains = pd.merge(merged_domains, domain_table, on='GEOID', how='left')

In [None]:
# Normalise GEOID to a zero-padded string.
# A census tract GEOID is STATEFP (2) + COUNTYFP (3) + TRACTCE (6) = 11 characters;
# reading it from CSV as a number drops the leading '0' of the state code.
# Padding to the fixed width (instead of "current max length + 1") keeps this
# cell idempotent: re-running it no longer prepends an extra zero each time.
merged_domains['GEOID'] = merged_domains['GEOID'].astype(str).str.zfill(11)

# Display the dataframe
merged_domains

### 1c) Calculate data domain median and adjusted columns

In [None]:
# Broadcast each domain's median score into its own constant column.
# Governance has no median column: only these three domains are adjusted.
median_sources = {
    'built_environment_median': 'built_environment_domain_score',
    'natural_systems_median': 'natural_systems_domain_score',
    'society_economy_median': 'society_economy_domain_score',
}

all_domains = merged_domains.copy()
for median_col, score_col in median_sources.items():
    all_domains[median_col] = merged_domains[score_col].median()

all_domains

In [None]:
def calculate_adjusted_value(row, category):
    """Return a domain score's relative deviation from the domain median.

    Computes ``(score - median) / median`` using the ``{category}_domain_score``
    and ``{category}_median`` entries of ``row``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name whose values support arithmetic,
        e.g. a dict, a pandas Series (one tract) or a DataFrame (all tracts).
    category : str
        Domain prefix, e.g. ``'natural_systems'``.

    Returns
    -------
    The adjusted value; same shape as the looked-up entries.
    """
    median = row[f'{category}_median']
    score = row[f'{category}_domain_score']
    return (score - median) / median

# Compute the adjusted value for every tract, one domain at a time.
# calculate_adjusted_value only indexes by column name and does arithmetic, so
# passing the whole DataFrame yields the full column in one vectorised step
# instead of writing cell-by-cell through iterrows().
for category in ['natural_systems', 'society_economy', 'built_environment']:
    adjusted_col = f'{category}_tract_adjusted'
    all_domains[adjusted_col] = calculate_adjusted_value(all_domains, category)

# Display the DataFrame
all_domains

### 1d) Pull climate domain and clean & merge

In [None]:
# Preview the trimmed (first two columns) climate hazard table
climate_domain_cleaned

In [None]:
# Normalise GEOID to an 11-character, zero-padded string
# (STATEFP 2 + COUNTYFP 3 + TRACTCE 6). The width is fixed here rather than
# read from merged_domains, so the result does not depend on which cells have
# already been run. assign() returns a new frame, which avoids writing into a
# slice of the raw climate table.
climate_domain_cleaned = climate_domain_cleaned.assign(
    GEOID=climate_domain_cleaned['GEOID'].astype(str).str.zfill(11)
)

# Display the dataframe
climate_domain_cleaned

In [None]:
# Attach the hazard score by GEOID rather than by row position: a plain column
# assignment aligns on the row index, which silently mismatches tracts if the
# climate file is ordered differently or has a different number of rows.
# GEOID is normalised to the same 11-character zero-padded form used by
# all_domains. Tracts missing from the climate table get NaN; duplicate GEOIDs
# in the climate table make map() raise instead of picking one arbitrarily.
hazard_by_geoid = (
    climate_domain[['GEOID', 'hazard_score']]
    .assign(GEOID=lambda frame: frame['GEOID'].astype(str).str.zfill(11))
    .set_index('GEOID')['hazard_score']
)
all_domains['hazard_score'] = all_domains['GEOID'].map(hazard_by_geoid)
all_domains

## Step 2: Calculate Index

In [None]:
# Compute the equal-weighted index with the project helper
# calculate_equal_weighted_index (scripts.utils.cal_crai_calculations).
# NOTE(review): later cells read a 'calcrai_score' column from the result —
# confirm the full output schema against the helper.
calcrai_index_equal_weight = calculate_equal_weighted_index(all_domains)
calcrai_index_equal_weight

In [41]:
# Save the index table to a local CSV, omitting the DataFrame row index
calcrai_index_equal_weight.to_csv('calcrai_score.csv', index=False)

In [None]:
# Upload the CSV written by the previous cell to the AWS bucket
bucket_name = 'ca-climate-index'
directory = '3_fair_data/index_data'
file = 'calcrai_score.csv'

# upload_csv_aws is a project helper; it is passed a list of local file names
upload_csv_aws([file], bucket_name, directory)

Let's print some results to see the range of values in the synthetic data. 

In [None]:
# Summary statistics of the final index score
calcrai_scores = calcrai_index_equal_weight['calcrai_score']

print('Min score / less resilience: ', calcrai_scores.min())
print('Max score / more resilience: ', calcrai_scores.max())
print('Mean score / average resilience: ', calcrai_scores.mean())
print('Median score / median resilience: ', calcrai_scores.median())

## Step 3: Visualize Results

In [None]:
# Plot the index score with the project helper index_plot.
# vmin/vmax are passed as 0 and 3 (presumably the colour-scale limits — confirm in the helper).
index_plot(calcrai_index_equal_weight, column='calcrai_score', vmin=0, vmax=3)

In [None]:
# Plot the index score with plot_all=True and no county filter.
# Options are collected first so each setting sits on its own line.
statewide_plot_options = dict(
    counties_to_plot=None,
    plot_all=True,
    savefig=False,
    font_color='black',
    domain='',
    domain_label_map=None,
    vmin=0,
    vmax=3,
    column_to_plot='calcrai_score',
    cmap='RdYlBu',
    intro_title='Climate Risk and Adaptation Index',
    legend_label='Cal-CRAI Score',
)
plot_region_domain(calcrai_index_equal_weight, **statewide_plot_options)

In [None]:
# Plot the index score for the 'slr_coast' region only (plot_all=False).
# Options are collected first so each setting sits on its own line.
slr_coast_plot_options = dict(
    counties_to_plot=None,
    region='slr_coast',
    plot_all=False,
    savefig=False,
    font_color='black',
    domain='',
    domain_label_map=None,
    vmin=0,
    vmax=3,
    column_to_plot='calcrai_score',
    cmap='RdYlBu',
    intro_title='Climate Risk and Adaptation Index',
    legend_label='Cal-CRAI Score',
)
plot_region_domain(calcrai_index_equal_weight, **slr_coast_plot_options)

## Step 4: Bin the equal-weighted index score into 5 quintiles & visualize

In [None]:
# Bin the index score into five equal-percentile groups (quintiles).
# The output column keeps its historical '_quartile' suffix because later
# cells refer to it by that name.
binned_equal_df = calcrai_index_equal_weight.copy()

# Columns to bin
columns_to_bin = ['calcrai_score']

# Percentile boundaries: 0-20-40-60-80-100 %
quantile_breaks = [0, 0.2, 0.4, 0.6, 0.8, 1.0]

for column in columns_to_bin:
    # labels=False returns integer bin codes (0 = lowest group);
    # duplicates='drop' merges bins whose edges coincide instead of raising
    bin_codes, bin_edges = pd.qcut(
        binned_equal_df[column],
        q=quantile_breaks,
        labels=False,
        duplicates='drop',
        retbins=True,
    )
    binned_equal_df[column + '_quartile'] = bin_codes
    print(f'Bin edges for {column}: {bin_edges}')

# Keep only the tract identifier and its bin code
binned_equal_df = binned_equal_df[['GEOID','calcrai_score_quartile']]
binned_equal_df

In [None]:
# Translate the integer bin codes into percentile-range labels for plotting
bin_code_labels = {
    0: '0-20%',
    1: '20-40%',
    2: '40-60%',
    3: '60-80%',
    4: '80+%',
}

binned_df_viz = binned_equal_df.copy()
binned_df_viz['calcrai_score_quartile'] = binned_df_viz['calcrai_score_quartile'].replace(bin_code_labels)
binned_df_viz.head()

In [None]:
# Binned columns to plot
binned_columns = ['calcrai_score_quartile']

# Scenario label used in both the saved file name and the plot title
scenario_name = 'equal weight'

# One discrete map per binned column
for column in binned_columns:
    index_plot(
        binned_df_viz,
        column,
        save_name=f'binned {scenario_name}',
        scenario=scenario_name,
        plot_title=True,
        vmin=0,
        vmax=5,
        plot_type='discrete',
    )

## Step 5: Export as a GeoDataBase
### 5a) Prepare data

Merge with census tract and geometry boundaries before exporting to geodatabase

In [50]:
# Attach census tract information via the project helper add_census_tracts.
# NOTE(review): later cells expect tract attributes (e.g. INTPTLAT, INTPTLON)
# and a 'geometry' column in the result — confirm against the helper.
equal_weight_merged = add_census_tracts(calcrai_index_equal_weight)

In [None]:
# List the available columns before choosing which ones to export
equal_weight_merged.columns

Isolate desired columns for the GeoDataBase file

In [52]:
# Index results: identifier, raw domain scores, median-adjusted scores, and
# the final index components
index_columns = [
    'GEOID',
    'governance_domain_score', 'built_environment_domain_score', 'natural_systems_domain_score', 'society_economy_domain_score',
    'built_environment_tract_adjusted', 'natural_systems_tract_adjusted', 'society_economy_tract_adjusted',
    'community_capacity', 'hazard_score', 'calcrai_score',
]

# Census tract attributes and geometry
census_tract_columns = [
    'STATEFP', 'COUNTYFP', 'TRACTCE', 'NAME', 'NAMELSAD', 'MTFCC', 'FUNCSTAT',
    'ALAND', 'AWATER', 'INTPTLAT', 'INTPTLON', 'geometry',
]

# Export order: index results first, then tract attributes
geodatabase_columns = index_columns + census_tract_columns

equal_weight_merged_cleaned = equal_weight_merged[geodatabase_columns]

In [None]:
# Export the column-filtered table (equal_weight_merged_cleaned), not the full
# merge result: the selection of geodatabase columns was otherwise never used,
# so undocumented columns ended up in the geodatabase.
# A copy is taken so that setting the geometry does not write into a slice.
export_df = equal_weight_merged_cleaned.copy()

# Build one point per tract from its INTPTLON / INTPTLAT coordinates
geometry = [Point(xy) for xy in zip(export_df['INTPTLON'], export_df['INTPTLAT'])]

# Create GeoDataFrame
# NOTE(review): passing geometry= sets the 'geometry' column to these points —
# confirm that replacing the tract geometry selected for export is intended.
gdf = gpd.GeoDataFrame(export_df, geometry=geometry, crs="EPSG:4269")  # Specify CRS

# Define the output path
gdb_path = r'../index_method/Cal_CRAI.gdb'

# Save to geodatabase
gdf.to_file(gdb_path, layer="cal_climate_resilience_adaptation_index", driver="OpenFileGDB")

### 5b) Add metadata to the GeoDataBase file

In [None]:
# Add a column-level "Metadata" table and dataset-level metadata items to the
# geodatabase written in the previous step.
driver = ogr.GetDriverByName("OpenFileGDB")
if driver is None:
    raise RuntimeError("OGR driver 'OpenFileGDB' is not available in this GDAL build")

gdb = driver.Open(gdb_path, 1)  # Open in write mode
# OGR reports failure by returning None when GDAL exceptions are not enabled
if gdb is None:
    raise RuntimeError(f"Could not open geodatabase for writing: {gdb_path}")

# Create a new (non-spatial) table for metadata in the geodatabase
metadata_table = gdb.CreateLayer("Metadata", geom_type=ogr.wkbNone)
if metadata_table is None:
    raise RuntimeError(f"Could not create the 'Metadata' table in {gdb_path}")

# Define the fields for the metadata table
metadata_table.CreateField(ogr.FieldDefn("Column_Name", ogr.OFTString))
metadata_table.CreateField(ogr.FieldDefn("Data_Type", ogr.OFTString))
metadata_table.CreateField(ogr.FieldDefn("Description", ogr.OFTString))

# One record per exported column: (column name, pandas dtype, description).
# Column names must match the exported layer exactly; the built environment
# entries use the full 'built_environment_*' names used throughout the notebook.
fields = [
    ("GEOID", "object", "Geographical identifier for the census tract. GEOID numbers are generated with the following: STATEFP + COUNTYFP + TRACTCE"),
    ("governance_domain_score", "float64", "Governance domain score, used in the Cal-CRAI calculation"),
    ("built_environment_domain_score", "float64", "Built Environment domain index score, the value used to calculate the Built Environment adjusted score"),
    ("natural_systems_domain_score", "float64", "Natural Systems domain index score, the value used to calculate the Natural Systems adjusted score"),
    ("society_economy_domain_score", "float64", "Society and Economy domain index score, the value used to calculate the society and economy adjusted score"),
    ("natural_systems_tract_adjusted", "float64", "Adjusted value for Natural Systems per census tract"),
    ("society_economy_tract_adjusted", "float64", "Adjusted value for Society and Economy per census tract"),
    ("built_environment_tract_adjusted", "float64", "Adjusted value for Built Environment by tract"),
    ("community_capacity", "float64", "Numerator score of the Cal-CRAI calculation, with each coefficient being 1"),
    ("hazard_score", "float64", "Climate risk assessment score from all climate scenarios: Extreme Heat, Drought, Inland Flooding, Sea Level Rise, and Wildfires"),
    ("calcrai_score", "float64", "Calculated Cal-CRAI scores per California census tract"),
    ("STATEFP", "object", "State FIPS code"),
    ("COUNTYFP", "object", "County FIPS code"),
    ("TRACTCE", "object", "The unique identifier for a Census tract within a county"),
    ("NAME", "object", "This is a simpler, shorter version of the tract's identifier, often used for display purposes."),
    ("NAMELSAD", "object", "Combines the tract's type with its specific number, creating a more human-readable label."),
    ("MTFCC", "object", "A code assigned to describe the feature's type in the U.S. Census Bureau's geographic database. "),
    ("FUNCSTAT", "object", "Functional status of the geographical unit"),
    ("ALAND", "int64", "Land area (in square meters)"),
    ("AWATER", "int64", "Water area (in square meters)"),
    ("INTPTLAT", "object", "Latitude of the internal point"),
    ("INTPTLON", "object", "Longitude of the internal point"),
    ("geometry", "geometry", "Spatial component of feature")
]

# Add records for each field to the metadata table
feature_def = metadata_table.GetLayerDefn()
for column_name, data_type, description in fields:
    feature = ogr.Feature(feature_def)
    feature.SetField("Column_Name", column_name)
    feature.SetField("Data_Type", data_type)
    feature.SetField("Description", description)
    metadata_table.CreateFeature(feature)
    feature = None  # Release the OGR feature handle

# Dataset-level metadata, kept in insertion order
metadata_dict = OrderedDict()

# Add metadata in order
metadata_dict["Title"] = "California Climate Risk and Adaptation Index"
metadata_dict["Abstract"] = (
    "The Cal-CRAI evaluates community capacity — the ability of a community to adapt to and recover from a climate event \n"
    " — and hazard risk, which refers to the degree to which communities are exposed to climate risks and the historical losses \n"
    "resulting from such events. This assessment spans a broad set of socioeconomic, built, and natural environment indicators, \n"
    "creating a composite resilience index score that provides insights into California's vulnerability and resilience to climate \n"
    "change impacts at a high spatial scale. The climate risks used have been previously identified as “highest priority” for California: \n"
    "- Extreme heat \n"
    "- Wildfire \n"
    "- Drought \n"
    "- Inland flooding \n"
    "- Sea level rise"
)
metadata_dict["DOI"] = "https://doi.org/10.5281/zenodo.13840187"
# Agency name made consistent with the "Produced By" entry below
metadata_dict["Owner"] = "California Air Resources Board"
metadata_dict["Produced By"] = "Eagle Rock Analytics, Inc. for California Air Resources Board"
# NOTE(review): the contact address has no top-level domain — confirm the full e-mail address
metadata_dict["Point of Contact"] = "Eagle Rock Analytics, Inc.: info@eaglerockanalytics"
metadata_dict["Spatial Reference System"] = "EPSG:4269 (NAD83 - North American Datum 1983)"

metadata_dict["Dataset Dates"] = "12.17.2024"
metadata_dict["Geographic Extent"] = (
    "California, United States; Bounding Box: West: -124.48, East: -114.13, South: 32.53, North: 42.01"
)
metadata_dict["Version"] = "1.0"
metadata_dict["Metadata Date Stamp"] = "2024-12-17"
metadata_dict["Metadata Language"] = "English"
metadata_dict["Metadata Character Set"] = "UTF-8"
metadata_dict["Dataset Language"] = "English"
metadata_dict["Dataset Character Set"] = "UTF-8"
metadata_dict["Topics"] = (
    "Climate Risk Assessment, Climate Change Adaptation, Environmental Resilience, "
    "Socioeconomic Vulnerability, Regional Planning, California, Wildfire, Drought, "
    "Sea Level Rise, Inland Flooding, Extreme Heat"
)

# Add the metadata to the geodatabase
for key, value in metadata_dict.items():
    gdb.SetMetadataItem(key, value)

# Print the metadata in the order it was added
print('------')
for key, value in metadata_dict.items():
    print(f"{key}: {value}")
print('------')

# Save and close the GDB
gdb.Destroy()

print("Metadata table created successfully in the geodatabase.")

Zip geodatabase files

In [None]:
# Path to the geodatabase directory
gdb_path = '../index_method/Cal_CRAI.gdb'

# Path for the zipped file
zip_path = 'Cal_CRAI.zip'

# Create a zip archive of the geodatabase.
# A file geodatabase is a *folder* named '<name>.gdb', so the archive is rooted
# at the parent directory with the .gdb folder as base_dir. Using the .gdb
# folder itself as root_dir would store its internal files loose at the top of
# the zip, and the extracted result would not be recognisable as a geodatabase.
normalized_gdb = os.path.normpath(gdb_path)
shutil.make_archive(
    os.path.splitext(zip_path)[0],  # make_archive appends '.zip' itself
    'zip',
    root_dir=os.path.dirname(normalized_gdb) or '.',
    base_dir=os.path.basename(normalized_gdb),
)

print(f"Geodatabase zipped at {zip_path}")

## Step 6: Upload to AWS

In [None]:
# AWS bucket and destination directory (same location as the index CSV)
bucket_name = 'ca-climate-index'
directory = '3_fair_data/index_data'

# File to upload (the zipped geodatabase written to the working directory)
file = 'Cal_CRAI.zip'

# Reuse the project upload helper; despite its name it is given a .zip here
# NOTE(review): confirm upload_csv_aws accepts non-CSV files
upload_csv_aws([file], bucket_name, directory)