## Cal-CRAI Metric Calculation
Domain: Natural Systems \
Indicator: Ecosystem condition

This notebook calculates one metric, sourced from the California Protected Areas Database:
* Metric 1: Percentage of each California tract under management practices

In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull the protected areas GeoPackage (.gpkg) data from the AWS directory below
# NOTE(review): presumably the helper downloads into the current working directory,
# since the next cell reads the file by bare filename -- confirm against file_helpers
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/governance/natural_resource_conservation/cal_protected_areas/'

pull_gpkg_from_directory(bucket_name, aws_dir)

In [3]:
# load the protected areas GeoPackage from the local working directory
# NOTE(review): presumably this is the file fetched by pull_gpkg_from_directory above -- confirm
protected_areas_data = gpd.read_file('governance_cnra_protected_areas.gpkg')

In [None]:
# list the available columns before choosing which to keep
protected_areas_data.columns

In [None]:
# preview the first rows of the raw protected areas data
protected_areas_data.head()

In [None]:
# quick visual check of the protected area geometries
protected_areas_data.plot()

In [None]:
# keep only the columns needed downstream (one per line for easier review)
selected_columns = [
    'HOLDING_ID',
    'ACCESS_TYP',
    'UNIT_ID',
    'UNIT_NAME',
    'AGNCY_NAME',
    'SITE_NAME',
    'COUNTY',
    'ACRES',
    'SRC_ATTR',
    'SRC_ALIGN',
    'GAP1_acres',
    'GAP2_acres',
    'GAP3_acres',
    'GAP4_acres',
    'GAP_tot_ac',
    'USCB_COUNTYFP',
    'USCB_TRACTCE',
    'USCB_GEOID',
    'geometry',
]

# subset the raw data to those columns and display the result
filtered_protected_areas_data = protected_areas_data[selected_columns]
filtered_protected_areas_data

In [None]:
# read in the CA tract/county lookup table (a CSV, not the TIGER shapefile)
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_path)

# drop the columns that are not needed for the lookup
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])

# normalise column names and every string cell to lower case
ca_tract_county.columns = ca_tract_county.columns.str.lower()
# Series.map per column replaces DataFrame.applymap, which is deprecated in pandas >= 2.1;
# non-string values are passed through unchanged
ca_tract_county = ca_tract_county.apply(
    lambda col: col.map(lambda s: s.lower() if isinstance(s, str) else s)
)

In [9]:
# load the 2021 TIGER census tract layer, keeping only the tract identifier
# (renamed from 'GEOID' to 'tract') and the geometry
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = (
    gpd.read_file(census_shp_dir)[['GEOID', 'geometry']]
    .rename(columns={'GEOID': 'tract'})
)

In [None]:
# attach tract geometries to the tract/county table (left join keeps every listed tract),
# then rename the key so it matches the protected areas 'USCB_GEOID' column
ca_tract_county_spatial = pd.merge(
    ca_tract_county,
    ca_boundaries,
    on='tract',
    how='left',
).rename(columns={'tract': 'USCB_GEOID'})
ca_tract_county_spatial

In [11]:
# wrap the merged table as a GeoDataFrame so spatial methods (to_crs, dissolve) are available
# NOTE(review): no crs is passed here; the later to_crs call assumes the CRS carried over
# from the tract boundaries through the merge -- confirm with the CRS printout below
geo_ca_tract_county = gpd.GeoDataFrame(ca_tract_county_spatial)

In [None]:
# Step 1: Check the CRS of both layers before reprojecting
print("Initial CRS of protected areas:", filtered_protected_areas_data.crs)
print("Initial CRS of CA tract/counties dataset:", geo_ca_tract_county.crs)

# Step 2: Reproject both layers to EPSG:3310 (California Albers) so areas are comparable
gdf1 = filtered_protected_areas_data.to_crs(epsg=3310)  # protected areas
gdf2 = geo_ca_tract_county.to_crs(epsg=3310)  # census tracts

print("Reprojected CRS of cnra protected areas:", gdf1.crs)
print("Reprojected CRS of CA tracts:", gdf2.crs)

# Repair invalid geometries in both GeoDataFrames (buffer(0) is applied only where needed)
gdf1['geometry'] = gdf1['geometry'].apply(lambda geom: geom.buffer(0) if not geom.is_valid else geom)
gdf2['geometry'] = gdf2['geometry'].apply(lambda geom: geom.buffer(0) if not geom.is_valid else geom)

# Verify that all geometries are now valid
print("Protected areas geometries valid:", gdf1.is_valid.all())
print("Tract geometries valid:", gdf2.is_valid.all())

# Step 3: Calculate the total protected area per tract.
# Only the key and geometry are dissolved: the attribute columns are not needed for the
# area, and summing them (aggfunc='sum') would concatenate/aggregate unrelated fields.
# Dissolving unions the polygons sharing a USCB_GEOID, so overlaps are not double counted.
protected_area_tract = (
    gdf1[['USCB_GEOID', 'geometry']]
    .dissolve(by='USCB_GEOID')['geometry']
    .area
    .reset_index(name='protected_tract_area')
)

# Step 4: Calculate the total area of each tract
county_area = (
    gdf2[['USCB_GEOID', 'geometry']]
    .dissolve(by='USCB_GEOID')['geometry']
    .area
    .reset_index(name='tract_area')
)

# Step 5: Merge the two area tables on the tract identifier
# (inner join: tracts with no protected areas are absent from merged_df)
merged_df = pd.merge(protected_area_tract, county_area, on='USCB_GEOID')

# Step 6: Calculate the percentage of each tract's area that is protected
# NOTE(review): values above 100 are possible if a protected polygon extends beyond the
# tract named in its USCB_GEOID -- check the max printed in the following cells
merged_df['protected_areas_percentage'] = (merged_df['protected_tract_area'] / merged_df['tract_area']) * 100

In [None]:
# inspect the per-tract protected area, tract area, and percentage
merged_df

In [None]:
# sanity check: largest and smallest computed percentage
print(merged_df['protected_areas_percentage'].max())
print(merged_df['protected_areas_percentage'].min())

In [None]:
# merge back to CA tract/county data so we have our 9129 census tracts
# (left join: tracts without protected areas keep NaN in the area/percentage columns)
# NOTE(review): ca_tract_county_spatial still carries the tract 'geometry' column, so it
# will be written to the CSV below -- confirm that is intended
protected_areas_metric = pd.merge(ca_tract_county_spatial, merged_df, on='USCB_GEOID', how='left')
protected_areas_metric

In [16]:
# save the metric locally; this filename is the input_csv passed to the upload function below
# (the DataFrame index is written as the first, unnamed column because index=False is not set)
protected_areas_metric.to_csv('natural_cnra_protected_areas_metric.csv')

## Function Call

In [17]:
@append_metadata
def protected_areas_upload(input_csv, export=False, varname=''):
    '''
    Uploads the protected areas metric to S3 bucket. The metric is:
    
    * % of tract under management practices

    Data for this metric was sourced from the California Natural Resources Agency: California Protected Areas Database at
    https://data.cnra.ca.gov/dataset/california-protected-areas-database/resource/27323846-4000-42a2-85b3-93ae40edeff9

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Data was reprojected to match California tract data.
    Using both datasets 'geometry' columns, a total area column for each tract entry was calculated.
    Estimated tract percentage under management practices was calculated by dividing estimated tract land under management
    by estimated total tract area.
    
    Parameters
    ----------
    input_csv: string
        csv protected areas metric data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI protected areas metric to AWS
        True = will upload resulting df containing CAL CRAI protected areas metric to AWS
    varname: string
        Not used inside this function body; presumably consumed by the append_metadata
        decorator to label the metadata entry (verify against scripts/utils/write_metadata).

    Script
    ------
    natural_protected_areas.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # Log of the processing steps performed earlier in the notebook
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data reprojected to epsg 3310')
    print('Data transformation: a new column was created to estimate percentage of each tract with management practices')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # Previously this branch reported "uploaded to AWS" even though nothing was uploaded.
        print(f'{input_csv} not uploaded to AWS.')

    # Local cleanup is intentionally disabled; re-enable to delete the csv after upload:
    # if os.path.exists(input_csv):
    #     os.remove(input_csv)

In [18]:
# csv written earlier in the notebook, and the variable name this metric is registered under
input_csv = 'natural_cnra_protected_areas_metric.csv'
variable = 'natural_cnra_protected_areas'

# pass the real variable name instead of the 'test' placeholder (variable was otherwise unused)
protected_areas_upload(input_csv, export=True, varname=variable)