## This notebook calculates the timber management metric sourced from the California Department of Forestry and Fire Protection (CalFire):
* % of county under timber management practices

In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull the gpkg file(s) from the AWS bucket/directory below
# (the next cell reads 'governance_calfire_timber_management.gpkg' from the local working directory)
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/governance/natural_resource_conservation/calfire/'

pull_gpkg_from_directory(bucket_name, aws_dir)

In [3]:
# Load the timber management layer from the local GeoPackage and give the
# tract / county identifier columns the short names used for later merges.
column_renames = {
    'USCB_GEOID': 'tract',
    'COUNTY': 'county',
    'USCB_COUNTYFP': 'countyfp',
}
timber_management_data = gpd.read_file(
    'governance_calfire_timber_management.gpkg'
).rename(columns=column_renames)

In [None]:
# inspect the available columns after renaming
timber_management_data.columns

In [None]:
# Count how many distinct county values appear in the timber management layer.
county_column = timber_management_data['county']
unique_counties = county_column.unique()
print(len(unique_counties))

In [None]:
# preview the first rows of the timber management layer
timber_management_data.head()

In [None]:
# quick visual check of the timber management geometries
timber_management_data.plot()

In [None]:
# read in the CA tract-to-county lookup csv (attribute table only, not the TIGER boundaries)
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_path)
# only the attribute columns are needed here; 'field_1' is presumably a leftover
# index column from when the csv was written -- TODO confirm
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
# normalise column names and every string value to lower case so later merges line up
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if isinstance(s, str) else s)

In [9]:
# Load the 2021 TIGER census tract boundaries for CA, keeping just the tract
# identifier (renamed from GEOID to 'tract') and the geometry.
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = (
    gpd.read_file(census_shp_dir)[['GEOID', 'geometry']]
    .rename(columns={'GEOID': 'tract'})
)

In [None]:
# attach tract geometries to the tract/county lookup table;
# how='left' keeps every lookup row, so a tract with no matching boundary gets a missing geometry
ca_tract_county_spatial = pd.merge(ca_tract_county, ca_boundaries, on='tract', how='left')
ca_tract_county_spatial

In [11]:
# wrap the merged table in a GeoDataFrame so it can be reprojected and dissolved in the next cell
geo_ca_tract_county = gpd.GeoDataFrame(ca_tract_county_spatial)

In [None]:
# Step 1: Check the CRS
print("Initial CRS of NTMPs:", timber_management_data.crs)
print("Initial CRS of Counties:", geo_ca_tract_county.crs)

# Step 2: Reproject to an appropriate CRS for area calculations (e.g., EPSG:3310 for California)
gdf1 = timber_management_data.to_crs(epsg=3310)  # California Albers
gdf2 = geo_ca_tract_county.to_crs(epsg=3310)  # California Albers

print("Reprojected CRS of NTMPs:", gdf1.crs)
print("Reprojected CRS of Counties:", gdf2.crs)

# Step 3: Calculate the total area of NTMPs per county
# Only the grouping key and the geometry are kept before dissolving: the area is
# all that is used afterwards, so there is no reason to aggregate (sum) the
# remaining attribute columns, most of which are identifiers or text.
ntmp_area_county = (
    gdf1[['countyfp', 'geometry']]
    .dissolve(by='countyfp')['geometry']
    .area
    .reset_index(name='NTMP_Area')
)

# Step 4: Calculate the total area of each county (union of its tract geometries)
county_area = (
    gdf2[['countyfp', 'geometry']]
    .dissolve(by='countyfp')['geometry']
    .area
    .reset_index(name='County_Area')
)

# Step 5: Merge the two datasets on the county column
# (default inner join: only counties that have at least one NTMP geometry remain)
merged_df = pd.merge(ntmp_area_county, county_area, on='countyfp')

# Step 6: Calculate the spatial percentage of NTMPs per county
merged_df['NTMP_Percentage'] = (merged_df['NTMP_Area'] / merged_df['County_Area']) * 100

# Display the resulting dataframe
print(merged_df)

In [None]:
# Attach the county-level areas and percentage to every tract row (left join
# keeps all tracts), drop the geometry, and give the percentage column its
# final metric name.
timber_management_metric = (
    pd.merge(ca_tract_county_spatial, merged_df, on='countyfp', how='left')
    .drop(columns='geometry')
    .rename(columns={'NTMP_Percentage': 'percent_under_timber_management'})
)
timber_management_metric

In [None]:
# spot check: view all rows for a single county (countyfp '045')
# NOTE(review): the variable is named eighty_five but the filter uses county code '045' -- confirm which was intended
eighty_five = timber_management_metric[timber_management_metric['countyfp'] == '045']
eighty_five

In [15]:
# save the metric locally under the file name that is passed to the upload function below
# note: to_csv is called with its defaults, so the DataFrame index is written as an unnamed first column
timber_management_metric.to_csv('governance_timber_management_metric.csv')

## Function Call

In [16]:
@append_metadata
def timber_management_upload(input_csv, export=False, varname=''):
    '''
    Uploads the timber management metric csv to the S3 bucket and removes the local copy. The metric is:

    * % of county under timber management practices

    Data for this metric was sourced from the California Department of Forestry and Fire Protection (CalFire) at:
    https://gis.data.cnra.ca.gov/datasets/CALFIRE-Forestry::cal-fire-nonindustrial-timber-management-plans-ta83/about

    Note: data are for non-industrial timber management plans under 2,500 acres.
    Also Note: metric is intentionally county, even though the data itself was at census tract level as there were very small percentage of total CA tracts represented.

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Data was reprojected to EPSG:3310 (California Albers) for area calculations.
    Timber management geometries and tract geometries were each dissolved by county, and the area of each was calculated.
    Estimated county percentage under timber management practices was calculated by dividing
    the county area under management by the total county area; the county value was then assigned to every tract in the county.

    Parameters
    ----------
    input_csv: string
        file name of the csv containing the timber management metric
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI timber management metric to AWS
        True = will upload resulting df containing CAL CRAI timber management metric to AWS
    varname: string
        not used inside this function body; presumably consumed by the append_metadata decorator (confirm)

    Script
    ------
    governance_timber_management.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    The local input_csv file is deleted at the end of the call whether or not it was uploaded.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data reprojected to epsg 3310')
    # NOTE(review): this message says "each tract" although the metric is computed per county;
    # the text is left unchanged in case it is recorded elsewhere -- confirm before rewording
    print('Data transformation: a new column was created to estimate percentage of each tract with management practices')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # previously this branch reported the file as uploaded even though no upload happened
        print(f'{input_csv} not uploaded to AWS.')

    # clean up the local copy of the csv
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [17]:
input_csv = 'governance_timber_management_metric.csv'
# NOTE(review): `variable` is defined but never used; the call below passes varname='test' instead -- confirm which is intended
variable = 'governance_calfire_timber_management'

# export=True uploads the csv to AWS; the function also deletes the local csv afterwards
timber_management_upload(input_csv=input_csv, export=True, varname='test')