## Cal-CRAI metric calculation: governance fire fuel reduction

* number of acres treated for fuel reduction per county

In [2]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import dask_geopandas
import dask.dataframe as dd
import matplotlib.pyplot as plt
import os
import sys

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [10]:
# S3 filesystem handle
fs = s3fs.S3FileSystem()
# bucket name and folder prefix used to build the parquet URI(s) in the loading cell
bucket = 'ca-climate-index'
path = '2b_reproject/' 
# parquet file(s) to load, given relative to `path`
pqt_list = [
    'governance/community_preparedness/usda_forest_service/governance_usda_fuel_reduction.parquet.gzip'
]

In [14]:
# Read each listed parquet file from the S3 bucket; the frame read last
# is the one kept as the working fuel reduction dataset.
for pqt in pqt_list:
    print(pqt)
    ppath = f'{path}{pqt}'
    bucket_uri = 's3://' + bucket + '/' + ppath
    df = gpd.read_parquet(bucket_uri)
fuel_reduction_data = df

governance/community_preparedness/usda_forest_service/governance_usda_fuel_reduction.parquet.gzip


In [16]:
# preview the loaded fuel reduction dataset
fuel_reduction_data

Unnamed: 0_level_0,index,SUID,ORG,ACTIVITY_C,ACTIVITY,LOCAL_QUAL,ASU_NBR_UN,ASU_UOM,ADMIN_REGI,ADMIN_FORE,...,GIS_ACRES,PURPOSE_CO,ACT_CREATE,ACT_MODIFI,SHAPE_AREA,SHAPE_LEN,geometry,index_right,USCB_GEOID,USCB_NAME
__null_dask_index__,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,537408,0517531454540000000,051753,4220,Commercial Thin,MOSAIC,24.4,ACRES,05,17,...,24.450,,2023-10-05,2023-12-07,1.037190e-05,0.020631,"POLYGON ((-120.80767 39.57617, -120.80294 39.5...",3216,06091010000,100
1,537409,0517531454540000000,051753,1150,Rearrangement of Fuels,,24.4,ACRES,05,17,...,24.450,,2023-10-05,2023-12-07,1.037190e-05,0.020631,"POLYGON ((-120.80767 39.57617, -120.80294 39.5...",3216,06091010000,100
2,537410,0517531454540000000,051753,1130,Burning of Piled Material,LANDINGS,24.4,ACRES,05,17,...,24.450,,2023-10-05,2023-12-07,1.037190e-05,0.020631,"POLYGON ((-120.80767 39.57617, -120.80294 39.5...",3216,06091010000,100
3,537411,0517531454550000000,051753,4220,Commercial Thin,MOSAIC,156.6,ACRES,05,17,...,156.614,,2023-10-05,2023-12-07,6.644167e-05,0.034004,"POLYGON ((-120.80280 39.57398, -120.80290 39.5...",3216,06091010000,100
4,537412,0517531454550000000,051753,1150,Rearrangement of Fuels,,156.6,ACRES,05,17,...,156.614,,2023-10-05,2023-12-07,6.644167e-05,0.034004,"POLYGON ((-120.80280 39.57398, -120.80290 39.5...",3216,06091010000,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,546281,051653322GZFBF41000,051653,1180,Fuel Break,,9.0,ACRES,05,16,...,8.790,,2023-07-11,2023-09-29,3.661738e-06,0.009611,"POLYGON ((-120.00945 38.26087, -120.00963 38.2...",2695,06109003103,31.03
1015,546282,051653323GZFB426000,051653,1180,Fuel Break,,2.0,ACRES,05,16,...,2.232,,2023-07-19,2023-09-29,9.297831e-07,0.006193,"POLYGON ((-119.99407 38.25826, -119.99407 38.2...",2695,06109003103,31.03
1016,546283,051653230GZFBF14000,051653,1180,Fuel Break,,13.0,ACRES,05,16,...,12.525,,2023-07-11,2023-09-29,5.215725e-06,0.015585,"POLYGON ((-120.07417 38.23633, -120.07417 38.2...",2701,06109002102,21.02
1017,546284,051653230GZFB852000,051653,1180,Fuel Break,,13.0,ACRES,05,16,...,13.104,,2023-08-01,2023-09-29,5.456517e-06,0.015812,"POLYGON ((-120.07433 38.22811, -120.07409 38.2...",2701,06109002102,21.02


In [15]:
# list the columns available in the loaded dataset
fuel_reduction_data.columns

Index(['index', 'SUID', 'ORG', 'ACTIVITY_C', 'ACTIVITY', 'LOCAL_QUAL',
       'ASU_NBR_UN', 'ASU_UOM', 'ADMIN_REGI', 'ADMIN_FORE', 'ADMIN_DIST',
       'STATE_ABBR', 'OWNERSHIP_', 'PROC_REGIO', 'PROC_FORES', 'LAND_SUITA',
       'PRODUCTIVI', 'SLOPE', 'ELEVATION', 'ASPECT', 'MGT_AREA_C',
       'MGT_PRESCR', 'NBR_UNITS_', 'NBR_UNITS1', 'UOM', 'DATE_PLANN',
       'DATE_AWARD', 'DATE_COMPL', 'FISCAL_YEA', 'FY_AWARDED', 'FISCAL_Y_1',
       'FY_PLANNED', 'KEYPOINT', 'FUND_CODE', 'METHOD_COD', 'EQUIPMENT_',
       'COST_PER_U', 'NEPA_PROJE', 'NEPA_DOC_N', 'IMPLEMENTA', 'IMPLEMEN_1',
       'IMPLEMEN_2', 'ACCOMPLISH', 'ACCOMPLI_1', 'ACTIVITY_1', 'ACTIVITY_U',
       'FACTS_ID', 'SUBUNIT', 'FEATURE_TY', 'TREATMENT_', 'ACTIVITY_2',
       'ACTIVITY_S', 'WORKFORCE_', 'NEPA_PRO_1', 'SUID_CN', 'ISWUI', 'CWPP',
       'CAT_NM', 'EQUIPMENT', 'METHOD', 'TREATMENT1', 'STAGE', 'STAGE_VALU',
       'DATA_SOURC', 'DATA_SOU_1', 'ACCURACY', 'FS_UNIT_ID', 'FS_UNIT_NA',
       'CRC_VALUE', 'ETL_MODIFI', '

## Isolating the dataset for desired columns for metric calculation

The following columns were inspected and designated as redundant or not needed:
GIS_ACRES, NBR_UNITS1, TREATMENT1, DATE_PLANN, FY_PLANNED, TREATMENT_

In [35]:
# keep only the date, fiscal year, acreage, unit and tract id columns,
# and expose the census tract id under the name 'tract'
fuel_reduction_data_columns = (
    fuel_reduction_data[
        ['DATE_PLANN', 'FISCAL_YEA', 'FY_PLANNED', 'NBR_UNITS_', 'UOM', 'USCB_GEOID']
    ]
    .rename(columns={'USCB_GEOID': 'tract'})
)
fuel_reduction_data_columns

Unnamed: 0_level_0,DATE_PLANN,FISCAL_YEA,FY_PLANNED,NBR_UNITS_,UOM,tract
__null_dask_index__,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2023-04-04,2023,2023.0,24.4,ACRES,06091010000
1,2023-04-04,2023,2023.0,24.4,ACRES,06091010000
2,2025-10-01,2026,2026.0,24.4,ACRES,06091010000
3,2023-04-04,2023,2023.0,156.6,ACRES,06091010000
4,2023-04-04,2023,2023.0,156.6,ACRES,06091010000
...,...,...,...,...,...,...
1014,2023-07-11,2023,2023.0,9.0,ACRES,06109003103
1015,2023-07-11,2023,2023.0,2.0,ACRES,06109003103
1016,2023-07-11,2023,2023.0,13.0,ACRES,06109002102
1017,2023-07-11,2023,2023.0,13.0,ACRES,06109002102


## Checking the date range of the data
* all fiscal years are kept and their acreage is totaled

In [36]:
# collect the distinct fiscal years present in the isolated data
unique_funding_year = fuel_reduction_data_columns['FISCAL_YEA'].unique()

# Display all unique entries
print(unique_funding_year)

[2023 2026 2017 2020 2018 2013 2007 2010 2024 2012 2022 2005 2016 2014
 2009 2019 2004 2015 2008 2027 2003 2021 1996 2006 2025 2011]


In [60]:
# ensuring that the only unit of measure is acreage
unique_acrage_units = fuel_reduction_data_columns['UOM'].unique()

# Display all unique entries
print(unique_acrage_units)

# fail loudly if any other unit is present: 'NBR_UNITS_' is summed per county
# further down, which is only meaningful when every row is expressed in acres
assert set(unique_acrage_units) == {'ACRES'}, f'unexpected units of measure: {unique_acrage_units}'

['ACRES']


## Import Cal-CRAI census tract data and merge with our cleaned fuel reduction data based on tract

In [24]:
# read in CA census tiger file
# (the URI gets its own name so `ca_tract_county` only ever refers to the table)
ca_tract_county_uri = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_uri)

# drop the columns that are not needed for the tract -> county lookup
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()

# lower-case every string value; non-string values pass through untouched.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use whichever of the two this pandas version provides.
elementwise = getattr(ca_tract_county, 'map', None) or ca_tract_county.applymap
ca_tract_county = elementwise(lambda s: s.lower() if isinstance(s, str) else s)

ca_tract_county

Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [37]:
# right join on tract: every fuel reduction record is kept and gains the
# county columns of its census tract
fuel_reduction_merge = pd.merge(
    left=ca_tract_county,
    right=fuel_reduction_data_columns,
    how='right',
    on='tract',
)
fuel_reduction_merge

Unnamed: 0,tract,countyfp,county,DATE_PLANN,FISCAL_YEA,FY_PLANNED,NBR_UNITS_,UOM
0,06091010000,091,sierra,2023-04-04,2023,2023.0,24.4,ACRES
1,06091010000,091,sierra,2023-04-04,2023,2023.0,24.4,ACRES
2,06091010000,091,sierra,2025-10-01,2026,2026.0,24.4,ACRES
3,06091010000,091,sierra,2023-04-04,2023,2023.0,156.6,ACRES
4,06091010000,091,sierra,2023-04-04,2023,2023.0,156.6,ACRES
...,...,...,...,...,...,...,...,...
1014,06109003103,109,tuolumne,2023-07-11,2023,2023.0,9.0,ACRES
1015,06109003103,109,tuolumne,2023-07-11,2023,2023.0,2.0,ACRES
1016,06109002102,109,tuolumne,2023-07-11,2023,2023.0,13.0,ACRES
1017,06109002102,109,tuolumne,2023-07-11,2023,2023.0,13.0,ACRES


In [44]:
# sanity check: inspect the Humboldt records ahead of the county-level aggregation
humboldt = fuel_reduction_merge.loc[fuel_reduction_merge['county'].eq('humboldt')]
humboldt

Unnamed: 0,tract,countyfp,county,DATE_PLANN,FISCAL_YEA,FY_PLANNED,NBR_UNITS_,UOM
8,6023010102,23,humboldt,2023-01-30,2023,2023.0,11.1,ACRES
9,6023010102,23,humboldt,2023-01-30,2023,2023.0,11.1,ACRES
10,6023010102,23,humboldt,2023-01-30,2023,2023.0,11.1,ACRES


## Grouping by county and summing acres planned to be treated
* results in 20 CA counties that have data

In [50]:
# total the 'NBR_UNITS_' values for each county, then give the summed
# column a descriptive name
county_count_fuel_reduction = (
    fuel_reduction_merge
    .groupby('county')['NBR_UNITS_']
    .sum()
    .reset_index()
    .rename(columns={'NBR_UNITS_': 'total_fuel_reduction_acres_planned'})
)

# print the number of unique counties
print(len(county_count_fuel_reduction))

county_count_fuel_reduction


20


Unnamed: 0,county,total_fuel_reduction_acres_planned
0,alpine,13053.6
1,amador,4961.0
2,butte,658.0
3,calaveras,40443.9
4,el dorado,39368.0
5,humboldt,33.3
6,kern,18.0
7,lassen,429.6
8,mariposa,1809.6
9,nevada,1115.8


## Merge once again with CA tracts based on county to attribute county sums to their respective tracts

In [51]:
# left join on county: every CA tract is kept and receives its county's total
# (tracts in counties without data are left as NaN)
fuel_reduction_metric = pd.merge(
    left=ca_tract_county,
    right=county_count_fuel_reduction,
    how='left',
    on='county',
)
fuel_reduction_metric

Unnamed: 0,tract,countyfp,county,total_fuel_reduction_acres_planned
0,06085504321,085,santa clara,
1,06085504410,085,santa clara,
2,06085507003,085,santa clara,
3,06085507004,085,santa clara,
4,06085502204,085,santa clara,
...,...,...,...,...
9124,06059001303,059,orange,
9125,06059001304,059,orange,
9126,06059001401,059,orange,
9127,06013367200,013,contra costa,


In [52]:
# checking if all Trinity tracts have the same acreage
# also checked that 2021 census data has 4 total Trinity tracts
trinity = fuel_reduction_metric.loc[fuel_reduction_metric['county'].eq('trinity')]
trinity

Unnamed: 0,tract,countyfp,county,total_fuel_reduction_acres_planned
1162,6105000102,105,trinity,4088.0
1163,6105000101,105,trinity,4088.0
2618,6105000200,105,trinity,4088.0
3554,6105000500,105,trinity,4088.0


In [53]:
# save final df as a csv (row index omitted) for upload to S3 bucket
fuel_reduction_metric.to_csv('governance_fuel_reduction_metric.csv', index=False)

## Function Call

In [61]:
@append_metadata
def fuel_reduction_upload(input_csv, export=False, varname=''):
    '''
    Uploads our fuel reduction metric to S3 bucket. The metric is:
    
    * Number of acres treated for fire fuel reduction per California county
    
    Data for this metric was sourced from the United States Department of Agriculture Forest Service at:
    https://data.fs.usda.gov/geodata/edw/datasets.php?xmlKeyword=Hazardous+Fuel+Treatment

    Methods
    -------
    Relevant data columns were isolated.
    Data was merged with CA tract/county data to attribute each data tract to a California county.
    Number of acres planned to be treated per county was calculated by grouping counties together and summing acre counts.
    Data was once again merged with CA tracts based on county to attribute acreage counts to each CA tract.
    
    Parameters
    ----------
    input_csv: string
        csv fuel reduction metric data
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI fuel reduction metric to AWS
        True = will upload resulting df containing CAL CRAI fuel reduction metric to AWS
    varname: string
        Not used inside this function; presumably consumed by the
        append_metadata decorator -- TODO confirm against write_metadata.

    Script
    ------
    governance_fuel_reduction.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data was merged with CA tracts and counties to generate Cal-CRAI metric.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # only report success once the upload call has actually been made
        print(f'{input_csv} uploaded to AWS.')
    else:
        # dry run: say so explicitly instead of claiming an upload happened
        print(f'{input_csv} was not uploaded to AWS (export=False).')

    # local csv cleanup is intentionally left disabled:
    #if os.path.exists(input_csv):
    #   os.remove(input_csv)

In [62]:
input_csv = 'governance_fuel_reduction_metric.csv'
varname = 'governance_usda_fuel_reduction'

# dry run: export=False skips the upload branch, and the literal 'test' is passed instead of `varname`
# NOTE(review): the real run presumably passes varname=varname with export=True -- confirm
fuel_reduction_upload(input_csv, export=False, varname='test') #varname