## Cal-CRAI metric calculation: governance fire fuel reduction

* number of acres treated for fuel reduction per county

### Note: this metric is no longer used in the Cal-CRAI, so this notebook is obsolete

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import dask_geopandas
import dask.dataframe as dd
import matplotlib.pyplot as plt
import os
import sys

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [2]:
# S3 bucket, key prefix, and the parquet file(s) holding the fuel reduction data
bucket = 'ca-climate-index'
path = '2b_reproject/'
pqt_list = ['governance/community_preparedness/usda_forest_service/governance_usda_fuel_reduction.parquet.gzip']

# S3 filesystem handle
fs = s3fs.S3FileSystem()

In [None]:
# Read every parquet listed in `pqt_list` from S3.
# Previously each iteration overwrote `df`, so only the last file was kept and
# an empty `pqt_list` failed with a NameError on the final assignment.
frames = []
for pqt in pqt_list:
    ppath = path + pqt
    bucket_uri = f's3://{bucket}/{ppath}'
    print(pqt)
    frames.append(gpd.read_parquet(bucket_uri))

if not frames:
    raise ValueError('pqt_list is empty: no fuel reduction parquet file to load')

# a single file is used as-is (index untouched); several files are stacked row-wise
df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)
fuel_reduction_data = df

In [None]:
# display the loaded fuel reduction data for a first visual inspection
fuel_reduction_data

In [None]:
# list the available columns before choosing the ones needed for the metric
fuel_reduction_data.columns

## Isolating the dataset for desired columns for metric calculation

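The following columns were inspected and designated as redundant or not needed:
GIS_ACRES, NBR_UNITS1, TREATMENT1, DATE_PLANN, FY_PLANNED, TREATMENT_

Note: DATE_PLANN and FY_PLANNED are still selected in the cell below, but they are not used in the metric calculation.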

In [None]:
# keep only the selected columns and expose the census GEOID under the name 'tract'
columns_to_keep = ['DATE_PLANN', 'FISCAL_YEA', 'FY_PLANNED', 'NBR_UNITS_', 'UOM', 'USCB_GEOID']
fuel_reduction_data_columns = (
    fuel_reduction_data[columns_to_keep]
    .rename(columns={'USCB_GEOID': 'tract'})
)
fuel_reduction_data_columns

## Checking what the date range of the data is
* deciding to use all and total them up

In [None]:
# collect the distinct fiscal years present in the data, then show them all
fiscal_years = fuel_reduction_data_columns['FISCAL_YEA']
unique_funding_year = fiscal_years.unique()
print(unique_funding_year)

In [None]:
# confirm that acreage is the only unit of measure by listing every distinct UOM value
unit_of_measure = fuel_reduction_data_columns['UOM']
unique_acrage_units = unit_of_measure.unique()
print(unique_acrage_units)

## Import Cal-CRAI census tract data and merge it with our cleaned fuel reduction data based on tract

In [None]:
# read in the CA census tract/county lookup file (a csv on S3)
# the URI gets its own name so `ca_tract_county` only ever refers to the table
ca_tract_county_uri = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_uri)

# drop the columns that are not needed for the tract -> county lookup
# ('field_1' is presumably the unnamed index column of the csv -- confirm)
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()


def _lowercase_if_str(value):
    '''Return `value` lower-cased when it is a string, otherwise unchanged.'''
    return value.lower() if isinstance(value, str) else value


# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# use the new name when this pandas version has it, else fall back
if hasattr(pd.DataFrame, 'map'):
    ca_tract_county = ca_tract_county.map(_lowercase_if_str)
else:
    ca_tract_county = ca_tract_county.applymap(_lowercase_if_str)

ca_tract_county

In [None]:
# right join on tract: every fuel reduction record is kept and gains the
# county information of its matching tract (if any)
fuel_reduction_merge = pd.merge(
    left=ca_tract_county,
    right=fuel_reduction_data_columns,
    how='right',
    on='tract',
)
fuel_reduction_merge

In [None]:
# spot check: look at the Humboldt rows before grouping by county and summing the totals
is_humboldt = fuel_reduction_merge['county'] == 'humboldt'
humboldt = fuel_reduction_merge[is_humboldt]
humboldt

## Grouping by county and summing acres planned to be treated
* results in 20 CA counties that have data

In [None]:
# total the 'NBR_UNITS_' values within each county
acres_by_county = fuel_reduction_merge.groupby('county')['NBR_UNITS_'].sum()

# back to a flat table, with the summed column renamed for clarity
county_count_fuel_reduction = (
    acres_by_county
    .reset_index()
    .rename(columns={'NBR_UNITS_': 'total_fuel_reduction_acres_planned'})
)

# how many counties have data
print(len(county_count_fuel_reduction))

county_count_fuel_reduction


## Merge once again with CA tracts based on county to attribute county sums to their respective tracts

In [None]:
# left join on county: every CA tract is kept and receives its county's total
fuel_reduction_metric = pd.merge(
    left=ca_tract_county,
    right=county_count_fuel_reduction,
    how='left',
    on='county',
)
fuel_reduction_metric

In [None]:
# spot check: every Trinity tract should carry the same acreage
# (2021 census data was also checked to have 4 Trinity tracts in total)
is_trinity = fuel_reduction_metric['county'] == 'trinity'
trinity = fuel_reduction_metric[is_trinity]
trinity

In [22]:
# write the final metric table to a local csv, ready for upload to the S3 bucket
fuel_reduction_metric.to_csv(
    'governance_fuel_reduction_metric.csv',
    index=False,
)

## Function Call

In [16]:
@append_metadata
def fuel_reduction_upload(input_csv, export=False, varname=''):
    '''
    Uploads our fuel reduction metric to S3 bucket. The metric is:
    
    * Number of acres treated for fire fuel reduction per California county
    
    Data for this metric was sourced from the United States Department of Agriculture Forest Service at:
    https://data.fs.usda.gov/geodata/edw/datasets.php?xmlKeyword=Hazardous+Fuel+Treatment

    Methods
    -------
    Relevant data columns were isolated.
    Data was merged with CA tract/county data to attribute each data tract to a California county.
    Number of acres planned to be treated per county was calculated by grouping counties together and summing acre counts.
    Data was once again merged with CA tracts based on county to attribute acreage counts to each CA tract.
    
    Parameters
    ----------
    input_csv: string
        csv fire fuel reduction metric data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI fuel reduction metric to AWS
        True = will upload resulting df containing CAL CRAI fuel reduction metric to AWS
    varname: string
        not used in the function body; presumably consumed by the
        append_metadata decorator (TODO confirm)

    Script
    ------
    governance_fuel_reduction.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data was merged with CA tracts and counties to generate Cal-CRAI metric.')
    print('Data transformation: data was grouped by county and summed to calculate treated acres.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is handed a list of file names, so wrap the single csv
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # nothing is uploaded on this path; the old message wrongly claimed an upload
        print(f'{input_csv} not uploaded to AWS (export=False).')
 
    # local cleanup is currently disabled; uncomment to delete the csv afterwards
    #if os.path.exists(input_csv):
    #   os.remove(input_csv)

In [17]:
# local csv written by the save cell earlier in this notebook
input_csv = 'governance_fuel_reduction_metric.csv'
# NOTE(review): this name is not passed below; the call uses the literal 'test' instead
varname = 'governance_usda_fuel_reduction'

# export=False: the upload branch of the function is skipped
fuel_reduction_upload(input_csv, export=False, varname='test') #varname