### Cal-CRAI metric calculation: Crop Loss
This notebook calculates crop loss metrics across 2 different climate risks:
* Drought/crop loss: average # of acres lost from drought per year
* Drought/crop loss: average cost of crop loss from drought per year
* Heat/crop loss: average # of acres lost from extreme heat per year
* Heat/crop loss: average cost of crop loss from extreme heat per year

In [4]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import matplotlib.pyplot as plt
import os
import sys
import numpy as np

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# Read the CA census tract -> county lookup table from S3.
# The path gets its own name so `ca_tract_county` only ever refers to the table.
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_path)
# these two columns are not needed for the tract/county lookup
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()
# lowercase every string value so county names can be matched on merge later
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if isinstance(s, str) else s)

ca_tract_county

In [None]:
# Pull the USDA crop loss CSVs from the project bucket into a local folder
# (the next cell reads from 'csv_folder').
folder = 'csv_folder'
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/extreme_heat/loss/usda/usda_crop_loss_heat_files/'

pull_csv_from_directory(
    bucket_name,
    aws_dir,
    folder,
    search_zipped=False,
)

In [None]:
# Load the merged USDA crop loss table and preview its first five rows.
crop_loss_csv = r'csv_folder/usda_crop_loss_merged.csv'
all_events = pd.read_csv(crop_loss_csv)
all_events.head()

In [None]:
# list the available column names before choosing the ones to keep
all_events.columns

In [None]:
# Columns used when comparing the heat and drought copies of the file.
columns = [
    'year',
    'county_code',
    'county_name',
    'damage_description',
    'determined_acres',
    'indemnity_amount',
]
# Show every distinct damage description present in the data.
all_events['damage_description'].unique()

#### Quick check that the drought version of this file is identical to the heat version so we do not have to pull twice

In [None]:
# Keep a handle on the table that was loaded from the heat directory.
heat_v = all_events

# Pull the copy stored under the drought directory and load it for comparison.
folder = 'csv_folder'
aws_dir = '1_pull_data/climate_risk/drought/loss/usda/usda_crop_loss_heat_files/'
pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

drought_csv = r'csv_folder/usda_crop_loss_CA_final.csv'
drought_v = pd.read_csv(drought_csv)

In [16]:
# isolate for relevant columns in both
heat_v = heat_v[columns]
drought_v = drought_v[columns]

# subset for heat and drought to confirm
event_types = ['Drought', 'Heat']
heat_v_events = heat_v[heat_v['damage_description'].isin(event_types)]
drought_v_events = drought_v[drought_v['damage_description'].isin(event_types)]

# Sort on every compared column so rows that tie on year/acres cannot end up
# in a different order in the two tables, then discard the old mismatched index.
heat_v_events = heat_v_events.sort_values(columns).reset_index(drop=True)
drought_v_events = drought_v_events.sort_values(columns).reset_index(drop=True)

# Fail loudly if the two versions differ. A bare expression in the middle of a
# cell is not displayed, so an explicit check is needed before the drought
# copy is deleted below.
if not heat_v_events.equals(drought_v_events):
    raise ValueError(
        'Heat and drought versions of the USDA crop loss data differ; '
        'pull and process the drought file separately.'
    )

## heat version is identical to the drought version, only need to pull one to calculate both
os.remove(r'csv_folder/usda_crop_loss_CA_final.csv')

### Heat Metrics
* Heat/crop loss: average # of acres lost from extreme heat per year
* Heat/crop loss: average cost of crop loss from extreme heat per year

In [None]:
# Damage descriptions that count as extreme heat events.
heat_types = ['Heat']

# Keep only the heat rows and the columns needed for the metrics.
heat_cols = ['year', 'county_name', 'damage_description', 'determined_acres', 'indemnity_amount']
is_heat = all_events['damage_description'].isin(heat_types)
heat_events = all_events.loc[is_heat, heat_cols]
heat_events

In [18]:
# Look at the county names, then discard the non-CA catch-all rows
# labelled "All Other Counties".
heat_events['county_name'].unique()
is_named_county = heat_events['county_name'].ne('All Other Counties')
heat_events = heat_events[is_named_county]

In [None]:
# Average the numeric columns per county, then drop the (meaningless) averaged
# year column. `numeric_only=True` is spelled out because the first positional
# argument of GroupBy.mean is `numeric_only`; a column name passed there is
# read as a truthy flag, not as a column to average over.
# NOTE(review): this is the mean over individual loss records for each county,
# not a mean of yearly totals -- confirm it matches the intended "per year" metric.
heat_agg = (
    heat_events
    .groupby('county_name')
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns=['year'])
)
heat_agg

In [20]:
# Expose the county name in lowercase under 'county' so it matches the
# tract table, and drop the original 'county_name' column.
heat_agg = (
    heat_agg
    .assign(county=heat_agg['county_name'].str.lower())
    .drop(columns=['county_name'])
)

In [None]:
# Left-join the county averages onto the tract table so every CA tract is kept.
heat_agg_merge = pd.merge(
    left=ca_tract_county,
    right=heat_agg,
    how='left',
    on='county',
)
heat_agg_merge

In [None]:
# Double check the counties whose tracts received no heat values in the merge.
heat_agg_merge_nans = heat_agg_merge[heat_agg_merge.determined_acres.isnull()]
c_to_check = heat_agg_merge_nans.county.unique()

# Both tables hold lowercase county names, so compare against the lowercase
# values directly; upper-casing one side would make every check print False.
heat_counties = set(heat_agg.county)
for county in c_to_check:
    print(county, county in heat_counties)

In [31]:
# Write one CSV per heat metric: tract, county, and the renamed metric column.
heat_crop_loss_acres_metric = (
    heat_agg_merge[['tract', 'county', 'determined_acres']]
    .rename(columns={'determined_acres': 'heat_crop_loss_acres'})
)
heat_crop_loss_acres_metric.to_csv('climate_heat_crop_loss_acres_metric.csv', index=False)

heat_crop_loss_cost_metric = (
    heat_agg_merge[['tract', 'county', 'indemnity_amount']]
    .rename(columns={'indemnity_amount': 'heat_crop_loss_indemnity_amount'})
)
heat_crop_loss_cost_metric.to_csv('climate_heat_crop_loss_cost_metric.csv', index=False)

### Drought metrics
* Drought/crop loss: average # of acres lost from drought per year
* Drought/crop loss: average cost of crop loss from drought per year

In [None]:
# Damage descriptions that count as drought events.
drought_types = ['Drought']

# Keep only the drought rows and the columns needed for the metrics.
drought_cols = ['year', 'county_name', 'damage_description', 'determined_acres', 'indemnity_amount']
is_drought = all_events['damage_description'].isin(drought_types)
drought_events = all_events.loc[is_drought, drought_cols]
drought_events

In [35]:
# Look at the county names, then discard the non-CA catch-all rows
# labelled "All Other Counties".
drought_events['county_name'].unique()
is_named_county = drought_events['county_name'].ne('All Other Counties')
drought_events = drought_events[is_named_county]

In [None]:
# Average the numeric columns per county, then drop the (meaningless) averaged
# year column. `numeric_only=True` is spelled out because the first positional
# argument of GroupBy.mean is `numeric_only`; a column name passed there is
# read as a truthy flag, not as a column to average over.
# NOTE(review): this is the mean over individual loss records for each county,
# not a mean of yearly totals -- confirm it matches the intended "per year" metric.
drought_agg = (
    drought_events
    .groupby('county_name')
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns=['year'])
)
drought_agg

In [37]:
# Expose the county name in lowercase under 'county' so it matches the
# tract table, and drop the original 'county_name' column.
drought_agg = (
    drought_agg
    .assign(county=drought_agg['county_name'].str.lower())
    .drop(columns=['county_name'])
)

In [None]:
# Left-join the county averages onto the tract table so every CA tract is kept.
drought_agg_merge = pd.merge(
    left=ca_tract_county,
    right=drought_agg,
    how='left',
    on='county',
)
drought_agg_merge

In [None]:
# Double check the counties whose tracts received no drought values in the merge.
drought_agg_merge_nans = drought_agg_merge[drought_agg_merge.determined_acres.isnull()]
c_to_check = drought_agg_merge_nans.county.unique()

# Both tables hold lowercase county names, so compare against the lowercase
# values directly; upper-casing one side would make every check print False.
drought_counties = set(drought_agg.county)
for county in c_to_check:
    print(county, county in drought_counties)

In [42]:
# Write one CSV per drought metric: tract, county, and the renamed metric column.
drought_crop_loss_acres_metric = (
    drought_agg_merge[['tract', 'county', 'determined_acres']]
    .rename(columns={'determined_acres': 'drought_crop_loss_acres'})
)
drought_crop_loss_acres_metric.to_csv('climate_drought_crop_loss_acres_metric.csv', index=False)

drought_crop_loss_cost_metric = (
    drought_agg_merge[['tract', 'county', 'indemnity_amount']]
    .rename(columns={'indemnity_amount': 'drought_crop_loss_indemnity_amount'})
)
drought_crop_loss_cost_metric.to_csv('climate_drought_crop_loss_cost_metric.csv', index=False)

### Metadata

In [46]:
@append_metadata
def crop_loss_upload(input_csv, export=False, varname=''):
    '''
    Uploads the crop loss metrics to S3 bucket. The metrics are:
    * Drought/crop loss: average # of acres lost from drought per year
    * Drought/crop loss: average cost of crop loss from drought per year
    * Heat/crop loss: average # of acres lost from extreme heat per year
    * Heat/crop loss: average cost of crop loss from extreme heat per year
    
    Data for this metric was sourced from USDA Risk Management database:
    https://legacy.rma.usda.gov/data/cause.html

    Methods
    -------
    Relevant columns to our data metrics were isolated.
    Data was isolated to include relevant events.
    Data were grouped by county and averaged to calculate final metrics.
    Data were merged with California census tracts.
    
    Parameters
    ----------
    input_csv: string
        csv crop loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI crop loss metrics to AWS
        True = will upload resulting df containing CAL CRAI crop loss metrics to AWS
    varname: string
        Not used inside this function; presumably consumed by the
        append_metadata decorator (confirm against scripts.utils.write_metadata).

    Script
    ------
    climate_crop_loss.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # Dry run: describe the transformations and leave the local file untouched.
    if not export:
        print('Data transformation: relevant columns were isolated and renamed')
        print('Data transformation: data was grouped by county and averaged by year.')
        print('Data transformation: data was merged with California census tracts.') 
        return None

    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/index_data'
    # upload_csv_aws is given a list of file names
    upload_csv_aws([input_csv], bucket_name, directory)

    # Remove the local copy only after the upload call; a falsy `export`
    # never reaches this point, so a dry run cannot delete the file.
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [None]:
# Metric files and their metadata variable names. The two lists are zipped
# below, so entry i of `varnames` must describe entry i of `input_csvs`
# (heat/drought and acres/cost in the same order in both).
input_csvs = [
    'climate_heat_crop_loss_acres_metric.csv',
    'climate_drought_crop_loss_acres_metric.csv',
    'climate_heat_crop_loss_cost_metric.csv',
    'climate_drought_crop_loss_cost_metric.csv',
]

varnames = [
    'climate_usda_heat_crop_loss',
    'climate_usda_drought_crop_loss',
    'climate_usda_heat_crop_cost',
    'climate_usda_drought_crop_cost',
]

# Process the data and export
for input_csv, varname in zip(input_csvs, varnames):
    print(f'Processing {input_csv} with varname {varname}')
    # pass the paired varname so the call matches what the messages report
    crop_loss_upload(input_csv, export=True, varname=varname)
    print(f'Completed uploading {input_csv} with varname {varname}!')