### Cal-CRAI metric calculation: Crop Loss
This notebook calculates crop loss metrics across 2 different climate risks:
* Drought/crop loss: average # of acres lost from drought per year
* Drought/crop loss: average cost of crop loss from drought per year
* Heat/crop loss: average # of acres lost from extreme heat per year
* Heat/crop loss: average cost of crop loss from extreme heat per year

In [2]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import matplotlib.pyplot as plt
import os
import sys
import numpy as np

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [3]:
# read in CA census tiger file
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns={'field_1', 'geometry'})
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)

ca_tract_county

Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [4]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/extreme_heat/loss/usda/usda_crop_loss_heat_files/'
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'usda_crop_loss_merged.csv'


In [5]:
all_events = pd.read_csv('usda_crop_loss_merged.csv')
all_events.head(5)

Unnamed: 0,year,state_code,state_abbreviation,county_code,county_name,commodity_code,commodity_name,insurance_plan_code,insurance_plan_abbreviation,stage_code,damage_cause_code,damage_description,determined_acres,indemnity_amount
0,1989,6,CA,1,Alameda,9999,All Other Crops,90,APH,0H,12,Heat,52.0,
1,1989,6,CA,1,Alameda,9999,All Other Crops,90,APH,UH,12,Heat,6508.0,
2,1989,6,CA,7,Butte,28,ALMONDS,90,APH,04,31,Excess Moisture/Precip/Rain,615897.0,
3,1989,6,CA,7,Butte,28,ALMONDS,90,APH,0H,31,Excess Moisture/Precip/Rain,1606100.0,
4,1989,6,CA,7,Butte,28,ALMONDS,90,APH,0H,32,Poor Drainage,12866.0,


In [6]:
all_events.columns

Index(['year', 'state_code', 'state_abbreviation', 'county_code',
       'county_name', 'commodity_code', 'commodity_name',
       'insurance_plan_code', 'insurance_plan_abbreviation', 'stage_code',
       'damage_cause_code', 'damage_description', 'determined_acres',
       'indemnity_amount'],
      dtype='object')

In [7]:
columns = ['year', 'county_code', 'county_name',
          'damage_description', 'determined_acres', 'indemnity_amount']
all_events.damage_description.unique()

array(['Heat', 'Excess Moisture/Precip/Rain', 'Poor Drainage',
       'Cold Wet Weather', 'Frost', 'Wildlife', 'Hot Wind', nan, 'Hail',
       'Other (Snow-Lightning-Etc.)', 'Plant Disease', 'Drought',
       'Freeze', 'Insects', 'Wind/Excess Wind', 'Fruit Set Failure',
       'Failure Irrig Supply', 'Flood', 'Cold Winter', 'Excess Sun',
       'Mycotoxin (Aflatoxin)', 'Fire', 'Insufficient Chilling Hours',
       'Earthquake', 'Decline in Price', 'Cyclone', 'Other Causes',
       'Excess Moisture/Precipitation/Rain', 'Wind/Excess Win',
       'Other (Snow, Lightening, Etc.)', 'Failure of Irrigation Supply',
       'GRP/Grip Crops', 'Failure of Irrigation Equipment', 'Tornado',
       'Hurricane/Tropical Depression', 'Other (Snow, Lightning, Etc.)',
       'ARPI Crops Only', 'ARPI/SCO/STAX Crops Only',
       'ARPI/SCO/STAX/MP Crops Only',
       'Inability to Prepare Land for Irrigation',
       'Federal or State Ordered Destruction'], dtype=object)

#### Quick check that the drought version of this file is identical to the heat version so we do not have to pull twice

In [8]:
heat_v = all_events # heat version

aws_dir = '1_pull_data/climate_risk/drought/loss/usda/usda_crop_loss_heat_files/'
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)
drought_v = pd.read_csv('usda_crop_loss_CA_final.csv') # drought version

Saved DataFrame as 'usda_crop_loss_CA_final.csv'


In [9]:
# isolate for relevant columns in both
heat_v = heat_v[columns]
drought_v = drought_v[columns]

# subset for heat and drought to confirm
event_types = ['Drought', 'Heat']
heat_v_events = heat_v[heat_v['damage_description'].isin(event_types)]
drought_v_events = drought_v[drought_v['damage_description'].isin(event_types)]

# reset index and drop old mismatch index col
heat_v_events = heat_v_events.sort_values(['year','determined_acres']).reset_index().drop(columns=['index'])
drought_v_events = drought_v_events.sort_values(['year','determined_acres']).reset_index().drop(columns=['index'])

# check they are identical
heat_v_events.equals(drought_v_events)
heat_v_events.compare(drought_v_events) # empty -- meaning they are identical

## heat version is identical to the drought version, only need to pull one to calculate both
os.remove('usda_crop_loss_CA_final.csv')

### Heat Metrics
* Heat/crop loss: average # of acres lost from extreme heat per year
* Heat/crop loss: average cost of crop loss from extreme heat per year

In [10]:
# select for relevent related events
heat_types = ['Heat']

# filter for heat events based on event type
heat_events = all_events[all_events['damage_description'].isin(heat_types)]
heat_events = heat_events[['year', 'county_name', 'damage_description', 'determined_acres', 'indemnity_amount']]
heat_events

Unnamed: 0,year,county_name,damage_description,determined_acres,indemnity_amount
0,1989,Alameda,Heat,52.0000,
1,1989,Alameda,Heat,6508.0000,
6,1989,Butte,Heat,15029.0000,
7,1989,Butte,Heat,2442.0000,
9,1989,Butte,Heat,3400.0000,
...,...,...,...,...,...
24602,2018,Tulare,Heat,462.0750,921996.95
24621,2018,Ventura,Heat,209.2900,395839.46
24622,2018,Ventura,Heat,1.8000,2275.50
24644,2018,Yolo,Heat,16.8216,6998.00


In [11]:
# remove any non CA counties -- drop "All Other Counties"
heat_events.county_name.unique()
heat_events = heat_events[heat_events.county_name != 'All Other Counties']

In [12]:
# calculate metrics, reset index, drop averaged year col
heat_agg = heat_events.groupby('county_name').mean('year').reset_index().drop(columns=['year'])
heat_agg

Unnamed: 0,county_name,determined_acres,indemnity_amount
0,Alameda,769.522222,14348.571429
1,Amador,90.074444,60287.4
2,Butte,16730.358838,175273.221935
3,Calaveras,22.030538,16831.378462
4,Colusa,8829.447301,61945.176404
5,Contra Costa,112.148259,65657.232593
6,El Dorado,14.646154,10178.523077
7,Fresno,11028.996145,342554.026913
8,Glenn,18649.035317,138920.124444
9,Imperial,1333.288654,475124.56


In [13]:
heat_agg['county'] = heat_agg['county_name'].str.lower() # rename col to match
heat_agg = heat_agg.drop(columns=['county_name'])

In [14]:
# add CA census tracts
heat_agg_merge = pd.merge(ca_tract_county, heat_agg, on='county', how='left')
heat_agg_merge

Unnamed: 0,tract,countyfp,county,determined_acres,indemnity_amount
0,06085504321,085,santa clara,112.933200,221115.456000
1,06085504410,085,santa clara,112.933200,221115.456000
2,06085507003,085,santa clara,112.933200,221115.456000
3,06085507004,085,santa clara,112.933200,221115.456000
4,06085502204,085,santa clara,112.933200,221115.456000
...,...,...,...,...,...
9124,06059001303,059,orange,,
9125,06059001304,059,orange,,
9126,06059001401,059,orange,,
9127,06013367200,013,contra costa,112.148259,65657.232593


In [15]:
# double check counties with NaN
heat_agg_merge_nans = heat_agg_merge[heat_agg_merge.determined_acres.isnull()]
c_to_check = heat_agg_merge_nans.county.unique()

for county in c_to_check:
    print(county, county in heat_agg.county.str.upper().values)

orange False
san francisco False
los angeles False
del norte False
alpine False
trinity False
san mateo False
mariposa False
nevada False
tuolumne False
humboldt False
plumas False
sierra False
inyo False
mono False


In [16]:
# split metrics into separate files and save
heat_crop_loss_acres_metric = heat_agg_merge[['tract', 'county', 'determined_acres']]
heat_crop_loss_acres_metric.to_csv('climate_heat_crop_loss_acres_metric.csv', index=False)

heat_crop_loss_cost_metric = heat_agg_merge[['tract', 'county', 'indemnity_amount']]
heat_crop_loss_cost_metric.to_csv('climate_heat_crop_loss_cost_metric.csv', index=False)

### Drought metrics
* Drought/crop loss: average # of acres lost from drought per year
* Drought/crop loss: average cost of crop loss from drought per year

In [17]:
# select for relevent related events
drought_types = ['Drought']

# filter for heat events based on event type
drought_events = all_events[all_events['damage_description'].isin(drought_types)]
drought_events = drought_events[['year', 'county_name', 'damage_description', 'determined_acres', 'indemnity_amount']]
drought_events

Unnamed: 0,year,county_name,damage_description,determined_acres,indemnity_amount
37,1989,Fresno,Drought,2668.0000,
47,1989,Fresno,Drought,8345.0000,
69,1989,Kern,Drought,4333.0000,
70,1989,Kings,Drought,915.0000,
71,1989,Kings,Drought,777.0000,
...,...,...,...,...,...
24562,2018,Tulare,Drought,769.2450,53456.0
24563,2018,Tulare,Drought,486.7625,21315.0
24564,2018,Tulare,Drought,577.3570,34554.1
24635,2018,Yolo,Drought,106.5900,3861.8


In [18]:
# remove any non CA counties -- drop "All Other Counties"
drought_events.county_name.unique()
drought_events = drought_events[drought_events.county_name != 'All Other Counties']

In [19]:
# calculate metrics, reset index, drop averaged year col
drought_agg = drought_events.groupby('county_name').mean('year').reset_index().drop(columns=['year'])
drought_agg

Unnamed: 0,county_name,determined_acres,indemnity_amount
0,Alameda,197.583333,7394.25
1,Butte,87.6,4084.4
2,Calaveras,32.67,17176.0
3,Colusa,180.764706,9991.0625
4,Contra Costa,488.5,
5,El Dorado,25.875,3418.25
6,Fresno,89505.42764,398630.03359
7,Glenn,2175.65,36390.883333
8,Kern,8498.172333,62177.907
9,Kings,102431.335813,117433.226226


In [20]:
drought_agg['county'] = drought_agg['county_name'].str.lower() # rename col to match
drought_agg = drought_agg.drop(columns=['county_name'])

In [21]:
# add CA census tracts
drought_agg_merge = pd.merge(ca_tract_county, drought_agg, on='county', how='left')
drought_agg_merge

Unnamed: 0,tract,countyfp,county,determined_acres,indemnity_amount
0,06085504321,085,santa clara,,
1,06085504410,085,santa clara,,
2,06085507003,085,santa clara,,
3,06085507004,085,santa clara,,
4,06085502204,085,santa clara,,
...,...,...,...,...,...
9124,06059001303,059,orange,,
9125,06059001304,059,orange,,
9126,06059001401,059,orange,,
9127,06013367200,013,contra costa,488.500000,


In [22]:
# double check counties with NaN
drought_agg_merge_nans = drought_agg_merge[drought_agg_merge.determined_acres.isnull()]
c_to_check = drought_agg_merge_nans.county.unique()

for county in c_to_check:
    print(county, county in drought_agg.county.str.upper().values)

santa clara False
orange False
san francisco False
del norte False
ventura False
alpine False
trinity False
san mateo False
amador False
lake False
imperial False
mariposa False
nevada False
yuba False
tuolumne False
marin False
santa cruz False
humboldt False
plumas False
sierra False
inyo False
mono False


In [23]:
# split metrics into separate files and save
drought_crop_loss_acres_metric = drought_agg_merge[['tract', 'county', 'determined_acres']]
drought_crop_loss_acres_metric.to_csv('climate_drought_crop_loss_acres_metric.csv', index=False)

drought_crop_loss_cost_metric = drought_agg_merge[['tract', 'county', 'indemnity_amount']]
drought_crop_loss_cost_metric.to_csv('climate_drought_crop_loss_cost_metric.csv', index=False)

### Metadata

In [24]:
@append_metadata
def crop_loss_upload(input_csv, export=False, varname=''):
    '''
    Uploads the crop loss metrics to S3 bucket. The metrics are:
    * Drought/crop loss: average # of acres lost from drought per year
    * Drought/crop loss: average cost of crop loss from drought per year
    * Heat/crop loss: average # of acres lost from extreme heat per year
    * Heat/crop loss: average cost of crop loss from extreme heat per year
    
    Data for this metric was sourced from USDA Risk Management database:
    https://legacy.rma.usda.gov/data/cause.html

    Methods
    -------
    Relevant columns to our data metrics were isolated.
    Data was isolated to include relevant events.
    Data were grouped by county and summed to calculate final metrics.
    
    Parameters
    ----------
    input_csv: string
        csv crop loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI crop loss metrics to AWS
        True = will upload resulting df containing CAL CRAI crop loss metrics to AWS

    Script
    ------
    climate_flood_loss.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data was grouped by county and averaged by year.')
    print('Data transformation: data was merged with California census tracts.') 
 
    if export == True:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    if export == False:
        print(f'{input_csv} uploaded to AWS.')
 
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [25]:
input_csvs = ['climate_heat_crop_loss_acres_metric.csv',
               'climate_drought_crop_loss_acres_metric.csv',
              'climate_heat_crop_loss_cost_metric.csv',
              'climate_drought_crop_loss_cost_metric.csv',
]

varnames = ['climate_usda_heat_crop_cost',
            'climate_usda_drought_crop_cost',
            'climate_usda_heat_acres_lost',
            'climate_usda_drought_acres_lost'
            ]

# Process the data and export
for input_csv, varname in zip(input_csvs, varnames):
    print(f'Processing {input_csv} with varname {varname}')
    crop_loss_upload(input_csv, export=True, varname='test')
    print(f'Completed uploading {input_csv} with varname {varname}!')

Processing climate_heat_crop_loss_acres_metric.csv with varname climate_usda_heat_crop_cost
Completed uploading climate_heat_crop_loss_acres_metric.csv with varname climate_usda_heat_crop_cost!
Processing climate_drought_crop_loss_acres_metric.csv with varname climate_usda_drought_crop_cost
Completed uploading climate_drought_crop_loss_acres_metric.csv with varname climate_usda_drought_crop_cost!
Processing climate_heat_crop_loss_cost_metric.csv with varname climate_usda_heat_acres_lost
Completed uploading climate_heat_crop_loss_cost_metric.csv with varname climate_usda_heat_acres_lost!
Processing climate_drought_crop_loss_cost_metric.csv with varname climate_usda_drought_acres_lost
Completed uploading climate_drought_crop_loss_cost_metric.csv with varname climate_usda_drought_acres_lost!
