### Cal-CRAI metric calculation: drought exposure
* Average annual drought % coverage
* total # of weeks in drought

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import os
import sys
import numpy as np
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [2]:
# census tract info
# pull census tract data for merging
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_county_tract = pd.read_csv(county_tract)
ca_county_tract = ca_county_tract.rename(columns={'TRACT': 'census_tract'})
# ca_county_tract = ca_county_tract.rename(columns={'County': 'county'})
ca_county_tract = ca_county_tract.drop(columns={'Unnamed: 0','COUNTYFP'})
ca_county_tract

Unnamed: 0,census_tract,County
0,6085504321,Santa Clara
1,6085504410,Santa Clara
2,6085507003,Santa Clara
3,6085507004,Santa Clara
4,6085502204,Santa Clara
...,...,...
9124,6059001303,Orange
9125,6059001304,Orange
9126,6059001401,Orange
9127,6013367200,Contra Costa


In [3]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/drought/exposure/university_nebraska_lincoln/'
folder = 'csv_folder'

pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

Saved DataFrame as 'csv_folder\unl_drought_20000104_20201229.csv'


In [4]:
# read in data
drought_data = pd.read_csv(r'csv_folder/unl_drought_20000104_20201229.csv')

# drop "county" from name and merge to tracts
drought_data['County'] = drought_data['County'].str[:-7]
drought_data.head(5)

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd,StatisticFormatID
0,20201229,6001,Alameda,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/29/2020,1/4/2021,2
1,20201222,6001,Alameda,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/22/2020,12/28/2020,2
2,20201215,6001,Alameda,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/15/2020,12/21/2020,2
3,20201208,6001,Alameda,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/8/2020,12/14/2020,2
4,20201201,6001,Alameda,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/1/2020,12/7/2020,2


In [5]:
# add column for year to aggregate across
drought_data['drought_year'] = drought_data['MapDate'].astype(str).str[:4]

#### Metric 1: total number of weeks in drought

In [6]:
some_drought = drought_data.loc[drought_data['None'] != 100]
some_drought

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd,StatisticFormatID,drought_year
0,20201229,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/29/2020,1/4/2021,2,2020
1,20201222,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/22/2020,12/28/2020,2,2020
2,20201215,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/15/2020,12/21/2020,2,2020
3,20201208,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/8/2020,12/14/2020,2,2020
4,20201201,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/1/2020,12/7/2020,2,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63510,20010206,6115,Yuba,CA,0.94,99.06,0.0,0.0,0.0,0.0,2/6/2001,2/12/2001,2,2001
63514,20010109,6115,Yuba,CA,83.29,16.71,0.0,0.0,0.0,0.0,1/9/2001,1/15/2001,2,2001
63565,20000118,6115,Yuba,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/18/2000,1/24/2000,2,2000
63566,20000111,6115,Yuba,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/11/2000,1/17/2000,2,2000


In [7]:
# count number of rows per county and drought year where "None" != 100
count_drought_data = some_drought.groupby(['drought_year', 'County']).size().reset_index(name='drought_week_count')
count_drought_data

Unnamed: 0,drought_year,County,drought_week_count
0,2000,Alameda,2
1,2000,Alpine,3
2,2000,Amador,3
3,2000,Butte,3
4,2000,Calaveras,3
...,...,...,...
1154,2020,Tulare,49
1155,2020,Tuolumne,49
1156,2020,Ventura,15
1157,2020,Yolo,49


In [8]:
count_drought_data_total = count_drought_data.groupby(['County']).sum().reset_index()
count_drought_data_total.head(5)

Unnamed: 0,County,drought_year,drought_week_count
0,Alameda,2000200120022003200420052007200820092010201120...,625
1,Alpine,2000200120022003200420052006200720082009201020...,752
2,Amador,2000200120022003200420052006200720082009201020...,656
3,Butte,2000200120022003200420072008200920102011201220...,645
4,Calaveras,2000200120022003200420052006200720082009201020...,657


In [9]:
# # reformatting to % of total weeks in drought
# tot_num_weeks = len(drought_data.loc[(drought_data['County'] == 'Alameda')]) # 1096 weeks
# count_drought_data_total['percent_weeks_drought'] = count_drought_data_total['drought_week_count'] / tot_num_weeks
# count_drought_data_total.head(5)

In [10]:
# total_weeks_metric = pd.merge(ca_county_tract, count_drought_data_total, on='County', how='left')
# total_weeks_metric

#### Metric 2: annual average % coverage
We refactor this metric to look at percentages above certain categories of drought severity. D1 is "moderate drought". 
`annual average % of moderate to exceptional drought` 
https://droughtmonitor.unl.edu/About/AbouttheData/DroughtClassification.aspx

In [11]:
drought_data['sum_d1_d4'] = drought_data[['D1', 'D2', 'D3', 'D4']].sum(axis=1)
drought_data

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd,StatisticFormatID,drought_year,sum_d1_d4
0,20201229,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/29/2020,1/4/2021,2,2020,100.0
1,20201222,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/22/2020,12/28/2020,2,2020,100.0
2,20201215,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/15/2020,12/21/2020,2,2020,100.0
3,20201208,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/8/2020,12/14/2020,2,2020,100.0
4,20201201,6001,Alameda,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/1/2020,12/7/2020,2,2020,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63563,20000201,6115,Yuba,CA,100.00,0.00,0.0,0.0,0.0,0.0,2/1/2000,2/7/2000,2,2000,0.0
63564,20000125,6115,Yuba,CA,100.00,0.00,0.0,0.0,0.0,0.0,1/25/2000,1/31/2000,2,2000,0.0
63565,20000118,6115,Yuba,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/18/2000,1/24/2000,2,2000,0.0
63566,20000111,6115,Yuba,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/11/2000,1/17/2000,2,2000,0.0


In [12]:
drought_data_coverage = drought_data.groupby(['County']).mean(['sum_d1_d4', 'drought_year']).reset_index()

In [13]:
# drop unnecessary columns now
drought_data_coverage = drought_data_coverage[['County', 'sum_d1_d4']]
drought_data_coverage.head(5)

Unnamed: 0,County,sum_d1_d4
0,Alameda,37.98437
1,Alpine,42.243057
2,Amador,35.663869
3,Butte,39.373431
4,Calaveras,36.183677


In [14]:
coverage_metric = pd.merge(ca_county_tract, drought_data_coverage, on='County', how='left')
coverage_metric

Unnamed: 0,census_tract,County,sum_d1_d4
0,6085504321,Santa Clara,35.687226
1,6085504410,Santa Clara,35.687226
2,6085507003,Santa Clara,35.687226
3,6085507004,Santa Clara,35.687226
4,6085502204,Santa Clara,35.687226
...,...,...,...
9124,6059001303,Orange,46.571916
9125,6059001304,Orange,46.571916
9126,6059001401,Orange,46.571916
9127,6013367200,Contra Costa,37.586770


In [15]:
coverage_metric.sum_d1_d4.min(), coverage_metric.sum_d1_d4.max()

(22.007144160583945, 55.92003649635036)

### Export

In [16]:
# export
count_drought_data_total.to_csv('climate_drought_total_weeks.csv') # done
coverage_metric.to_csv('climate_drought_coverage_metric.csv') # done

### Function Call

In [18]:
@append_metadata
def drought_metrics_metadata(input_csv, export=False, varname=''):    
    '''
    Uploads three csv files that contain metric calculations for drought classification within Cal-CRAI's Climate Domain.
    Data was sourced from the UNL from: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

    Methods
    -------
    Total weeks calculated as sum of weeks not in "None" category. 
    Annual coverage calculated as average of coverage in categories D1-D4 (moderate to exceptional drought).

    Parameters
    ----------
    df: string
        the dataframe containing the initial soil data
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI drought metric to AWS
        True = will upload resulting df containing CAL CRAI drought metric to AWS
    import_csv: string
        name of the csv file to be uploaded to AWS

    Script
    ------
    climate_drought_exposure.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    if export == False:
        print('Data transformation: drought year added to dataframe.')
        print('Data transformation: data filtered based on severity ratings.')
        print('Data transformation: average percentage values for multi-county entries.')
        print('Data transformation: merge data to California tracts.')

    if export == True:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    #if os.path.exists(input_csv):
    #   os.remove(input_csv)

In [21]:
input_csv = [
            'climate_drought_total_weeks.csv',
            'climate_drought_coverage_metric.csv',
            ]

varnames = [
    'climate_unl_drought_duration',
    'climate_unl_drought_coverage',
    ]

for csv, var in zip(input_csv, varnames):
    drought_metrics_metadata(csv, export=False, varname=var)