### Cal-CRAI metric calculation: drought exposure
* average annual drought severity level
* average annual drought % coverage
* total # of weeks in drought

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import dask_geopandas
import dask.dataframe as dd
import matplotlib.pyplot as plt
import os
import sys

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [2]:
# census tract -> county lookup table; county-level metrics are merged onto it further down
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'TRACT': 'census_tract'})
    # only the tract id and the county name are kept
    .drop(columns=['Unnamed: 0', 'COUNTYFP'])
)
ca_county_tract

Unnamed: 0,census_tract,County
0,6085504321,Santa Clara
1,6085504410,Santa Clara
2,6085507003,Santa Clara
3,6085507004,Santa Clara
4,6085502204,Santa Clara
...,...,...
9124,6059001303,Orange
9125,6059001304,Orange
9126,6059001401,Orange
9127,6013367200,Contra Costa


In [3]:
# pull csv from aws
# fetch the UNL drought csv stored under aws_dir in the project bucket; the helper
# reports the local filename it saved, and that file is read back in the next cell
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/drought/exposure/university_nebraska_lincoln/'

# search_zipped=False: presumably skips looking inside zip archives — TODO confirm against file_helpers
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'unl_drought_20000104_20201229.csv'


In [4]:
# load the locally saved UNL drought table and preview its first five rows
drought_csv = 'unl_drought_20000104_20201229.csv'
drought_data = pd.read_csv(drought_csv)
drought_data.head()

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd,StatisticFormatID
0,20201229,6001,Alameda County,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/29/2020,1/4/2021,2
1,20201222,6001,Alameda County,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/22/2020,12/28/2020,2
2,20201215,6001,Alameda County,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/15/2020,12/21/2020,2
3,20201208,6001,Alameda County,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/8/2020,12/14/2020,2
4,20201201,6001,Alameda County,CA,0.0,0.0,0.0,100.0,0.0,0.0,12/1/2020,12/7/2020,2


#### Metric 1: average annual drought severity level

In [5]:
# year label to aggregate across: the first four characters of MapDate's string form
map_date_text = drought_data['MapDate'].astype(str)
drought_data['drought_year'] = map_date_text.str.slice(stop=4)

In [6]:
# quick look at the distinct values present in the D0 column
pd.unique(drought_data['D0'])

array([ 0.  ,  1.49, 94.88, ..., 37.71, 34.02, 99.06])

#### Metric 2: total number of weeks in drought

In [7]:
# keep only the rows whose "None" column is not 100
has_drought = drought_data['None'].ne(100)
some_drought = drought_data[has_drought]
some_drought

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd,StatisticFormatID,drought_year
0,20201229,6001,Alameda County,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/29/2020,1/4/2021,2,2020
1,20201222,6001,Alameda County,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/22/2020,12/28/2020,2,2020
2,20201215,6001,Alameda County,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/15/2020,12/21/2020,2,2020
3,20201208,6001,Alameda County,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/8/2020,12/14/2020,2,2020
4,20201201,6001,Alameda County,CA,0.00,0.00,0.0,100.0,0.0,0.0,12/1/2020,12/7/2020,2,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63510,20010206,6115,Yuba County,CA,0.94,99.06,0.0,0.0,0.0,0.0,2/6/2001,2/12/2001,2,2001
63514,20010109,6115,Yuba County,CA,83.29,16.71,0.0,0.0,0.0,0.0,1/9/2001,1/15/2001,2,2001
63565,20000118,6115,Yuba County,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/18/2000,1/24/2000,2,2000
63566,20000111,6115,Yuba County,CA,0.00,100.00,0.0,0.0,0.0,0.0,1/11/2000,1/17/2000,2,2000


In [8]:
# tally the filtered rows ("None" != 100) for every (drought_year, County) pair
rows_per_year_county = some_drought.groupby(['drought_year', 'County']).size()
count_drought_data = rows_per_year_county.rename('drought_week_count').reset_index()
count_drought_data

Unnamed: 0,drought_year,County,drought_week_count
0,2000,Alameda County,2
1,2000,Alpine County,3
2,2000,Amador County,3
3,2000,Butte County,3
4,2000,Calaveras County,3
...,...,...,...
1154,2020,Tulare County,49
1155,2020,Tuolumne County,49
1156,2020,Ventura County,15
1157,2020,Yolo County,49


In [16]:
# total number of weeks in drought per county, summed over all years
# (the count column is selected explicitly so the string-typed 'drought_year' column is
#  never passed to sum(); the result holds only 'County' and 'drought_week_count')
count_drought_data_total = (
    count_drought_data
    .groupby('County', as_index=False)['drought_week_count']
    .sum()
)

# reformatting to share of total weeks in drought
# NOTE(review): the denominator is the number of rows for Alameda County, so this assumes
# every county has the same number of weekly records — confirm
tot_num_weeks = len(drought_data.loc[(drought_data['County'] == 'Alameda County')]) # 1096 weeks
# despite the column name, the stored value is a 0-1 fraction, not a 0-100 percentage
count_drought_data_total['percent_weeks_drought'] = count_drought_data_total['drought_week_count'] / tot_num_weeks
count_drought_data_total

Unnamed: 0,County,drought_week_count,percent_weeks_drought
0,Alameda County,625,0.570255
1,Alpine County,752,0.686131
2,Amador County,656,0.59854
3,Butte County,645,0.588504
4,Calaveras County,657,0.599453
5,Colusa County,616,0.562044
6,Contra Costa County,622,0.567518
7,Del Norte County,444,0.405109
8,El Dorado County,732,0.667883
9,Fresno County,844,0.770073


In [17]:
# drop the trailing " County" from each name so it lines up with the tract table's names
# (anchored pattern rather than slicing off 7 characters: names without the suffix are
#  left untouched, so re-running this cell does not keep trimming them)
count_drought_data_total['County'] = count_drought_data_total['County'].str.replace(r' County$', '', regex=True)

In [19]:
# attach the county-level totals to every census tract (left join keeps all tract rows)
total_weeks_metric = ca_county_tract.merge(
    count_drought_data_total,
    how='left',
    on='County',
)
total_weeks_metric

Unnamed: 0,census_tract,County,drought_week_count,percent_weeks_drought
0,6085504321,Santa Clara,641,0.584854
1,6085504410,Santa Clara,641,0.584854
2,6085507003,Santa Clara,641,0.584854
3,6085507004,Santa Clara,641,0.584854
4,6085502204,Santa Clara,641,0.584854
...,...,...,...,...
9124,6059001303,Orange,712,0.649635
9125,6059001304,Orange,712,0.649635
9126,6059001401,Orange,712,0.649635
9127,6013367200,Contra Costa,622,0.567518


In [None]:
# export
# write each metric table to a local csv; these filenames are the ones passed to the
# upload function further down
total_weeks_metric.to_csv('climate_drought_total_weeks.csv') # done
# NOTE(review): metric2 and metric3 are not assigned in any cell visible in this notebook —
# confirm the coverage / severity metrics are computed before running the next two lines
metric2.to_csv('climate_drought_coverage_metric.csv')
metric3.to_csv('climate_drought_severity_metric.csv')

### Function Call

In [None]:
@append_metadata
def drought_metrics_metadata(input_csv, export=False, varname=''):
    '''
    Uploads one csv file containing a drought metric calculation within Cal-CRAI's Climate Domain.
    Call once per metric file (total weeks in drought, drought coverage, drought severity).
    Data was sourced from the UNL from: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

    Methods
    -------
    The metrics themselves are calculated in the notebook cells; this function only reports
    the data transformations and uploads the resulting csv file.

    Parameters
    ----------
    input_csv: string
        name of the local csv file to be uploaded to AWS
    export: True/False boolean
        False = will not upload the csv containing the Cal-CRAI drought metric to AWS
        True = will upload the csv containing the Cal-CRAI drought metric to AWS
    varname: string
        not used in the function body; accepted for the append_metadata decorator
        (presumably the metadata variable name -- confirm in scripts/utils/write_metadata.py)

    Script
    ------
    climate_drought_exposure.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: data filtered for severity ratings.')
    print('Data transformation: average percentage values for multi-county entries.')
    print('Data transformation: merge data to California tracts.')

    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/index_data'
    # the filename is wrapped in a one-element list before being handed to upload_csv_aws
    export_filename = [input_csv]

    if export:
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # previously this branch claimed the file had been uploaded even though nothing was sent
        print(f'{export_filename} not uploaded to AWS (export=False).')

    # optional local cleanup (currently disabled):
    #if os.path.exists(input_csv):
    #   os.remove(input_csv)

In [None]:
# csv files written by the export cell, paired by position with their metadata variable names
input_csv = [
            'climate_drought_total_weeks.csv',
            'climate_drought_coverage_metric.csv',
            'climate_drought_severity_metric.csv',
            ]

varnames = [
    'climate_unl_drought_weeks',
    'climate_unl_drought_coverage',
    'climate_unl_drought_severity'
    ]

# one call per metric file; pass the paired variable name instead of a fixed 'test'
# placeholder so each file is associated with its own variable name
for csv_name, var in zip(input_csv, varnames):
    drought_metrics_metadata(csv_name, export=True, varname=var)