### Cal-CRAI metric calculation: drought exposure
* Average annual % drought coverage
* Total # of weeks in drought

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import os
import sys
import numpy as np
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# Load the tract-to-county lookup table; the county-level drought metrics
# computed below are joined onto census tracts through its 'County' column.
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'TRACT': 'census_tract'})
    .drop(columns=['Unnamed: 0', 'COUNTYFP'])
)
ca_county_tract

In [None]:
# Fetch the UNL drought csv from AWS.
# NOTE(review): presumably `folder` is the local destination directory (the
# next cell reads from 'csv_folder/') -- confirm against pull_csv_from_directory.
folder = 'csv_folder'
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/drought/exposure/university_nebraska_lincoln/'

pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

In [None]:
# read in the UNL drought records pulled into the local csv folder
drought_data = pd.read_csv(r'csv_folder/unl_drought_20000104_20201229.csv')

# Strip the trailing " County" so names line up with the 'County' column of the
# tract lookup table used for the merges below. The suffix is removed by
# pattern rather than by slicing off a fixed 7 characters, so a name that
# lacks the suffix is left intact instead of being silently truncated.
drought_data['County'] = drought_data['County'].str.replace(r' County$', '', regex=True)
drought_data.head(5)

In [5]:
# Derive a year label from the first four characters of MapDate so that
# records can be aggregated per year. The result is a string column.
map_date_text = drought_data['MapDate'].astype(str)
drought_data['drought_year'] = map_date_text.str.slice(stop=4)

#### Metric 1: total number of weeks in drought

In [None]:
# Keep only the records that are not entirely in the "None" category,
# i.e. where the "None" value differs from 100.
has_drought = drought_data['None'].ne(100)
some_drought = drought_data[has_drought]
some_drought

In [None]:
# Number of records per (drought year, county) among the rows where "None" != 100
rows_per_year_county = some_drought.groupby(['drought_year', 'County']).size()
count_drought_data = rows_per_year_county.rename('drought_week_count').reset_index()
count_drought_data

In [None]:
# Total number of drought weeks per county across all years.
# Only the count column is aggregated: `drought_year` holds strings, so summing
# the whole frame would either concatenate the year labels into a meaningless
# value or raise, depending on the pandas version, and that junk column would
# be carried into the exported metric file.
count_drought_data_total = (
    count_drought_data.groupby('County')['drought_week_count'].sum().reset_index()
)
count_drought_data_total.head(5)

In [None]:
# Express the drought week count as a share of all weeks on record.
# The record length is taken as the number of rows belonging to one county
# (Alameda); the original author's note gives this as 1096 weeks.
# NOTE(review): the stored value is count / total (a 0-1 fraction), not
# multiplied by 100, despite the "percent" column name -- confirm intended.
alameda_rows = drought_data[drought_data['County'] == 'Alameda']
tot_num_weeks = alameda_rows.shape[0]
count_drought_data_total['percent_weeks_drought'] = (
    count_drought_data_total['drought_week_count'].div(tot_num_weeks)
)
count_drought_data_total.head(5)

In [None]:
# Attach the county-level totals to every census tract in that county.
# A left join keeps all tracts, including those with no matching county row.
total_weeks_metric = ca_county_tract.merge(
    count_drought_data_total,
    how='left',
    on='County',
)
total_weeks_metric

#### Metric 2: annual average % coverage
We refine this metric to count only the percent coverage in drought categories at or above a severity threshold: D1 ("moderate drought") through D4.
`annual average % of moderate to exceptional drought` 
https://droughtmonitor.unl.edu/About/AbouttheData/DroughtClassification.aspx

In [None]:
# Combined coverage of the D1-D4 categories for each record.
# NOTE(review): adding the columns assumes the D-columns are mutually exclusive
# (categorical, not cumulative, statistics) -- confirm against the download format.
severity_columns = ['D1', 'D2', 'D3', 'D4']
drought_data['sum_d1_d4'] = drought_data.loc[:, severity_columns].sum(axis='columns')
drought_data

In [12]:
# Mean D1-D4 coverage per county over all records.
# The column is selected explicitly before aggregating: the first positional
# parameter of GroupBy.mean is `numeric_only`, so passing a list of column
# names there does not select columns.
drought_data_coverage = (
    drought_data.groupby('County')['sum_d1_d4'].mean().reset_index()
)

In [None]:
# Retain only the county name and its averaged D1-D4 coverage
columns_to_keep = ['County', 'sum_d1_d4']
drought_data_coverage = drought_data_coverage.loc[:, columns_to_keep]
drought_data_coverage.head(5)

In [None]:
# Attach county-level coverage to every census tract (left join keeps all
# tracts) and give the metric column its final name.
coverage_metric = (
    ca_county_tract
    .merge(drought_data_coverage, how='left', on='County')
    .rename(columns={'sum_d1_d4': 'drought_coverage_percentage'})
)
coverage_metric

In [None]:
# Sanity check: display the (min, max) of the coverage metric
coverage_values = coverage_metric['drought_coverage_percentage']
coverage_values.min(), coverage_values.max()

### Export

In [17]:
# export
# Both files use the `_metric.csv` suffix so the names written here match the
# names the upload step looks for ('climate_drought_total_weeks_metric.csv',
# 'climate_drought_coverage_metric.csv').
total_weeks_metric.to_csv('climate_drought_total_weeks_metric.csv', index=False)
coverage_metric.to_csv('climate_drought_coverage_metric.csv', index=False)

### Function Call

In [18]:
@append_metadata
def drought_metrics_metadata(input_csv, export=False, varname=''):
    '''
    Uploads a csv file that contains a metric calculation for drought classification within Cal-CRAI's Climate Domain.
    Intended to be called once per drought metric file (total weeks in drought, drought coverage).
    Data was sourced from the UNL from: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

    Methods
    -------
    Total weeks calculated as sum of weeks not in "None" category. 
    Annual coverage calculated as average of coverage in categories D1-D4 (moderate to exceptional drought).

    Parameters
    ----------
    input_csv: string
        name of the csv file to be uploaded to AWS
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI drought metric to AWS
        True = will upload resulting df containing CAL CRAI drought metric to AWS
    varname: string
        not read inside this function body; presumably consumed by the
        append_metadata decorator to label the metadata entry (confirm in
        scripts/utils/write_metadata.py)

    Script
    ------
    climate_drought_exposure.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    if not export:
        # Dry run: only report the transformation steps, nothing is uploaded.
        print('Data transformation: drought year added to dataframe.')
        print('Data transformation: data filtered based on severity ratings.')
        print('Data transformation: average percentage values for multi-county entries.')
        print('Data transformation: merge data to California tracts.')
    else:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is given a list of filenames, so wrap the single name
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    # Local cleanup of the uploaded file is currently disabled:
    #if os.path.exists(input_csv):
    #   os.remove(input_csv)

In [None]:
# Metric files to upload, paired by position with the variable name each one
# should be recorded under.
input_csv = [
    'climate_drought_total_weeks_metric.csv',
    'climate_drought_coverage_metric.csv',
]

varnames = [
    'climate_unl_drought_duration',
    'climate_unl_drought_coverage',
]

# Pass each file's own variable name through, rather than a fixed placeholder,
# so the two metrics are distinguishable downstream.
# NOTE(review): if a dry run of the metadata step is wanted, pass
# varname='test' explicitly -- confirm how append_metadata treats that value.
# `csv_file` (not `csv`) avoids shadowing the standard-library module name.
for csv_file, var in zip(input_csv, varnames):
    drought_metrics_metadata(csv_file, export=True, varname=var)