In [None]:
## Load necessary libraries
%matplotlib inline
# The star import stays first so the explicit imports below win on any name clash.
from constructIDF import *

import argparse
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib import rcParams

# Use a 14 pt font for the tick labels on both axes of every figure.
rcParams.update({
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

### Step 1: Read file with precipitation time series

This notebook takes as input a precipitation time series previously downloaded from the National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI), available [here](https://www.ncei.noaa.gov/data/coop-hourly-precipitation/v2/).

The NCEI data must first be reformatted with the `Reformat_NCEI_data` Jupyter notebook
before it can be used in this notebook.


In [None]:
"[*]"

# Path to the hourly rainfall time series (the CSV written by the
# `Reformat_NCEI_data` notebook). Replace the example path with your own and
# keep exactly one of the two assignments below active for your operating system.

#path = r"/Users/climate_class/this/is/an/example/path/output_from_Reformat_NCEI_data_notebook.csv" ###MAC
path = r"\Users\climate_class\this\is\an\example\path\output_from_Reformat_NCEI_data_notebook.csv" ###WINDOWS

Read data into a dataframe and examine.
Precipitation time series are organized by date and record, with flags inherited from the NCEI data. For more information on the flag meanings, visit
https://www.ncei.noaa.gov/data/coop-hourly-precipitation/v2/doc/readme.csv.txt

In [None]:
# Load the reformatted CSV, using its first column as the row index,
# and preview the first rows.
station_data = pd.read_csv(path, index_col=0)
station_data.head()

### Step 2: Construct Annual Maximum Series

IDF curves are constructed based on the Annual Maximum Series (AMS) or the Partial Duration Series (PDS). In this tutorial, given the functions implemented in `constructIDF`, we will construct IDF curves based on the AMS. The AMS corresponds to the maximum rainfall value accumulated over a specific duration for each year on record. We want to construct IDF curves for durations longer than one hour (e.g. 2 hours, 6 hours, etc.), and we can do so by aggregating the series above to the corresponding duration.

In [None]:
# Durations for which IDF curves will be created (presumably in hours; the
# IDF table column is later labelled '24H').
# Only the 24-hour duration is listed here. Later cells hard-code the '24H'
# column label and the "24-hour" plot title, so they need updating if this
# list is changed.

durations = [24]

In [None]:
# Build the AMS helper (presumably provided by the `constructIDF` star import)
# from the CSV path and the list of durations. The reformatted series is then
# available as `ts.reformatted_frame`.
ts = AMS(path, durations)


In [None]:
# Preview the reformatted data that will be fed into the AMS calculation.
ts.reformatted_frame.head()

Calculate AMS for each duration specified above. There are two methods implemented in `constructIDF`: *fixed maxima* and *sliding maxima* algorithms. The algorithms are provided in 

> Papalexiou, S. M., Dialynas, Y. G., & Grimaldi, S. (2016). Hershfield factor revisited: Correcting annual maximum precipitation. Journal of Hydrology, 542, 884–895. https://doi.org/10.1016/j.jhydrol.2016.09.058

These two functions extract the annual maximum series (AMS) from a precipitation time series. The two approaches arise from the need to account for the fact that precipitation is recorded systematically at fixed times. For example, at a meteorological station, someone will check the tipping bucket pluviographs at some fixed local time each day, and the reading is then recorded as a "daily rainfall time series" at that location. This results in "fixed" records, but "fixed" records have been shown to be inappropriate for estimating rainfall maxima. Because rainfall is a continuous variable, discretizing it can result in biases when estimating extreme rainfall, so it is advised to estimate the annual maximum series using the sliding maxima approach.

In [None]:
# Show the reformatted frame again before the annual maxima are extracted.
ts.reformatted_frame

In [None]:
# Build the annual maximum series (AMS) table:
#   1. group the reformatted frame by calendar year (annual frequency on `date`),
#   2. take the maximum of every column within each year and keep `val`,
#   3. add the year as its own column and drop years with missing values.
# NOTE(review): this is a per-year group maximum of `val`; whether `val` is
# already aggregated to the chosen duration depends on `AMS` - confirm.
out = (
    pd.DataFrame(
        ts.reformatted_frame
        .groupby(pd.Grouper(key='date', freq='A'))
        .agg(lambda column: column.max())
        .val
    )
    .assign(year=lambda frame: frame.index.year)
    .dropna(how='any')
)

In [None]:
#################
#               #
#  AMS TABLE    #
#               #
#################


# Preview the first rows of the computed AMS table
# (one row per year: the annual maximum `val` and its `year`).

out.head()

### Step 3: Fit Generalized Extreme Value and obtain rainfall depths

The next step is to fit a generalized extreme value (GEV) distribution to each duration's AMS. Once the parameters (location, scale and shape) are estimated, they are used to retrieve the return levels (in this case, rainfall depths) by feeding quantiles into the inverse of the CDF. Usually, the exceedance probability is equal to the inverse of the average recurrence interval (ARI) (e.g. 1/2 for the 2-year event), so the quantile fed into the inverse CDF is 1 − 1/ARI.

`constructIDF` has one method that merges all these steps, but we need to specify whether we want to construct confidence intervals. The method implemented in `constructIDF` is bootstrapping, so we also need to specify the number of bootstrapped samples. The default value is 1000; using a smaller number is not recommended, but a larger number of bootstrapped samples increases the time it takes for this cell to run.

Another setting is the confidence level, alpha, used to estimate the confidence intervals. The default is 0.9 (90% confidence interval).


In this tutorial, we will compute confidence intervals at a 90% confidence level using 50 bootstrapped samples.

```python
ci = True
alpha = 0.9
number_bootstrap = 50 
```

In [None]:
# Settings for the IDF construction (passed to `IDF(...)` in the next cell).
ci = True                 # construct confidence intervals
alpha = 0.9               # confidence level (0.9 -> 90% confidence interval)
number_bootstrap = 50     # number of bootstrapped samples

In [None]:
"[~~~]"

# Feed the AMS table and the settings chosen above to the IDF class.

data = IDF(out, ci, number_bootstrap, alpha)

# Construct the IDF from the data and settings passed above.
# Some errors may be displayed while this runs; no need to worry about them.
# This step can take a long time because of the number of bootstrapped samples.
# By default the IDF is constructed for the following ARIs:
# 2-, 5-, 10-, 25-, 50-, 100-, 200-year

data.construct_IDF()


In [None]:
#################
#               #
#  IDF TABLE    #
#               #
#################


# Keep a transposed copy of the IDF table; the median and the lower/upper
# confidence levels are sliced out of it further down.
data.idf_t = data.idf.transpose()

# A notebook cell only renders its last expression, so the table has to come
# last for this cell to actually display it.
data.idf


In [None]:
# Display the IDF table.
data.idf

### Step 4: Generate IDF curves and store IDF values

We will run the next cells to create the IDF curves and plot them.
We need to pass the path where the original data was stored, a path where to store
the figure and its format.

#### Step 4.1: Specify a path where to store figure, and the figure format

Specify the path and format in the cell below between quotation marks

In [None]:
"[*]"

# Where and how the IDF figure is saved by the plotting cell in Step 5.
figure_path = ""                        # folder in which the figure is stored
figure_name = "fut_station_idf_plot"    # file name, without extension
figure_format = "jpg"                   # file extension / image format

#### Step 4.2: Store values from IDF table

The code below saves the IDF table (`data.idf`) to a csv file in the specified path 

In [None]:
"[*]"
# Write the IDF table to a CSV file. Replace the example path with your own and
# keep exactly one of the two lines below active for your operating system.
#data.idf.to_csv(r"/Users/climate_class/Example/Path/hist_station_idf.csv") ###MAC
data.idf.to_csv( r"\Users\climate_class\Example\Path\hist_station_idf.csv") ###WINDOWS

In [None]:
# Median estimates: the columns of the transposed table whose label contains
# neither 'L' nor 'U' (those labels are used for the lower/upper levels).
median = data.idf_t.drop(
    columns=[label for label in data.idf_t.columns if 'L' in label or 'U' in label]
)

# Label the single column of the IDF table with its duration.
data.idf.columns = ['24H']

In [None]:
# Split out the confidence bounds from the transposed table: columns whose
# label contains 'U' form the upper level, those containing 'L' the lower level.
upper_level = data.idf_t.loc[:, ['U' in label for label in data.idf_t.columns]]
lower_level = data.idf_t.loc[:, ['L' in label for label in data.idf_t.columns]]

### Step 5: Plot the IDF Curve

In [None]:
# Plot the median estimate together with its confidence band and save the
# figure under `figure_path` as `<figure_name>.<figure_format>`.
fig, axs = plt.subplots(figsize=(10, 7))

# One x position per default return period (ARI, in years).
return_periods = ('2', '5', '10', '25', '50', '100', '200')
x_positions = np.arange(len(return_periods))

median.transpose().plot(ax=axs, label='median', c='k')
axs.fill_between(x_positions,
                 lower_level.transpose().val.values,
                 upper_level.transpose().val.values,
                 alpha=0.3)
axs.set_xticks(x_positions)
axs.set_xticklabels(return_periods)

# Derive the band label from the confidence level chosen in Step 3 (`alpha`)
# so the legend cannot drift from the level that was actually computed.
axs.legend(['Median', '{:.0f}% CI'.format(alpha * 100)], fontsize=18, loc=2)
axs.set_xlabel('Return Period (years)', fontsize=18)
axs.set_ylabel('Precipitation Depth (in)', fontsize=18)
axs.set_title('Pittsburgh Historical 24-hour DDF curve', fontsize=22)

# os.path.join inserts the correct separator for the current operating system
# and also handles an empty `figure_path` (the figure is then written to the
# current working directory), so no per-OS branches are needed.
path_save_fig = os.path.join(figure_path,
                             '{}.{}'.format(figure_name, figure_format))
fig.savefig(path_save_fig, bbox_inches='tight')