In [None]:
import os
import glob
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from scipy.stats import genextreme as gev

from unseen import fileio
from unseen import time_utils
from unseen import indices
from unseen import general_utils

In [None]:
# Parameters
# Resolution (dots per inch) passed to every savefig call in this notebook.
# The remaining inputs (metadata_file, shapefile, rx15day_file, region_name)
# are not defined here; the assertions in the next cell expect them to be
# supplied through papermill `-p` options.
dpi = 300

In [None]:
# Validate the papermill-supplied parameters before doing any work.
# Each name is checked for existence first so that a parameter that was never
# injected produces the explanatory message below rather than a bare NameError.
# (At notebook top level locals() is the module namespace.)
assert 'metadata_file' in locals() and os.path.isfile(metadata_file), "Must provide a metadata file (papermill option -p metadata_file [filepath])"
assert 'shapefile' in locals() and os.path.isfile(shapefile), "Must provide a shapefile (papermill option -p shapefile [filepath])"
# rx15day_file is an output path, so only its presence (not existence on disk) is checked.
assert 'rx15day_file' in locals(), "Must provide an rx15day output file (papermill option -p rx15day_file [filepath])"
assert 'region_name' in locals(), "Must provide a region name (papermill option -p region_name [name])"

## Generate Rx15day data

In [None]:
# Collect every daily AGCD precipitation file matching the pattern, in
# lexicographic filename order.
agcd_files = sorted(
    glob.glob('/g/data/xv83/agcd-csiro/precip/precip-total_AGCD-CSIRO_r005_*_daily.nc')
)

In [None]:
# NOTE: disabled alternative, kept for reference only.
# It would open all AGCD files in a single fileio.open_dataset call (with dask
# chunking) and ask that call to apply the 15-step rolling sum and the annual
# ('A-AUG') maximum itself. The cells below perform the same steps explicitly,
# file by file. Why this version was disabled is not recorded here — TODO
# confirm whether it can be deleted.
#test = fileio.open_dataset(
#    agcd_files,
#    chunks={'time': 8000, 'lat': 40, 'lon': 40},
#    metadata_file=metadata_file,
#    shapefile=shapefile,
#    variables=['pr'],
#    spatial_agg='mean',
#    input_freq='D',
#    rolling_sum_window=15,
#    time_freq='A-AUG',
#    time_agg='max',
#)

In [None]:
# Options shared by every per-file open: same metadata, same shapefile, only
# the 'pr' variable, aggregated spatially with 'mean' (presumably the mean over
# the shapefile region — confirm against unseen.fileio.open_dataset).
open_kwargs = {
    'metadata_file': metadata_file,
    'shapefile': shapefile,
    'variables': ['pr'],
    'spatial_agg': 'mean',
}

# Open the files one at a time and compute each result immediately, so only
# the reduced per-file datasets accumulate in the list.
ds_list = []
for infile in agcd_files:
    print(infile)
    ds = fileio.open_dataset(infile, **open_kwargs).compute()
    ds_list.append(ds)

In [None]:
# Join the per-file datasets end to end along the time dimension.
# `ds` is rebound here: it previously held only the last file from the loop.
ds = xr.concat(ds_list, dim='time')

In [None]:
# Display the concatenated dataset for inspection.
ds

In [None]:
# Make sure the concatenated dataset is held in memory.
# Each per-file dataset was already computed in the loading loop, so this is
# presumably a cheap safeguard — TODO confirm whether it is still needed.
ds = ds.compute()

In [None]:
# Monthly climatology: group all time steps by calendar month and average
# within each group, giving one value per month.
clim = ds.groupby('time.month').mean()
clim

In [None]:
# Quick-look plot of the monthly climatology of 'pr'.
clim['pr'].plot()

In [None]:
# Rolling sum over a 15-step window along `time` (15 days, assuming daily input
# as the 'D' argument below indicates). With xarray's default min_periods the
# first 14 steps have incomplete windows and come out as NaN.
ds_15day = ds.rolling({'time': 15}).sum()
# Reduce to one value per year — presumably the maximum ('max') of 'pr' over
# years ending in August ('A-AUG'), which matches the "Sep-Aug" plot titles.
# TODO confirm against unseen.time_utils.temporal_aggregation.
ds_rx15day = time_utils.temporal_aggregation(ds_15day, 'A-AUG', 'D', 'max', ['pr'])

In [None]:
# Display the annual Rx15day dataset for inspection.
ds_rx15day

In [None]:
# Record provenance in the dataset's `history` attribute: a timestamp, the
# notebook path (which embeds region_name) and the source repository.
time_stamp = datetime.datetime.now().strftime("%a %b %d %H:%M:%S %Y")
ds_rx15day.attrs['history'] = f'{time_stamp}: /home/599/dbi599/east-coast-rain/AGCD_{region_name}.ipynb (git@github.com:AusClimateService/east-coast-rain)'
# Write the result to the path given by the rx15day_file parameter
# (presumably as a Zarr store — see unseen.fileio.to_zarr).
fileio.to_zarr(ds_rx15day, rx15day_file)

## Analyse and plot Rx15day data

In [None]:
# Time series of the annual Rx15day values, saved to the figures directory and
# then shown inline.
fig, ax = plt.subplots()
ds_rx15day['pr'].plot(ax=ax)
ax.set_title(f'Annual (Sep-Aug) Rx15day for {region_name} (AGCD)')
ax.set_ylabel('precipitation (mm)')
ax.set_xlabel('year')

timeseries_path = f'/g/data/xv83/dbi599/east-coast-rain/figures/Rx15day_timeseries_AGCD_{region_name}.png'
fig.savefig(timeseries_path, bbox_inches='tight', facecolor='white', dpi=dpi)
plt.show()

In [None]:
# Tabulate the annual Rx15day values in a DataFrame indexed by the calendar
# year of each time stamp.
years = ds_rx15day['time'].dt.year.values
df_rx15day = pd.DataFrame({'pr': ds_rx15day['pr'].values}, index=years)

In [None]:
# The ten largest annual Rx15day values, largest first, labelled by year.
df_rx15day['pr'].sort_values(ascending=False).head(n=10)

In [None]:
# Largest annual Rx15day value in the record; used below as the event of
# interest for the GEV analysis.
# NOTE: numpy's max propagates NaN, so this is NaN if any year is missing.
rx15day_max = df_rx15day['pr'].values.max()
print(rx15day_max)

In [None]:
def gev_analysis(ds, event, region, savefig=False, random_state=0):
    """Fit a GEV distribution to a sample and put an event in context.

    Prints the fitted parameters, plots a histogram of the sample with the
    fitted GEV probability density overlaid, then draws a large random sample
    from the fitted distribution and prints how often values above `event`
    occur in it (count, percentile and return period).

    Args:
      ds (Pandas Series): Data sample
      event (float) : Event of interest
      region (str) : Name of spatial region (used in the plot title and the
        output file name)
      savefig (bool) : Save the histogram figure to the figures directory.
        Uses the notebook-level `dpi` setting.
      random_state (int, numpy Generator/RandomState or None) : Seed for the
        random GEV sample. Defaults to a fixed seed so the printed return
        period is reproducible from run to run; pass None for an unseeded draw.
    """

    gev_shape, gev_loc, gev_scale = indices.fit_gev(ds.values)
    print(f'Shape parameter: {gev_shape:.2f}')
    print(f'Location parameter: {gev_loc:.2f}')
    print(f'Scale parameter: {gev_scale:.2f}')

    # Evaluate the fitted PDF on integer x values from 0 up to at least 699,
    # widening the range when the data or the event lie beyond it so the
    # curve is never cut off short of the values being discussed.
    x_upper = np.nanmax([ds.max(), event, 699.0])
    gev_xvals = np.arange(0, int(np.ceil(x_upper)) + 1)
    gev_pdf = gev.pdf(gev_xvals, gev_shape, gev_loc, gev_scale)

    # Draw on the axes created here explicitly rather than relying on
    # matplotlib's implicit "current axes".
    fig, ax = plt.subplots(figsize=[10, 8])
    ds.plot.hist(ax=ax, bins=40, density=True, color='tab:green', alpha=0.5)
    ax.plot(gev_xvals, gev_pdf, color='tab:green', linewidth=4.0)
    ax.set_xlabel('precipitation (mm)')
    ax.set_ylabel('probability')
    ax.set_title(f'Annual (Sep-Aug) Rx15day for {region} (AGCD)')
    if savefig:
        fig.savefig(
            f'/g/data/xv83/dbi599/east-coast-rain/figures/Rx15day_histogram_AGCD_{region}.png',
            bbox_inches='tight',
            facecolor='white',
            dpi=dpi
        )
    plt.show()

    # Monte Carlo estimate: sample the fitted distribution and count how many
    # draws fall above the event. Seeded so repeated runs print the same numbers.
    gev_data = gev.rvs(
        gev_shape,
        gev_loc,
        gev_scale,
        size=5000000,
        random_state=random_state,
    )
    n_events, n_population, return_period, percentile = general_utils.event_in_context(gev_data, event, 'above')
    print(f'{n_events} events in {n_population} samples')
    print(f'{percentile:.2f}% percentile')
    print(f'{return_period:.0f} year return period\n')

In [None]:
# GEV analysis of the full record, with the record maximum as the event of
# interest; this is the call that saves the histogram figure.
gev_analysis(df_rx15day['pr'], rx15day_max, region_name, savefig=True)

In [None]:
# Repeat the analysis with the final year of the record left out of the fit.
# .iloc makes the positional slice explicit: the Series is indexed by integer
# years, where a bare [:-1] is easy to misread as a label-based selection.
# NOTE(review): presumably this shows how rare the record event looked before
# it was observed, which only holds if the maximum falls in the final year —
# confirm.
gev_analysis(df_rx15day['pr'].iloc[:-1], rx15day_max, region_name)