In [1]:
import os, sys
import pandas as pd
import numpy as np
import xarray as xr
from itertools import product
from datetime import datetime, timedelta
import argparse
import logging
from scipy.special import gamma, factorial #gamma function
from scipy.stats import skew
from scipy.stats import norm
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.colors import LogNorm
import matplotlib.colors as colors
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes #put inset plot
%matplotlib inline

In [2]:
## Input / output directories
# dir_in:  folder the netCDF inputs are opened from (the per-statistic files and NRM_clusters.nc).
# dir_out: folder the plotting cells save their figures into.
dir_in = '/g/data/er4/zk6340/Hydro_projection/data_flood_scenario_qtot'
dir_out = '/g/data/er4/zk6340/code/Script_Hydro-projections'

# Variable name: used both as the first field of every input file name and as
# the data variable read from each opened dataset (ds[parameter]).
parameter = 'qtot'

# Cluster abbreviation -> integer code. The selected code is compared against the
# cluster mask to pick the grid cells to analyse (note: no entry uses code 3).
clusters = {'CS':1,'EC':2,'MB':4,'MN':5,'R':6,'SS':7,'SSWF':8,'WT':9}
which_cluster = clusters['MB']

# Return period T handed to gev_return_period
# (presumably in years, since the fit is applied to 'Annual_Max' files -- TODO confirm)
yT = 20

# Historical period: date strings inserted verbatim into the input file names
hist_st = '19760101'
hist_end = '20051231'

# Projection period: date strings inserted verbatim into the input file names
yr_st = '20160101'
yr_end = '20451231'

# Ensemble dimensions: participating GCMs, bias-correction approaches and emission
# scenarios. Their Cartesian product defines the ensemble members processed below.
gcms = ['CNRM-CERFACS-CNRM-CM5','CSIRO-BOM-ACCESS1-0','MIROC-MIROC5','NOAA-GFDL-GFDL-ESM2M']
bias_corr = ['CSIRO-CCAM-r3355-r240x120-ISIMIP2b-AWAP','r240x120-ISIMIP2b-AWAP', 'r240x120-MRNBC-AWAP', 'r240x120-QME-AWAP']
emission = ['rcp45','rcp85']

In [3]:
def get_file(parameter,which_gcm,which_emission,which_bias_corr, which_metric, yr_end, yr_st):
    """Build the name of the netCDF file for one ensemble member and statistic.

    The seven arguments are substituted, in signature order, into the fixed
    pattern ``<parameter>_AUS-5_<gcm>_<emission>_r1i1p1_<bias_corr>_<metric>_<yr_end>-<yr_st>.nc``.
    Note that the period suffix is rendered with the sixth argument first and
    the seventh second.

    Returns:
        str: the bare file name (no directory component).
    """
    template = '%s_AUS-5_%s_%s_r1i1p1_%s_%s_%s-%s.nc'
    fields = (
        parameter,
        which_gcm,
        which_emission,
        which_bias_corr,
        which_metric,
        yr_end,
        yr_st,
    )
    return template % fields

In [4]:
def percentage_change(parameter, gcm, scenario, bias_correction, statistics, hist_end, hist_st, yr_end, yr_st):
    """Percentage change of a cluster-averaged statistic, scenario vs. historical.

    For both the historical run and the given emission scenario, the matching
    file (name built by ``get_file``) is opened from ``dir_in``, the
    ``parameter`` variable is masked to the cells where the global ``mask``
    equals ``which_cluster``, and the masked field is averaged over all of its
    dimensions.

    Args:
        parameter: variable name (file-name field and dataset variable).
        gcm: GCM identifier used in the file name.
        scenario: emission scenario of the future run (e.g. 'rcp45').
        bias_correction: bias-correction identifier used in the file name.
        statistics: statistic field of the file name (e.g. 'Mean', 'Max').
        hist_end, hist_st: period fields for the historical file name.
        yr_end, yr_st: period fields for the scenario file name.

    Returns:
        float: ``100 * (scenario_mean - historical_mean) / historical_mean``.

    Relies on the notebook globals ``dir_in``, ``mask`` and ``which_cluster``.
    """
    def _cluster_spatial_mean(experiment, period_end, period_st):
        # Average of the masked field for one experiment's file.
        filename = get_file(parameter, gcm, experiment, bias_correction, statistics, period_end, period_st)
        # The context manager closes the file handle once the scalar has been
        # extracted; .item(0) forces the computation while the file is still open.
        with xr.open_dataset(os.path.join(dir_in, filename)) as ds:
            ds_cluster = ds[parameter].where(mask == which_cluster)
            return ds_cluster.NRM_cluster.mean().item(0)

    # Historical period
    ds_hist_cluster_spatial_mean = _cluster_spatial_mean('historical', hist_end, hist_st)
    # Future scenario
    ds_scenario_cluster_spatial_mean = _cluster_spatial_mean(scenario, yr_end, yr_st)
    percent_change = ((ds_scenario_cluster_spatial_mean-ds_hist_cluster_spatial_mean)*100)/ds_hist_cluster_spatial_mean
    return(percent_change)

In [5]:
def lh(x, h):
    """Compute the first four sample L-moments (h = 0) or LH-moments (h > 0).

    The sample is sorted ascending and each order statistic is weighted by
    products of binomial-type coefficients built incrementally: ``left0`` to
    ``left3`` depend on the number of smaller values (and on ``h``), and
    ``right1`` to ``right3`` on the number of larger values. The weighted sums
    are then normalised by coefficients that depend only on the sample size
    and ``h``.

    Args:
        x: one-dimensional sequence of numeric values.
        h: non-negative integer shift; 0 gives ordinary L-moments.

    Returns:
        tuple: ``(lh1, lh2, lh3, lh4)``, the moments of order 1 to 4.
    """
    ordered = np.sort(x)
    n = len(ordered)
    total1 = 0
    total2 = 0
    total3 = 0
    total4 = 0
    for i, value in enumerate(ordered):
        # Weights from the i values lying below the current order statistic
        left0 = 1
        if h>0:
            for j in range(h):
                left0 = left0*(i-j)/(j+1)
        left1 = left0*(i+1-h-1)/(h+1)
        left2 = left1*(i+1-h-2)/(h+2)
        left3 = left2*(i+1-h-3)/(h+3)
        # Weights from the n-(i+1) values lying above it
        right1 = n-(i+1)
        right2 = right1*(n-(i+1)-1)/2
        right3 = right2*(n-(i+1)-2)/3
        total1 = total1+left0* value
        total2 = total2+(left1-left0*right1)* value
        total3 = total3+(left2-2*left1*right1+left0*right2)* value
        total4 = total4+(left3-3*left2*right1+3*left1*right2-left0*right3)* value
    # Normalising coefficients (functions of n and h only)
    denom0 = 1
    if h>0:
        for j in range(h):
            denom0 = denom0*(n+1-(j+1))/(j+1)
    denom1 = denom0*(n+1-h-1)/(h+1)
    denom2 = denom1*(n+1-h-2)/(h+2)
    denom3 = denom2*(n+1-h-3)/(h+3)
    denom4 = denom3*(n+1-h-4)/(h+4)
    moment1 = total1/denom1
    moment2 = total2/denom2/2
    moment3 = total3/denom3/3
    moment4 = total4/denom4/4
    return (moment1, moment2, moment3, moment4)

In [6]:
def gev_return_period(data, T):
    """Return level for return period ``T`` from a GEV fit by L-moments.

    The first three L-moments of ``data`` (``lh(data, 0)``) are converted to
    the GEV shape ``k``, scale ``alpha`` and location ``zeta`` using the
    polynomial approximation ``k = 7.859*c + 2.9554*c**2``, and the quantile
    with non-exceedance probability ``1 - 1/T`` is returned.

    Args:
        data: one-dimensional sample (the notebook passes annual maxima).
        T: return period, must be > 1 so that ``1 - 1/T`` lies in (0, 1).

    Returns:
        float: the estimated ``T``-period return level.
    """
    # lh() sorts the sample and loops over it, so compute the moments once
    # rather than once per moment.
    lamda1, lamda2, lamda3, _ = lh(data, 0)
    c=(2*lamda2/(lamda3+3*lamda2))-(np.log(2)/np.log(3))
    k=7.859*c+2.9554*c**2
    reduced=-np.log(1-1/T)
    if abs(k) < 1e-8:
        # k -> 0 is the Gumbel limit of the GEV; the general formulas below
        # divide by k and would give nan, so use the limiting estimators.
        alpha=lamda2/np.log(2)
        zeta=lamda1-np.euler_gamma*alpha
        return(zeta-alpha*np.log(reduced))
    alpha=k*lamda2/(gamma(1+k)*(1-2**(-k)))
    zeta=lamda1-alpha/k*(1-(gamma(1+k)))
    qt=zeta+(alpha/k)*(1-reduced**k)
    return(qt)

In [None]:
def lp3_return_period(data, T):
    """Return level for return period ``T`` from a Pearson type III frequency factor.

    Uses the sample mean, the population standard deviation (``np.std``
    default) and the sample skewness (``scipy.stats.skew`` default) of
    ``data``, and the Wilson-Hilferty style frequency factor built from the
    standard normal quantile at ``1 - 1/T``.

    NOTE(review): the original comment calls this "LP3", but no log transform
    is applied to ``data`` here -- presumably callers are expected to pass
    log-transformed values and back-transform the result; confirm.

    Args:
        data: one-dimensional numeric sample.
        T: return period, must be > 1.

    Returns:
        float: ``mean + stdev * frequency_factor``.
    """
    xbar=np.mean(data)
    stdev=np.std(data)
    xskew=skew(data)
    zp=norm.ppf(1-1/T)
    if abs(xskew) < 1e-8:
        # Zero skew: the factor formula divides by the skew (giving nan), and
        # its limit as skew -> 0 is the normal quantile itself.
        ff=zp
    else:
        ff=(2/xskew)*((1+(zp*xskew)/6-xskew**2/36)**3)-2/xskew
    qt=xbar+stdev*ff
    return(qt)

In [7]:
def ts_data(dataset, lat, lon, parameter):
    """Extract the values of one variable at the grid cell nearest to a point.

    Args:
        dataset: object exposing ``.sel(lat=..., lon=..., method=...)``
            (the notebook passes xarray datasets of annual maxima).
        lat, lon: coordinates of the requested point.
        parameter: name of the variable to read from the selection.

    Returns:
        The ``.values`` attribute of the selected variable.
    """
    nearest_cell = dataset.sel(lat = lat, lon = lon, method = 'nearest')
    return nearest_cell[parameter].values

In [8]:
# Open the cluster mask stored next to the input data; its NRM_cluster
# variable is compared against which_cluster further down.
mask_path = os.path.join(dir_in, 'NRM_clusters.nc')
mask = xr.open_dataset(mask_path)

In [9]:
# Table of the grid cells belonging to the selected cluster: cells whose mask
# value differs from which_cluster become NaN under .where() and are dropped.
grid_cells = (
    mask.where(mask == which_cluster)
    .NRM_cluster
    .to_dataframe()
    .reset_index()
    .dropna(axis=0)
    .reset_index()
)

In [10]:
# Every (GCM, emission scenario, bias correction) combination, GCM varying slowest
values = list(product(gcms, emission, bias_corr))

In [11]:
values

[('CNRM-CERFACS-CNRM-CM5', 'rcp45', 'CSIRO-CCAM-r3355-r240x120-ISIMIP2b-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp45', 'r240x120-ISIMIP2b-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp45', 'r240x120-MRNBC-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp45', 'r240x120-QME-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp85', 'CSIRO-CCAM-r3355-r240x120-ISIMIP2b-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp85', 'r240x120-ISIMIP2b-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp85', 'r240x120-MRNBC-AWAP'),
 ('CNRM-CERFACS-CNRM-CM5', 'rcp85', 'r240x120-QME-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp45', 'CSIRO-CCAM-r3355-r240x120-ISIMIP2b-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp45', 'r240x120-ISIMIP2b-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp45', 'r240x120-MRNBC-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp45', 'r240x120-QME-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp85', 'CSIRO-CCAM-r3355-r240x120-ISIMIP2b-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp85', 'r240x120-ISIMIP2b-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rcp85', 'r240x120-MRNBC-AWAP'),
 ('CSIRO-BOM-ACCESS1-0', 'rc

### Faster code

In [34]:
# Ensemble member under inspection: values[27] is a (GCM, emission, bias correction) tuple
member_gcm, member_emission, member_bias = values[27]

#Change in mean
change_daily_mean = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Mean', hist_end, hist_st, yr_end, yr_st)

#Change in max
change_daily_max = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Max', hist_end, hist_st, yr_end, yr_st)

#Change in return period
# Annual Max - for historical period
filename_hist_annual_max = get_file(parameter, member_gcm, 'historical', member_bias, 'Annual_Max', hist_end, hist_st)
ds_hist_annual_max = xr.open_dataset(os.path.join(dir_in, filename_hist_annual_max))
# Annual Max - for scenario period
filename_scenario_annual_max = get_file(parameter, member_gcm, member_emission, member_bias, 'Annual_Max', yr_end, yr_st)
ds_scenario_annual_max = xr.open_dataset(os.path.join(dir_in, filename_scenario_annual_max))
# yT return level at every grid cell of the cluster, for both periods
all_yT = [
    {
        'lat': cell_lat,
        'lon': cell_lon,
        'hist_yT': gev_return_period(ts_data(ds_hist_annual_max, cell_lat, cell_lon, parameter), yT),
        'future_yT': gev_return_period(ts_data(ds_scenario_annual_max, cell_lat, cell_lon, parameter), yT),
    }
    for cell_lat, cell_lon in zip(grid_cells.lat.values, grid_cells.lon.values)
]
all_return_period = pd.DataFrame(all_yT)
# Percentage change between the cluster-mean return levels of the two periods
mean_hist_yT = all_return_period['hist_yT'].mean()
mean_future_yT = all_return_period['future_yT'].mean()
change_return_period = ((mean_future_yT-mean_hist_yT)*100)/mean_hist_yT
dd = {
    'GCM': member_gcm,
    'Bias Correction': member_bias,
    'Emission': member_emission,
    'Change in Daily Mean': change_daily_mean,
    'Change in daily Max': change_daily_max,
    'Change in Return Period': change_return_period,
}

In [35]:
dd

{'GCM': 'NOAA-GFDL-GFDL-ESM2M',
 'Bias Correction': 'r240x120-QME-AWAP',
 'Emission': 'rcp45',
 'Change in Daily Mean': -36.33033420702664,
 'Change in daily Max': -6.194419040153765,
 'Change in Return Period': -21.326724585821314}

In [14]:
# Annual-maximum series at the grid cell nearest to (-32.25, 145.65), used as a spot check
data_test = ts_data(ds_hist_annual_max, -32.25, 145.65, parameter)

In [36]:
all_return_period.hist_yT.min()

0.047323745597340656

In [37]:
all_return_period.future_yT.min()

0.04000392565388619

In [15]:
data_test

array([ 0.29848486,  0.09815075,  0.10894123,  0.17728981,  0.07993447,
        0.05687778,  0.03262357,  0.08426319,  0.11417754,  2.83034754,
        0.31669611,  0.97379208,  0.3431657 ,  0.43425098,  0.3285206 ,
        0.15549701,  0.3161217 ,  0.22977819,  0.2376546 ,  2.58493233,
        0.86850059,  0.32189187, 12.81156254,  0.61593449,  0.54709554,
        0.27335545,  0.28320321,  0.30237091,  0.3749817 ,  0.27856681])

In [22]:
gev_return_period(data_test, 10)

1.2278063781658157

In [None]:
# Map every grid cell of the cluster on ONE figure, coloured by the sign of its
# historical return level: red where hist_yT > 0, green otherwise (a NaN
# compares False and is therefore drawn green).
# The figure and basemap are built once. Creating them inside the per-cell loop
# produced one open figure per grid cell and overwrote the saved file on every
# iteration, so the file on disk only ever showed the last cell.
fig = plt.figure()
fig.set_size_inches(18.5, 10.5, forward=True)
# Named `basemap` so the builtin map() is not shadowed for the rest of the notebook.
basemap = Basemap(110.,-45.,155,-10.5,
                  lat_0=24.75, lon_0=134.0, lat_1=-10, lat_2=-40,
                  rsphere=(6378137.00,6356752.3142),
                  projection='cyl')
basemap.drawcoastlines()
basemap.drawstates()
basemap.drawlsmask(land_color='Linen', ocean_color='white')
basemap.drawcountries()
basemap.drawparallels(np.arange(-43,-10,10),labels=[1,0,0,0])
basemap.drawmeridians(np.arange(110,155,10),labels=[0,0,0,1])
is_positive = all_return_period.hist_yT > 0
for selection, colour in ((is_positive, 'red'), (~is_positive, 'green')):
    plt.scatter(all_return_period.lon[selection], all_return_period.lat[selection], marker = '.', color = colour)
output_file = 'Cluster: MB_historical_return_period.jpeg'
plt.savefig(os.path.join(dir_out,output_file))

In [None]:
# Map every grid cell of the cluster on ONE figure, coloured by the sign of its
# future return level: red where future_yT > 0, green otherwise (a NaN
# compares False and is therefore drawn green).
# The figure and basemap are built once. Creating them inside the per-cell loop
# produced one open figure per grid cell and overwrote the saved file on every
# iteration, so the file on disk only ever showed the last cell.
fig = plt.figure()
fig.set_size_inches(18.5, 10.5, forward=True)
# Named `basemap` so the builtin map() is not shadowed for the rest of the notebook.
basemap = Basemap(110.,-45.,155,-10.5,
                  lat_0=24.75, lon_0=134.0, lat_1=-10, lat_2=-40,
                  rsphere=(6378137.00,6356752.3142),
                  projection='cyl')
basemap.drawcoastlines()
basemap.drawstates()
basemap.drawlsmask(land_color='Linen', ocean_color='white')
basemap.drawcountries()
basemap.drawparallels(np.arange(-43,-10,10),labels=[1,0,0,0])
basemap.drawmeridians(np.arange(110,155,10),labels=[0,0,0,1])
is_positive = all_return_period.future_yT > 0
for selection, colour in ((is_positive, 'red'), (~is_positive, 'green')):
    plt.scatter(all_return_period.lon[selection], all_return_period.lat[selection], marker = '.', color = colour)
output_file = 'Cluster: MB_future_return_period.jpeg'
plt.savefig(os.path.join(dir_out,output_file))

In [None]:
# Flood indicators per ensemble member. The accumulator was previously created
# under a misspelt name, so the append at the end of the loop raised NameError
# on a fresh kernel.
cluster_flood_indicator=[]
for gcm, scenario, bias_correction in values:
    #Change in mean
    change_daily_mean = percentage_change(parameter, gcm, scenario, bias_correction, 'Mean', hist_end, hist_st, yr_end, yr_st)

    #Change in max
    change_daily_max = percentage_change(parameter, gcm, scenario, bias_correction, 'Max', hist_end, hist_st, yr_end, yr_st)

    #Change in return period
    # Annual Max - for historical period
    filename_hist_annual_max = get_file(parameter,gcm,'historical',bias_correction,'Annual_Max',hist_end,hist_st)
    ds_hist_annual_max = xr.open_dataset(os.path.join(dir_in,filename_hist_annual_max))
    # Annual Max - for scenario period
    filename_scenario_annual_max = get_file(parameter,gcm,scenario,bias_correction,'Annual_Max',yr_end,yr_st)
    ds_scenario_annual_max = xr.open_dataset(os.path.join(dir_in,filename_scenario_annual_max))
    # calculate the yT return level of both periods at every grid cell of the cluster
    all_yT = []
    for i in range (0, len(grid_cells)):
        hist_yT_point = gev_return_period(ts_data(ds_hist_annual_max, grid_cells.lat[i], grid_cells.lon[i], parameter), yT)
        scenario_yT_point = gev_return_period(ts_data(ds_scenario_annual_max, grid_cells.lat[i], grid_cells.lon[i], parameter), yT)
        yT_point = {'hist_yT':hist_yT_point,'future_yT':scenario_yT_point}
        all_yT.append(yT_point)
    all_return_period = pd.DataFrame(all_yT)
    # percentage change between the cluster-mean return levels
    change_return_period = ((all_return_period['future_yT'].mean()-all_return_period['hist_yT'].mean())*100)/all_return_period['hist_yT'].mean()
    dd = {'GCM':gcm,'Bias Correction':bias_correction,'Emission':scenario,'Change in Daily Mean':change_daily_mean,'Change in daily Max':change_daily_max, 'Change in Return Period':change_return_period}
    cluster_flood_indicator.append(dd)
    break # just want to check one loop/one ensemble

In [None]:
cluster_flood_indicator

In [None]:
values[10][0]

#### Slow code

In [None]:
# Variant that processes the historical and scenario files one after the other
# and compares the cluster means of their return levels.
cluster_flood_indicator=[]
for member_gcm, member_emission, member_bias in values:
    #Change in mean
    change_daily_mean = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Mean', hist_end, hist_st, yr_end, yr_st)

    #Change in max
    change_daily_max = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Max', hist_end, hist_st, yr_end, yr_st)

    #Change in return period
    cell_coords = list(zip(grid_cells.lat.values, grid_cells.lon.values))
    # Annual Max - for historical period
    filename_hist_annual_max = get_file(parameter, member_gcm, 'historical', member_bias, 'Annual_Max', hist_end, hist_st)
    ds_hist_annual_max = xr.open_dataset(os.path.join(dir_in, filename_hist_annual_max))
    # return level of the historical period at every grid cell
    hist_yT = [
        gev_return_period(ts_data(ds_hist_annual_max, cell_lat, cell_lon, parameter), yT)
        for cell_lat, cell_lon in cell_coords
    ]
    hist_yT_mean = pd.DataFrame(hist_yT).mean()[0]
    # Annual Max - for scenario period
    filename_scenario_annual_max = get_file(parameter, member_gcm, member_emission, member_bias, 'Annual_Max', yr_end, yr_st)
    ds_scenario_annual_max = xr.open_dataset(os.path.join(dir_in, filename_scenario_annual_max))
    # return level of the scenario period at every grid cell
    scenario_yT = [
        gev_return_period(ts_data(ds_scenario_annual_max, cell_lat, cell_lon, parameter), yT)
        for cell_lat, cell_lon in cell_coords
    ]
    scenario_yT_mean = pd.DataFrame(scenario_yT).mean()[0]
    change_return_period = ((scenario_yT_mean-hist_yT_mean)*100)/hist_yT_mean

    dd = {
        'GCM': member_gcm,
        'Bias Correction': member_bias,
        'Emission': member_emission,
        'Change in Daily Mean': change_daily_mean,
        'Change in daily Max': change_daily_max,
        'Change in Return Period': change_return_period,
    }
    cluster_flood_indicator.append(dd)
    break # just want to check one loop/one ensemble

#### Mean of change

In [None]:
# Variant that averages the per-cell percentage changes of the return level
# (mean of changes) instead of comparing the two cluster means.
cluster_flood_indicator=[]
for member_gcm, member_emission, member_bias in values:
    #Change in mean
    change_daily_mean = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Mean', hist_end, hist_st, yr_end, yr_st)

    #Change in max
    change_daily_max = percentage_change(parameter, member_gcm, member_emission, member_bias, 'Max', hist_end, hist_st, yr_end, yr_st)

    #Change in return period
    # Annual Max - for historical period
    filename_hist_annual_max = get_file(parameter, member_gcm, 'historical', member_bias, 'Annual_Max', hist_end, hist_st)
    ds_hist_annual_max = xr.open_dataset(os.path.join(dir_in, filename_hist_annual_max))
    # Annual Max - for scenario period
    filename_scenario_annual_max = get_file(parameter, member_gcm, member_emission, member_bias, 'Annual_Max', yr_end, yr_st)
    ds_scenario_annual_max = xr.open_dataset(os.path.join(dir_in, filename_scenario_annual_max))
    # percentage change of the return level at every grid cell
    change_yT = []
    for cell_lat, cell_lon in zip(grid_cells.lat.values, grid_cells.lon.values):
        hist_yT_point = gev_return_period(ts_data(ds_hist_annual_max, cell_lat, cell_lon, parameter), yT)
        scenario_yT_point = gev_return_period(ts_data(ds_scenario_annual_max, cell_lat, cell_lon, parameter), yT)
        change_yT.append(((scenario_yT_point-hist_yT_point)*100)/hist_yT_point)
    change_return_period = pd.DataFrame(change_yT).mean()[0]
    dd = {
        'GCM': member_gcm,
        'Bias Correction': member_bias,
        'Emission': member_emission,
        'Change in Daily Mean': change_daily_mean,
        'Change in daily Max': change_daily_max,
        'Change in Return Period': change_return_period,
    }
    cluster_flood_indicator.append(dd)
    break # just want to check one loop/one ensemble

In [None]:
cluster_flood_indicator