### Combine the linear and rainfed (modelled) interpolated water levels

**Previous code:**
Determine_transfer_function_WTD_transects.ipynb

**Aim:**
To produce a water level map that uses the modelled interpolation for areas that are significantly rainfed, and the linear interpolation elsewhere.

**Inputs**
- interpolated rainfed water level map
- interpolated linear water level map
- pixel-wise correlation maps (pvalues, correlation, slopes, standard errors)
- MERIT Hydro data

**Method**
- interpolate the 90m MERIT Hydro data to the 100m grid of the water level maps
- compare the MERIT Hydro data derived metrics with the pixel-wise correlation maps
- substitute non-rainfed water-level pixels with the linear interpolation
- there are some pixelated/speckled points in the water level images due to amplification of speckle in the original HH data. A smoothing algorithm will be applied to spatially interpolate over these areas only.

**Outputs**
- The final water level map
- A corresponding standard errors map
- An analysis of the correspondence between the MERIT elevation data (slope, elevation etc) and the likelihood of a pixel being rainfed
- Figures within the final paper: A3, A4, A5, A6

**Next**
- Final validation with altimetry data

**Notes**
- The final criterion used for applying the rainfed interpolation is: p-value < 0.15, which approximately corresponds to correlation > 0.3
- Otherwise, the linear interpolation is used. This covers the flood bank regions, and regions where there are negative pixel wise correlations. 


In [None]:
import sys
import pandas as pd
import xarray as xr
import numpy as np
xr.set_options(cmap_sequential='jet')
import matplotlib.pyplot as plt
import warnings
import netCDF4
import datetime
import plotly
import plotly.express as px
import plotly.graph_objects as go
from affine import Affine
import cartopy.crs as ccrs
import nco
import xesmf as xe
from calendar import monthrange
import pickle as pkl
from datetime import date, datetime, timedelta
import math
import seaborn as sns
from scipy import stats
from IPython.display import display

%xmode Minimal

### Functions

In [2]:
import matplotlib.colors as mcolors
from matplotlib.colors import Normalize
import matplotlib as mpl

plt.rcParams['figure.dpi'] = 200

#from matplotlib.colors import MidpointNormalize

def plot_maps(ds, var_name, vmin, vmax):
    """Plot up to 20 time steps of ``ds[var_name]`` on a 4 x 5 grid of maps.

    All panels share one colour scale. The RdBu colormap is re-centred with
    ``shiftedColorMap`` using ``midpoint = 1 - vmax / (vmax + abs(vmin))``,
    which puts the centre of the sampled colour range at a data value of 0
    when ``vmin <= 0 <= vmax``. A single colourbar labelled ``var_name`` is
    drawn on the right-hand side of the figure.

    Parameters
    ----------
    ds : dataset indexable by ``var_name`` and ``'time'``; each time step is
        plotted with ``x='lon'`` and ``y='lat'``.
    var_name : name of the variable to plot; also used as the colourbar label.
    vmin, vmax : limits of the shared colour scale. ``vmax + abs(vmin)`` must
        be non-zero, otherwise the midpoint computation divides by zero.

    Notes
    -----
    Only the first 20 time steps are drawn. If ``ds`` holds fewer than 20,
    the unused panels are hidden rather than raising an IndexError.
    """
    midpoint = 1 - vmax / (vmax + abs(vmin))
    print (midpoint)
    start = 0.12
    # `mpl` is the alias imported in this cell, so the function does not rely
    # on a later cell having run `import matplotlib`.
    cmap_new = shiftedColorMap(mpl.cm.RdBu, start=start, midpoint=midpoint, stop=1, name='shiftedcmap')

    sns.set(font_scale=1.3, rc={'axes.facecolor':'white'}, style='ticks')

    # formatting the time for better subplot titles
    time_formatted = ds['time'].dt.strftime('%b %Y')

    fig, ax = plt.subplots(nrows=4, ncols=5, figsize=(18, 16))
    ax = ax.ravel()

    # adding more space between rows
    plt.subplots_adjust(hspace=0.5)

    n_steps = ds[var_name].sizes['time']
    last_drawn = None
    for i, subplot in enumerate(ax):
        if i >= n_steps:
            # fewer time steps than panels: leave the spare panels blank
            subplot.set_visible(False)
            continue
        ds[var_name].isel(time=i).plot(x='lon', y='lat', ax=subplot, vmin=vmin, vmax=vmax,
                                       cmap=cmap_new, add_colorbar=False)
        subplot.set_xlabel('')
        subplot.set_ylabel('')
        subplot.set_title(time_formatted.isel(time=i).item())
        last_drawn = subplot

    if last_drawn is None:
        # nothing was plotted, so there is no mappable to build a colourbar from
        return

    # Every panel uses the same vmin/vmax/cmap, so the mappable of the last
    # drawn panel describes the colour scale of the whole figure.
    cax = fig.add_axes([0.92, 0.1, 0.02, 0.8])
    cbar = plt.colorbar(last_drawn.collections[0], cax=cax)
    cbar.set_label(var_name, size=18)


In [3]:
import matplotlib.colors as mcolors
from matplotlib.colors import Normalize
import matplotlib as mpl

plt.rcParams['figure.dpi'] = 200

#from matplotlib.colors import MidpointNormalize

def plot_maps2(ds, var_name, vmin, vmax):
    """Plot up to 20 time steps of ``ds[var_name]`` on a 4 x 5 grid (viridis).

    Counterpart of ``plot_maps`` for quantities that do not need a colormap
    centred on zero: all panels use the plain ``'viridis'`` colormap with the
    same ``vmin``/``vmax``, and one shared colourbar labelled ``var_name`` is
    drawn on the right-hand side of the figure.

    Parameters
    ----------
    ds : dataset indexable by ``var_name`` and ``'time'``; each time step is
        plotted with ``x='lon'`` and ``y='lat'``.
    var_name : name of the variable to plot; also used as the colourbar label.
    vmin, vmax : limits of the shared colour scale.

    Notes
    -----
    Only the first 20 time steps are drawn. If ``ds`` holds fewer than 20,
    the unused panels are hidden rather than raising an IndexError.
    """
    sns.set(font_scale=1.3, rc={'axes.facecolor':'white'}, style='ticks')

    # formatting the time for better subplot titles
    time_formatted = ds['time'].dt.strftime('%b %Y')

    fig, ax = plt.subplots(nrows=4, ncols=5, figsize=(18, 16))
    ax = ax.ravel()

    # adding more space between rows
    plt.subplots_adjust(hspace=0.5)

    n_steps = ds[var_name].sizes['time']
    last_drawn = None
    for i, subplot in enumerate(ax):
        if i >= n_steps:
            # fewer time steps than panels: leave the spare panels blank
            subplot.set_visible(False)
            continue
        ds[var_name].isel(time=i).plot(x='lon', y='lat', ax=subplot, vmin=vmin, vmax=vmax,
                                       cmap='viridis', add_colorbar=False)
        subplot.set_xlabel('')
        subplot.set_ylabel('')
        subplot.set_title(time_formatted.isel(time=i).item())
        last_drawn = subplot

    if last_drawn is None:
        # nothing was plotted, so there is no mappable to build a colourbar from
        return

    # Every panel uses the same vmin/vmax/cmap, so the mappable of the last
    # drawn panel describes the colour scale of the whole figure.
    cax = fig.add_axes([0.92, 0.1, 0.02, 0.8])
    cbar = plt.colorbar(last_drawn.collections[0], cax=cax)
    cbar.set_label(var_name, size=18)



In [4]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid

def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Function to offset the "center" of a colormap. Useful for
    data with a negative min and positive max and you want the
    middle of the colormap's dynamic range to be at zero.

    Adapted from:
    https://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib

    Input
    -----
      cmap : The matplotlib colormap to be altered
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower offset). Should be between
          0.0 and `midpoint`.
      midpoint : The new center of the colormap. Defaults to
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax / (vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)) or 0.75
      stop : Offset from highest point in the colormap's range.
          Defaults to 1.0 (no upper offset). Should be between
          `midpoint` and 1.0.
      name : Name given to the new colormap and used to register it.

    Output
    ------
      The new LinearSegmentedColormap. It is also registered with
      matplotlib under `name`, replacing any colormap previously
      registered under that name by an earlier call.
    '''
    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index to compute the colors
    reg_index = np.linspace(start, stop, 257)

    # shifted index to match the data: 128 points below the midpoint and
    # 129 from the midpoint upwards, so both halves get equal colour resolution
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False),
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)

    # `plt.register_cmap` was deprecated in matplotlib 3.7 and removed in 3.9,
    # and re-registering an existing name without `force` raises ValueError.
    # Prefer the colormap registry when it exists and overwrite on repeat calls
    # (this function is called several times with the default name).
    registry = getattr(matplotlib, 'colormaps', None)
    if registry is not None:
        registry.register(newcmap, name=name, force=True)
    else:
        # older matplotlib without the registry object
        plt.register_cmap(cmap=newcmap)

    return newcmap

### Main code

In [9]:
### Declare data and output directories here
# NOTE(review): the cells below build every file path as `ALOS_OUT + '<file>.nc'`, but no
# assignment to ALOS_OUT is visible in this notebook. Define it here before running on;
# because paths are built by plain string concatenation it needs a trailing path separator.

# publication quality plots
# (overrides the 200 dpi set in the function cells above for all figures drawn from here on)
plt.rcParams['figure.dpi'] = 500

In [6]:
## Water level datasets
# Every input is read from the ALOS_OUT directory.

# rainfed
ds_r = xr.open_dataset(ALOS_OUT + 'HH_modelled_water_level_ts.nc')

# linear
ds_l = xr.open_dataset(ALOS_OUT + 'HH_linear_water_level_ts.nc')

# land type map
lt_map = xr.open_dataset(ALOS_OUT + 'landtype_100m.nc')

# Only pixels with land type code 4 or 5 are kept in the pixel-wise maps below.
# The mask is the same for all four maps, so it is computed once.
# NOTE(review): the meaning of codes 4 and 5 is not visible in this notebook - confirm.
landtype_keep = lt_map['type'].isin([4,5])

# Each pixel-wise file is handled the same way:
#   - stack its data variables into one array (this adds the 'variable' dimension),
#   - keep the first entry along the first two dimensions ([0][0]),
#   - blank out pixels outside the selected land types,
#   - remove the leftover scalar 'variable' coordinate.
# `drop_vars` replaces the deprecated `Dataset.drop`.

# 1. pixel wise correlation slope data
pw_slopes = xr.open_dataset(ALOS_OUT + 'slopes_da_HH_new.nc')
test = pw_slopes.to_array().to_dataset(name='slope')
pw_slopes = test['slope'][0][0].to_dataset(name='slope')
pw_slopes = pw_slopes.where(landtype_keep)
pw_slopes = pw_slopes.drop_vars('variable')

# 2. correlations
pw_corrs = xr.open_dataset(ALOS_OUT + 'corrs_da_HH_new.nc')
test = pw_corrs.to_array().to_dataset(name='correlation')
pw_corrs = test['correlation'][0][0].to_dataset(name='correlation')
pw_corrs = pw_corrs.where(landtype_keep)
pw_corrs = pw_corrs.drop_vars('variable')

# 3. p-values
pw_pvals = xr.open_dataset(ALOS_OUT + 'pvals_da_HH_new.nc')
test = pw_pvals.to_array().to_dataset(name='pvalue')
pw_pvals = test['pvalue'][0][0].to_dataset(name='pvalue')
pw_pvals = pw_pvals.where(landtype_keep)
pw_pvals = pw_pvals.drop_vars('variable')

# 4. standard errors
pw_stderrs = xr.open_dataset(ALOS_OUT + 'stderrs_da_HH_new.nc')
test = pw_stderrs.to_array().to_dataset(name='stderr')
pw_stderrs = test['stderr'][0][0].to_dataset(name='stderr')
pw_stderrs = pw_stderrs.where(landtype_keep)
pw_stderrs = pw_stderrs.drop_vars('variable')

## Combining the rainfed and linear maps
**Steps**
- a p-value criterion of p < 0.15 is applied to determine where the rainfed map should be used
- mask created
- mask applied to combine the rainfed and linearly interpolated maps
- concatenation of the outputs using a daily list of datetimes

In [None]:
# defining the mask where the water level map should use the linear interpolation

# pixels with a p-value below this threshold keep the rainfed (modelled) interpolation
PVALUE_THRESHOLD = 0.15

# keep only the p-values under the threshold; everything else becomes NaN
ds = pw_pvals.where(pw_pvals['pvalue'] < PVALUE_THRESHOLD)
ds['pvalue'].plot()

# True wherever the p-value is missing or not below the threshold,
# i.e. wherever the linear interpolation has to be used
mask = ds['pvalue'].isnull()

# flip the latitude order of the mask; reversing the label array directly avoids
# building a Python list of one 0-d DataArray per latitude row
# NOTE(review): presumably done to match the lat ordering of the water level maps - confirm
mask = mask.reindex(lat=mask.lat.values[::-1])
mask

### Applying the mask to create combined rainfed and floodprone region water level maps

In [None]:
%%time
# Combine the two interpolations one image at a time: where `mask` is True the
# linear interpolation (ds_l) is used, elsewhere the rainfed map (ds_r).
# The images are processed individually because applying the operation to the
# whole stack at once crashes.
linear_wl = ds_l['water_level']
rainfed_wl = ds_r['water_level']

WL_final = []
for i, linear_step in enumerate(linear_wl):
    print (i)
    combined_step = xr.where(mask, linear_step, rainfed_wl[i])
    WL_final.append(combined_step)

WL_final

### Concatenating the outputs 

In [None]:
# Defining datetime series for the current set of ALOS-2 images

# array of alos2 scene dates within area 2 (the main area for which we have data)
all_dates = ['2019-03-29','2019-05-10','2019-06-21','2019-07-19','2019-08-30','2019-10-11','2019-11-22','2020-01-03','2020-02-14','2020-03-27','2020-05-08','2020-06-19','2020-07-17','2020-08-28','2020-10-09','2021-10-08','2021-11-19']

# converting to the datetime format required for dataframes
all_dates = pd.to_datetime(all_dates)

# daily dates for the sequence of 15 ALOS-2 PALSAR-2 images between March 2019 and October 2020:
# index 14 is the 15th scene (2020-10-09); the two 2021 scenes are not part of the sequence
start_date = all_dates[0]
end_date = all_dates[14]

# one entry per day from the first scene up to the day before the 15th scene
# ('D' is the canonical daily frequency alias)
all_dates1 = pd.date_range(start_date, end_date - timedelta(days=1), freq='D')
print ('Daily interpolated radar image dates:\n', all_dates1)

# 2019-03-29 .. 2020-10-08 is exactly 560 days, so this slice keeps the whole range;
# it must match the number of daily images held in WL_final
dates_list = all_dates1[:560]
print ("concatenating the outputs...")
WL_final = xr.concat(WL_final, dim = 'time').assign_coords(time = dates_list)

WL_final = WL_final.to_dataset(name='water_level')
# `drop_vars` replaces the deprecated `Dataset.drop`
WL_final = WL_final.drop_vars('variable')


In [15]:
%%time
# Write the combined daily water level dataset to disk, then display it
print ("Saving WL_final to WL_daily_final.nc...")
daily_out_path = ALOS_OUT + 'WL_daily_final.nc'
WL_final.to_netcdf(path=daily_out_path)
WL_final

Saving WL_final to WL_daily_final.nc...
CPU times: user 89.2 ms, sys: 35.8 s, total: 35.9 s
Wall time: 40.3 s


### QUICKSTART HERE

In [13]:
# opening the estimated water levels dataset
# (rebinds WL_final to the copy stored in ALOS_OUT + 'WL_daily_final.nc'; presumably this lets
# the notebook be resumed from here without rebuilding the combined map - confirm)
WL_final = xr.open_dataset(ALOS_OUT + 'WL_daily_final.nc')

### Max, min, mean, std dev across 20-month study period

In [None]:
%%time
# Per-pixel summary statistics of the daily water level over the time dimension
daily_wl = WL_final['water_level']

WL_mean = daily_wl.mean(dim='time')
WL_min = daily_wl.min(dim='time')
WL_max = daily_wl.max(dim='time')
WL_stdev = daily_wl.std(dim='time')

# quick look at the mean map
WL_mean.plot(cmap='RdBu')

In [36]:
# Write each whole-period statistic to its own netCDF file in ALOS_OUT
for out_name, stat_map in (
    ('WL_mean.nc', WL_mean),
    ('WL_min.nc', WL_min),
    ('WL_max.nc', WL_max),
    ('WL_stdev.nc', WL_stdev),
):
    stat_map.to_netcdf(ALOS_OUT + out_name)

### Max, min, mean and standard deviation over each month in the water level sequence
- final paper images (supplementary)

In [7]:
%%time
# Aggregate the daily combined water level into calendar-month statistics
# NOTE(review): recent pandas versions prefer the 'ME' alias over 'M' - confirm
# against the installed version before changing it
daily_wl = WL_final['water_level']

print ('resampling mean...')
WL_monthly_mean = daily_wl.resample(time='1M').mean()

print ('resampling max...')
WL_monthly_max = daily_wl.resample(time='1M').max()

print ('resampling min...')
WL_monthly_min = daily_wl.resample(time='1M').min()

print ('resampling std dev...')
WL_monthly_stdev = daily_wl.resample(time='1M').std()


resampling mean...
resampling max...
resampling min...
resampling std dev...
CPU times: user 1min 19s, sys: 1min 21s, total: 2min 41s
Wall time: 4min 23s


In [10]:
# Write each monthly statistic to its own netCDF file in ALOS_OUT
for out_name, monthly_stat in (
    ('WL_monthly_mean.nc', WL_monthly_mean),
    ('WL_monthly_min.nc', WL_monthly_min),
    ('WL_monthly_max.nc', WL_monthly_max),
    ('WL_monthly_stdev.nc', WL_monthly_stdev),
):
    monthly_stat.to_netcdf(ALOS_OUT + out_name)

In [6]:
%%time
# Monthly statistics of the linear interpolation alone, for comparison with the
# final resampled data. It may be best to do this for both the linear and the
# rainfed data before deciding on which p-value/correlation criteria to use.
linear_daily = ds_l['water_level']

print ('resampling mean...')
WL_monthly_mean_l = linear_daily.resample(time='1M').mean()

print ('resampling max...')
WL_monthly_max_l = linear_daily.resample(time='1M').max()

print ('resampling min...')
WL_monthly_min_l = linear_daily.resample(time='1M').min()

print ('resampling std dev...')
WL_monthly_stdev_l = linear_daily.resample(time='1M').std()

resampling mean...
resampling max...
resampling min...
resampling std dev...
CPU times: user 1min 7s, sys: 1min 7s, total: 2min 14s
Wall time: 3min 56s


In [7]:
%%time
# Write each monthly statistic of the linear interpolation to its own netCDF file
for out_name, monthly_stat in (
    ('WL_monthly_mean_l.nc', WL_monthly_mean_l),
    ('WL_monthly_min_l.nc', WL_monthly_min_l),
    ('WL_monthly_max_l.nc', WL_monthly_max_l),
    ('WL_monthly_stdev_l.nc', WL_monthly_stdev_l),
):
    monthly_stat.to_netcdf(ALOS_OUT + out_name)

CPU times: user 36.5 ms, sys: 2.49 s, total: 2.53 s
Wall time: 2.58 s


### FINAL paper images
- Supplementary: mean, min, max, stdev
- Main paper: all data max, min, mean, stdev and amplitude

### QUICKSTART for monthly data

In [15]:
# Re-open the four monthly statistics saved earlier (mean, min, max, std dev)
WL_monthly_mean, WL_monthly_min, WL_monthly_max, WL_monthly_stdev = [
    xr.open_dataset(ALOS_OUT + file_name)
    for file_name in (
        'WL_monthly_mean.nc',
        'WL_monthly_min.nc',
        'WL_monthly_max.nc',
        'WL_monthly_stdev.nc',
    )
]

In [16]:
# monthly amplitude
# difference between the monthly maximum and the monthly minimum datasets (max - min)
WL_monthly_amplitude = WL_monthly_max - WL_monthly_min

In [None]:
# Monthly mean maps; the variable is renamed so the colourbar carries a readable label
var_name = 'Mean monthly water level (cm)'
monthly_mean_wl = WL_monthly_mean['water_level']
ds = monthly_mean_wl.to_dataset(name=var_name)
plot_maps2(ds, var_name, vmin=-20, vmax=50)

In [None]:
# plotting monthly amplitude
var_name = 'Monthly amplitude of the water level (cm)'
# plot the max - min difference (WL_monthly_amplitude), not the monthly mean
# that this cell previously re-plotted under the amplitude label
ds = WL_monthly_amplitude['water_level'].to_dataset(name = var_name)
plot_maps2(ds,var_name,0,30)

In [None]:
# Monthly minimum maps, drawn with the zero-centred colormap
var_name = 'Min monthly water level (cm)'
monthly_min_wl = WL_monthly_min['water_level']
ds = monthly_min_wl.to_dataset(name=var_name)
plot_maps(ds, var_name, vmin=-20, vmax=50)

In [None]:
# Monthly maximum maps, drawn with the zero-centred colormap
var_name = 'Max monthly water level (cm)'
monthly_max_wl = WL_monthly_max['water_level']
ds = monthly_max_wl.to_dataset(name=var_name)
plot_maps(ds, var_name, vmin=-20, vmax=50)