# Validation for quantile mapping bias adjustment

In [None]:
import sys
import calendar
import glob

import xarray as xr
import dask.diagnostics
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import cmocean
import pandas as pd

sys.path.append('/g/data/wp00/shared_code/qqscale')
import utils

In [None]:
# Register a dask progress bar globally so later dask computations report progress.
dask.diagnostics.ProgressBar().register()

In [None]:
# Parameters
# Defaults for the example grid point and month highlighted in the plots below.
example_lat = -42.9  # latitude of the example point
example_lon = 147.3  # longitude of the example point
example_month = 6  # month number shown in the spatial quantile plots

In [None]:
# Required parameters
# None of these names are defined in this notebook; each must be supplied
# externally (the messages refer to a `-p name value` option). The checks
# fail early with a descriptive message when one is missing.
assert 'hist_var' in locals(), "Must provide an historical variable name (option -p hist_var {name})"
assert 'ref_var' in locals(), "Must provide a reference variable name (option -p ref_var {name})"
assert 'target_var' in locals(), "Must provide a target variable name (option -p target_var {name})"
assert 'hist_units' in locals(), "Must provide historical units (option -p hist_units {units})"
assert 'ref_units' in locals(), "Must provide reference units (option -p ref_units {units})"
assert 'target_units' in locals(), "Must provide target units (option -p target_units {units})"
assert 'output_units' in locals(), "Must provide output units (option -p output_units {units})"
assert 'adjustment_file' in locals(), "Must provide an adjustment factors file (option -p adjustment_file {file path})"
assert 'hist_files' in locals(), """Must provide historical data files (option -p hist_files {"file paths"})"""
assert 'ref_files' in locals(), """Must provide reference data files (option -p ref_files {"file paths"})"""
assert 'target_files' in locals(), """Must provide target data files (option -p target_files {"file paths"})"""
# The message below used to be a copy of the target_files message.
assert 'target_q_file' in locals(), "Must provide a target quantiles file (option -p target_q_file {file path})"
assert 'qq_file' in locals(), "Must provide a qq-scaled data file (option -p qq_file {file path})"
assert 'hist_time_bounds' in locals(), """Must provide time bounds for historical data (option -p hist_time_bounds {"YYYY-MM-DD YYYY-MM-DD"})"""
assert 'ref_time_bounds' in locals(), """Must provide time bounds for reference data (option -p ref_time_bounds {"YYYY-MM-DD YYYY-MM-DD"})"""
assert 'target_time_bounds' in locals(), """Must provide time bounds for target data (option -p target_time_bounds {"YYYY-MM-DD YYYY-MM-DD"})"""

In [None]:
# Alias the historical parameters as "model" and the reference parameters as "obs".
model_units = hist_units
obs_units = ref_units
model_var = hist_var
obs_var = ref_var

# Split the whitespace-separated "start end" strings into [start, end] lists.
# NOTE(review): ref_time_bounds, target_var and target_units are required
# parameters but are not used in the visible cells - confirm this is intended.
training_time_bounds = hist_time_bounds.split()
assessment_time_bounds = target_time_bounds.split()

# Split the whitespace-separated file path strings into lists.
# Re-running this cell fails because the names are rebound to lists,
# which have no .split() method.
hist_files = hist_files.split()
ref_files = ref_files.split()
target_files = target_files.split()

In [None]:
# Coordinates of the example grid point, passed to .sel(..., method='nearest') below.
point_selection = {'lat': example_lat, 'lon': example_lon}

In [None]:
# Colormaps and contour levels for each supported variable.
plot_config = {}
_plot_configs = {
    'tasmin': {
        'regular_cmap': cmocean.cm.thermal,
        'diverging_cmap': 'RdBu_r',
        'general_levels': [-4.0, -2.5, -1, 0.5, 2, 3.5, 5, 6.5, 8, 9.5, 11, 12.5, 14, 15.5, 17, 18.5, 20, 21.5, 23, 24.5],
        'af_levels': None,
        'difference_levels': [0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0],
    },
    'tasmax': {
        'regular_cmap': cmocean.cm.thermal,
        'diverging_cmap': 'RdBu_r',
        'general_levels': [5, 7.5, 10, 12.5, 15, 17.5, 20, 22.5, 25, 27.5, 30, 32.5, 35],
        'af_levels': None,
        'difference_levels': [0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2],
    },
    'pr': {
        'regular_cmap': cmocean.cm.rain,
        'diverging_cmap': 'BrBG',
        'general_levels': [0, 0.01, 10, 20, 30, 40, 50, 60, 70, 80, 90],
        'af_levels': [0.125, 0.25, 0.5, 0.67, 0.8, 1, 1.25, 1.5, 2, 4, 8],
        'difference_levels': [-0.55, -0.45, -0.35, -0.25, -0.15, -0.05, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55],
    },
}
# Fail for variables that have no settings defined.
if model_var not in _plot_configs:
    raise ValueError(f'No plotting configuration defined for {hist_var}')
plot_config.update(_plot_configs[model_var])

In [None]:
# Read the historical (model) data for the training period.
# NOTE(review): presumably utils.read_data converts input_units to output_units - confirm in utils.
ds_model_training = utils.read_data(
    hist_files,
    model_var,
    time_bounds=training_time_bounds,
    input_units=model_units,
    output_units=output_units,
)

In [None]:
# Model training data at the grid point nearest the example location (hist_var is the same value as model_var).
da_model_training = ds_model_training[hist_var].sel(point_selection, method='nearest')

In [None]:
# Read the target (model) data for the assessment period.
# NOTE(review): this uses model_var / model_units (the hist_* values) even though
# target_var / target_units are required parameters - confirm they always match.
ds_model_assessment = utils.read_data(
    target_files,
    model_var,
    time_bounds=assessment_time_bounds,
    input_units=model_units,
    output_units=output_units,
)

In [None]:
# Model assessment data at the grid point nearest the example location.
da_model_assessment = ds_model_assessment[model_var].sel(point_selection, method='nearest')

In [None]:
# Read the reference (observational) data. No time_bounds are passed here;
# the training and assessment periods are sliced out of ds_obs afterwards.
ds_obs = utils.read_data(
    ref_files,
    obs_var,
    input_units=obs_units,
    output_units=output_units,
)

In [None]:
# Observations restricted to the training period (same bounds as the historical model data),
# plus the series at the grid point nearest the example location.
ds_obs_training = ds_obs.sel(time=slice(training_time_bounds[0], training_time_bounds[1]))
da_obs_training = ds_obs_training[obs_var].sel(point_selection, method='nearest')

In [None]:
# Observations restricted to the assessment period (same bounds as the target model data),
# plus the series at the grid point nearest the example location.
ds_obs_assessment = ds_obs.sel(time=slice(assessment_time_bounds[0], assessment_time_bounds[1]))
da_obs_assessment = ds_obs_assessment[obs_var].sel(point_selection, method='nearest')

In [None]:
# Adjustment factor file; the cells below read its 'hist_q', 'ref_q' and 'af' variables.
ds_adjust = xr.open_dataset(adjustment_file)

In [None]:
# Target quantiles file; indexed by model_var in the month/quantile plot below.
target_q = xr.open_dataset(target_q_file)

In [None]:
# The qq-scaled data, labelled "bias corrected" in the plots below.
ds_bc = xr.open_dataset(qq_file)

In [None]:
# qq-scaled data at the grid point nearest the example location.
da_bc = ds_bc[model_var].sel(point_selection, method='nearest')

In [None]:
# Spatial extent of the qq-scaled data; used as lat/lon bounds for the spatial quantile plots.
lat_min_bc = ds_bc['lat'].values.min()
lat_max_bc = ds_bc['lat'].values.max()
lon_min_bc = ds_bc['lon'].values.min()
lon_max_bc = ds_bc['lon'].values.max()

In [None]:
# Show the extent of the qq-scaled data as a quick sanity check.
print(lat_min_bc, lat_max_bc, lon_min_bc, lon_max_bc)

In [None]:
def quantile_month_plot(da, ax, cmap_type, levels=None, extend='both', point=None, title=None):
    """Create a two dimensional month/quantile plot for a single grid point.

    Parameters
    ----------
    da : xarray.DataArray
        Data with 'month' and 'quantiles' dimensions, optionally with
        spatial dimensions from which one point is selected.
    ax : matplotlib axes
        Axes to draw on.
    cmap_type : {'regular', 'diverging'}
        Which colormap from the module-level ``plot_config`` to use.
    levels : sequence, optional
        Contour levels passed to the plot. If omitted (or empty) and
        ``cmap_type`` is 'diverging', the colour range is made symmetric
        about zero instead.
    extend : str, default 'both'
        Colorbar extend option passed to the plot.
    point : dict, optional
        Mapping of dimension name to coordinate value for the point to
        plot (nearest neighbour). Defaults to the module-level
        ``point_selection``.
    title : str, optional
        Axes title.
    """

    assert cmap_type in ['regular', 'diverging']

    if point is None:
        point = point_selection
    # Only select along dimensions the data actually has, so that data
    # already reduced to a single point can be plotted as well.
    selection = {dim: value for dim, value in point.items() if dim in da.dims}
    quantiles = da.sel(selection, method='nearest') if selection else da

    cmap = plot_config[f'{cmap_type}_cmap']
    kwargs = {}
    # np.size handles lists and numpy arrays alike (the truth value of an
    # array is ambiguous).
    if levels is not None and np.size(levels) > 0:
        kwargs['levels'] = levels
    elif cmap_type == 'diverging':
        # Symmetric colour range; nanmax so missing values do not turn the
        # limits into NaN.
        abs_max = np.nanmax(np.abs(quantiles.values))
        kwargs['vmax'] = abs_max
        kwargs['vmin'] = -1 * abs_max

    quantiles.transpose('month', 'quantiles').plot(ax=ax, cmap=cmap, extend=extend, **kwargs)

    # Label the month axis with abbreviated month names, January at the top.
    month_numbers = np.arange(1, 13)
    month_labels = [calendar.month_abbr[i] for i in month_numbers]
    ax.set_yticks(month_numbers, month_labels)
    ax.invert_yaxis()
    if title:
        ax.set_title(title)

In [None]:
def quantile_spatial_plot(
    da, month, cmap_type, lat_bounds=None, lon_bounds=None, levels=None,
):
    """Spatial plot of the 10th, 50th and 90th percentile for one month.

    Parameters
    ----------
    da : xarray.DataArray
        Data with 'quantiles', 'month', 'lat' and 'lon' dimensions.
    month : int
        Month number (1-12) to plot; also used for the figure title.
    cmap_type : {'regular', 'diverging'}
        Which colormap from the module-level ``plot_config`` to use.
    lat_bounds, lon_bounds : sequence of two floats, optional
        (min, max) bounds used to slice the data before plotting.
    levels : sequence, optional
        Contour levels passed to the plot. If omitted (or empty) and
        ``cmap_type`` is 'diverging', the colour range is made symmetric
        about zero instead.

    The module-level ``example_lon`` / ``example_lat`` point is marked on
    every panel and the figure is shown immediately.
    """

    # Nearest available quantiles to 0.1, 0.5 and 0.9 for the requested month.
    da_selection = da.sel({'quantiles': [.1, .5, .9], 'month': month}, method='nearest')
    if lat_bounds:
        lat_min, lat_max = lat_bounds
        da_selection = da_selection.sel(lat=slice(lat_min, lat_max))
    if lon_bounds:
        lon_min, lon_max = lon_bounds
        da_selection = da_selection.sel(lon=slice(lon_min, lon_max))

    cmap = plot_config[f'{cmap_type}_cmap']
    kwargs = {}
    # np.size handles lists and numpy arrays alike (the truth value of an
    # array is ambiguous).
    if levels is not None and np.size(levels) > 0:
        kwargs['levels'] = levels
    elif cmap_type == 'diverging':
        # Symmetric colour range; nanmax so missing grid cells do not turn
        # the limits into NaN.
        abs_max = np.nanmax(np.abs(da_selection.values))
        kwargs['vmax'] = abs_max
        kwargs['vmin'] = -1 * abs_max

    p = da_selection.plot(
        col='quantiles',
        transform=ccrs.PlateCarree(),
        cmap=cmap,
        figsize=[20, 5.5],
        subplot_kws={'projection': ccrs.PlateCarree(),},
        **kwargs,
    )
    # Newer xarray exposes the facet axes as `axs`; older versions only have `axes`.
    facet_axes = p.axs if hasattr(p, 'axs') else p.axes
    for ax in facet_axes.flat:
        ax.coastlines()
        # Mark the example grid point.
        ax.plot(example_lon, example_lat, 'go', zorder=5, transform=ccrs.PlateCarree())
    plt.suptitle(calendar.month_name[month])
    plt.show()

In [None]:
# Spatial plot of the historical quantiles for the example month.
# Skipped when the adjustment file has no 'lat' dimension.
if 'lat' in ds_adjust.dims:
    quantile_spatial_plot(
        ds_adjust['hist_q'],
        example_month,
        'regular',
        levels=plot_config['general_levels'],
        lat_bounds=[lat_min_bc, lat_max_bc],
        lon_bounds=[lon_min_bc, lon_max_bc],
    )

In [None]:
# Month/quantile panels at the example point: reference quantiles,
# historical quantiles and target quantiles.
# Period labels are derived from the time bounds (first four characters,
# i.e. the year of a YYYY-MM-DD bound) instead of being hard-coded.
# NOTE(review): the reference panel is labelled with the training period,
# as before - confirm the period 'ref_q' was actually computed over.
training_period = '-'.join(bound[:4] for bound in training_time_bounds)
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)

fig = plt.figure(figsize=[20, 17])
ax1 = fig.add_subplot(311)
ax2 = fig.add_subplot(312)
ax3 = fig.add_subplot(313)

quantile_month_plot(
    ds_adjust['ref_q'],
    ax1,
    'regular',
    levels=plot_config['general_levels'],
    title=f'AGCD ({training_period})',
    extend='both',
)

quantile_month_plot(
    ds_adjust['hist_q'],
    ax2,
    'regular',
    levels=plot_config['general_levels'],
    title=f'historical ({training_period})',
    extend='both',
)

quantile_month_plot(
    target_q[model_var],
    ax3,
    'regular',
    levels=plot_config['general_levels'],
    title=f'historical ({assessment_period})',
    extend='both',
)

plt.show()

In [None]:
# Histograms of observed and model values at the example point (training period).
# The period in the legend is derived from the time bounds (first four
# characters, i.e. the year of a YYYY-MM-DD bound) instead of being hard-coded.
training_period = '-'.join(bound[:4] for bound in training_time_bounds)

fig = plt.figure(figsize=[10, 6])

bins = np.arange(-5, 150, 1)
hist_style = {'bins': bins, 'density': True, 'alpha': 0.5, 'rwidth': 0.9}

da_obs_training.plot.hist(
    label=f'AGCD ({training_period})',
    facecolor='tab:blue',
    **hist_style,
)

da_model_training.plot.hist(
    label=f'historical ({training_period})',
    facecolor='tab:green',
    **hist_style,
)

plt.ylabel('probability')
# NOTE(review): the fixed x range hides values above 20 - confirm this
# suits variables other than tasmin.
plt.xlim(-5, 20)
plt.suptitle('Training period')
plt.legend()
plt.show()

In [None]:
# Mean of the model training data at the example point (shown as the cell output).
da_model_training.values.mean()

In [None]:
# Mean of the observed training data at the example point (shown as the cell output).
da_obs_training.values.mean()

In [None]:
# Histograms of observed and raw model values at the example point (assessment period).
# The period in the legend is derived from the time bounds (first four
# characters, i.e. the year of a YYYY-MM-DD bound) instead of being hard-coded.
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)

fig = plt.figure(figsize=[10, 6])

bins = np.arange(-5, 150, 1)
hist_style = {'bins': bins, 'density': True, 'alpha': 0.5, 'rwidth': 0.9}

da_obs_assessment.plot.hist(
    label=f'AGCD ({assessment_period})',
    facecolor='tab:blue',
    **hist_style,
)

da_model_assessment.plot.hist(
    label=f'historical ({assessment_period})',
    facecolor='tab:green',
    **hist_style,
)

plt.ylabel('probability')
# NOTE(review): the fixed x range hides values above 20 - confirm this
# suits variables other than tasmin.
plt.xlim(-5, 20)
plt.suptitle('Assessment period')
plt.legend()
plt.show()

In [None]:
# Histograms of observed and bias corrected values at the example point (assessment period).
# The period in the legend is derived from the time bounds (first four
# characters, i.e. the year of a YYYY-MM-DD bound) instead of being hard-coded.
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)

fig = plt.figure(figsize=[10, 6])

bins = np.arange(-5, 150, 1)
hist_style = {'bins': bins, 'density': True, 'alpha': 0.5, 'rwidth': 0.9}

da_obs_assessment.plot.hist(
    label=f'AGCD ({assessment_period})',
    facecolor='tab:blue',
    **hist_style,
)

da_bc.plot.hist(
    label=f'bias corrected historical ({assessment_period})',
    facecolor='tab:green',
    **hist_style,
)

plt.ylabel('probability')
# NOTE(review): the fixed x range hides values above 20 - confirm this
# suits variables other than tasmin.
plt.xlim(-5, 20)
plt.legend()
plt.suptitle('Assessment period')
plt.show()

In [None]:
# Spatial plot of the adjustment factors for the example month.
# Skipped when the adjustment file has no 'lat' dimension. When af_levels is
# None the plotting function uses a colour range symmetric about zero.
if 'lat' in ds_adjust.dims:
    quantile_spatial_plot(
        ds_adjust['af'],
        example_month,
        'diverging',
        levels=plot_config['af_levels'],
        lat_bounds=[lat_min_bc, lat_max_bc],
        lon_bounds=[lon_min_bc, lon_max_bc],
    )

In [None]:
# Month/quantile view of the adjustment factors at the example point.
fig, ax1 = plt.subplots(figsize=[12, 5])
quantile_month_plot(
    ds_adjust['af'],
    ax1,
    'diverging',
    title='adjustment factors',
    levels=plot_config['af_levels'],
)
plt.show()

In [None]:
# Time-mean fields for each dataset and period (variable attributes are kept).
# bc_clim averages over every time step in the qq-scaled file.
model_training_clim = ds_model_training[model_var].mean('time', keep_attrs=True)
obs_training_clim = ds_obs_training[obs_var].mean('time', keep_attrs=True)
model_assessment_clim = ds_model_assessment[model_var].mean('time', keep_attrs=True)
obs_assessment_clim = ds_obs_assessment[obs_var].mean('time', keep_attrs=True)
bc_clim = ds_bc[model_var].mean('time', keep_attrs=True)

In [None]:
# Overwrite the lat/lon coordinates of every climatology with those of the
# bias corrected data.
# NOTE(review): presumably this avoids tiny coordinate mismatches when the
# fields are subtracted below; it only works if all grids have the same shape - confirm.
for clim in (
    model_training_clim,
    obs_training_clim,
    model_assessment_clim,
    obs_assessment_clim,
):
    clim['lat'] = bc_clim['lat']
    clim['lon'] = bc_clim['lon']

In [None]:
# Mean bias over the training period (model minus observations), evaluated eagerly.
training_mean_bias = model_training_clim - obs_training_clim
training_mean_bias = training_mean_bias.compute()

In [None]:
# Training-period maps: model climatology, observed climatology and their difference.
# Labels are built from the variable, units and time-bound parameters so they
# stay correct for variables other than tasmin and for other periods.
long_names = {
    'tasmin': 'daily minimum temperature',
    'tasmax': 'daily maximum temperature',
    'pr': 'daily precipitation',
}
var_long_name = long_names.get(model_var, model_var)
cbar_label = f'{var_long_name} ({output_units})'
# First four characters of each bound, i.e. the year of a YYYY-MM-DD bound.
training_period = '-'.join(bound[:4] for bound in training_time_bounds)
bias_levels = [-2.5, -2.0, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2.0, 2.5]

# (data, colormap type, contour levels, panel title)
panels = [
    (model_training_clim, 'regular', plot_config['general_levels'], f'historical ({training_period})'),
    (obs_training_clim, 'regular', plot_config['general_levels'], f'AGCD ({training_period})'),
    (training_mean_bias, 'diverging', bias_levels, 'Difference (historical - AGCD)'),
]

fig = plt.figure(figsize=[24, 6])
axes = []
for index, (da_panel, cmap_type, levels, title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 3, index, projection=ccrs.PlateCarree())
    da_panel.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=plot_config[f'{cmap_type}_cmap'],
        levels=levels,
        extend='both',
        cbar_kwargs={'label': cbar_label},
    )
    ax.set_title(title)
    ax.coastlines()
    # Mark the example grid point.
    ax.plot(example_lon, example_lat, 'go', zorder=5, transform=ccrs.PlateCarree())
    axes.append(ax)
ax1, ax2, ax3 = axes

# Give the first two panels the same extent as the difference panel.
xmin, xmax = ax3.get_xlim()
ymin, ymax = ax3.get_ylim()
ax1.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
ax2.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
plt.suptitle(f'Bias in annual mean {var_long_name} (training period)')
plt.show()

In [None]:
# Mean bias of the raw model over the assessment period (model minus observations), evaluated eagerly.
assessment_mean_bias = model_assessment_clim - obs_assessment_clim
assessment_mean_bias = assessment_mean_bias.compute()

In [None]:
# Assessment-period maps: raw model climatology, observed climatology and their difference.
# Labels are built from the variable, units and time-bound parameters so they
# stay correct for variables other than tasmin and for other periods.
long_names = {
    'tasmin': 'daily minimum temperature',
    'tasmax': 'daily maximum temperature',
    'pr': 'daily precipitation',
}
var_long_name = long_names.get(model_var, model_var)
cbar_label = f'{var_long_name} ({output_units})'
# First four characters of each bound, i.e. the year of a YYYY-MM-DD bound.
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)
bias_levels = [-2.5, -2.0, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2.0, 2.5]

# (data, colormap type, contour levels, panel title)
panels = [
    (model_assessment_clim, 'regular', plot_config['general_levels'], f'historical ({assessment_period})'),
    (obs_assessment_clim, 'regular', plot_config['general_levels'], f'AGCD ({assessment_period})'),
    (assessment_mean_bias, 'diverging', bias_levels, 'Difference (historical - AGCD)'),
]

fig = plt.figure(figsize=[24, 6])
axes = []
for index, (da_panel, cmap_type, levels, title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 3, index, projection=ccrs.PlateCarree())
    da_panel.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=plot_config[f'{cmap_type}_cmap'],
        levels=levels,
        extend='both',
        cbar_kwargs={'label': cbar_label},
    )
    ax.set_title(title)
    ax.coastlines()
    # Mark the example grid point.
    ax.plot(example_lon, example_lat, 'go', zorder=5, transform=ccrs.PlateCarree())
    axes.append(ax)
ax1, ax2, ax3 = axes

# Give the first two panels the same extent as the difference panel.
xmin, xmax = ax3.get_xlim()
ymin, ymax = ax3.get_ylim()
ax1.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
ax2.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
plt.suptitle(f'Bias in annual mean {var_long_name} (assessment period)')
plt.show()

In [None]:
# Mean bias of the bias corrected data (bias corrected minus assessment-period observations), evaluated eagerly.
bc_mean_bias = bc_clim - obs_assessment_clim
bc_mean_bias = bc_mean_bias.compute()

In [None]:
# Assessment-period maps: bias corrected climatology, observed climatology and their difference.
# Labels are built from the variable, units and time-bound parameters so they
# stay correct for variables other than tasmin and for other periods.
long_names = {
    'tasmin': 'daily minimum temperature',
    'tasmax': 'daily maximum temperature',
    'pr': 'daily precipitation',
}
var_long_name = long_names.get(model_var, model_var)
cbar_label = f'{var_long_name} ({output_units})'
# First four characters of each bound, i.e. the year of a YYYY-MM-DD bound.
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)
bias_levels = [-2.5, -2.0, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2.0, 2.5]

# (data, colormap type, contour levels, panel title)
# The difference panel shows bc_clim minus observations, so its title names
# the bias corrected data (it previously said "historical").
panels = [
    (bc_clim, 'regular', plot_config['general_levels'], f'bias corrected historical ({assessment_period})'),
    (obs_assessment_clim, 'regular', plot_config['general_levels'], f'AGCD ({assessment_period})'),
    (bc_mean_bias, 'diverging', bias_levels, 'Difference (bias corrected - AGCD)'),
]

fig = plt.figure(figsize=[24, 6])
axes = []
for index, (da_panel, cmap_type, levels, title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 3, index, projection=ccrs.PlateCarree())
    da_panel.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=plot_config[f'{cmap_type}_cmap'],
        levels=levels,
        extend='both',
        cbar_kwargs={'label': cbar_label},
    )
    ax.set_title(title)
    ax.coastlines()
    # Mark the example grid point.
    ax.plot(example_lon, example_lat, 'go', zorder=5, transform=ccrs.PlateCarree())
    axes.append(ax)
ax1, ax2, ax3 = axes

# Give the first two panels the same extent as the difference panel.
xmin, xmax = ax3.get_xlim()
ymin, ymax = ax3.get_ylim()
ax1.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
ax2.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
plt.suptitle(f'Bias in annual mean {var_long_name} (assessment period)')
plt.show()

In [None]:
# Change in absolute bias due to bias correction; negative values mean the
# bias corrected data is closer to the observations than the raw model.
bc_improvement = np.fabs(bc_mean_bias) - np.fabs(assessment_mean_bias)

In [None]:
# Assessment-period maps: bias after correction, bias of the raw model and the
# change in absolute bias between them.
# Labels are built from the variable, units and time-bound parameters so they
# stay correct for variables other than tasmin and for other periods.
long_names = {
    'tasmin': 'daily minimum temperature',
    'tasmax': 'daily maximum temperature',
    'pr': 'daily precipitation',
}
var_long_name = long_names.get(model_var, model_var)
cbar_label = f'{var_long_name} ({output_units})'
# First four characters of each bound, i.e. the year of a YYYY-MM-DD bound.
assessment_period = '-'.join(bound[:4] for bound in assessment_time_bounds)
bias_levels = [-2.5, -2.0, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2.0, 2.5]

# (data, panel title); every panel uses the diverging colormap and bias levels.
panels = [
    (bc_mean_bias, f'bias corrected historical bias ({assessment_period})'),
    (assessment_mean_bias, f'historical bias ({assessment_period})'),
    (bc_improvement, 'Abs difference (bc - historical)'),
]

fig = plt.figure(figsize=[24, 6])
axes = []
for index, (da_panel, title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 3, index, projection=ccrs.PlateCarree())
    da_panel.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap=plot_config['diverging_cmap'],
        levels=bias_levels,
        extend='both',
        cbar_kwargs={'label': cbar_label},
    )
    ax.set_title(title)
    ax.coastlines()
    # Mark the example grid point.
    ax.plot(example_lon, example_lat, 'go', zorder=5, transform=ccrs.PlateCarree())
    axes.append(ax)
ax1, ax2, ax3 = axes

# Give the first two panels the same extent as the third panel.
xmin, xmax = ax3.get_xlim()
ymin, ymax = ax3.get_ylim()
ax1.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
ax2.set_extent([xmin, xmax, ymin, ymax], crs=ccrs.PlateCarree())
plt.suptitle(f'Bias in annual mean {var_long_name} (assessment period)')
plt.show()