### Import Needed Packages

In [None]:
import os

import plotly.express as px
import numpy as np
import xarray as xr

import basd

### Root data path

In [None]:
# Root directory that every data file path in this notebook is joined onto.
data_path = "../../../OneDrive - PNNL/Documents/BASD/data"

# Precipitation Data

Here we will adjust precipitation output from NCAR's CESM2-WACCM model using observational data from EWEMBI.

First we set the paths to our data:

In [None]:
# Precipitation inputs, given relative to `data_path`.
# Earlier candidate kept for reference (not used):
#   obs_hist_path = 'pr_day_CESM2-WACCM_historical_r2i1p1f1_gn_20100101-20150101.nc'
pr_obs_hist_path = "EWEMBI/pr_ewembi_2011_2016.nc4"  # observations
pr_sim_hist_path = (
    "CMIP6/CESM2-WACCM/"
    "pr_day_CESM2-WACCM_historical_r1i1p1f1_gn_20100101-20150101.nc"
)  # simulated historical period
pr_sim_fut_path = (
    "CMIP6/CESM2-WACCM/"
    "pr_day_CESM2-WACCM_ssp370_r1i1p1f1_gn_20950101-21010101.nc"
)  # simulated future period

and read in our data using the `xarray` package

In [None]:
# Open the three precipitation datasets (observed, simulated historical,
# simulated future). `decode_coords='all'` is passed to xarray for each file.
pr_obs_hist = xr.open_dataset(
    os.path.join(data_path, pr_obs_hist_path),
    decode_coords='all',
)
pr_sim_hist = xr.open_dataset(
    os.path.join(data_path, pr_sim_hist_path),
    decode_coords='all',
)
pr_sim_fut = xr.open_dataset(
    os.path.join(data_path, pr_sim_fut_path),
    decode_coords='all',
)

## Setting parameters and creating Adjustment Object
For precipitation, we set the following parameters:

* Lower bound: 0
* Lower threshold: 0.0000011574
* Trend preservation method: 'mixed'
* Distribution: 'gamma'
* Value to set cells with only invalid values: 0

We then create the bias adjustment object from our data and parameters object, and ask to have our data remapped to match in resolution. This is needed in this case as the EWEMBI data is 360x720 (lat x lon), whereas CESM2-WACCM is 192x288.

In [None]:
# Parameter set for precipitation: gamma distribution, 'mixed' trend
# preservation, bounded below at 0.
# NOTE(review): the threshold is presumably 0.1 mm/day expressed in
# kg m-2 s-1 (0.1 / 86400) — confirm against the data units.
params = basd.Parameters(
    distribution='gamma',
    trend_preservation='mixed',
    lower_bound=0,
    lower_threshold=0.0000011574,
    if_all_invalid_use=0,
)


In [None]:
# Build the precipitation bias-adjustment object from the observed,
# simulated-historical and simulated-future datasets for variable 'pr'.
# `remap_grid=True` asks basd to remap the data so the grids match in
# resolution.
pr_ba = basd.Adjustment(
    pr_obs_hist, pr_sim_hist, pr_sim_fut,
    'pr',
    params,
    remap_grid=True,
)

# Adjustment at one location
Here we ask to perform bias adjustment at the 100th row and 100th column cell position. This happens to correspond to 4.421 degrees latitude (on -90 to 90) and 125 degrees longitude (on -180 to 180). This choice was arbitrary.

Note that to run a full grid adjustment, one would use `pr_ba.adjust_bias()`. However, this is extremely computationally intensive, and it is recommended to run it on a computing cluster to make use of the parallel implementation of this function.

In [None]:
# Bias-adjust a single grid cell, selected by integer position (index 100
# along both lat and lon).
pr_sim_fut_ba_loc = pr_ba.adjust_bias_one_location({'lat': 100, 'lon': 100})

# Plots

### Histogram
This plot shows the distribution of precipitation over the time period Jan 1, 2095 - Dec 31, 2100 (our input simulated future period) at our given grid cell, before and after bias adjustment.

In [None]:
# Histogram of precipitation at the selected cell, before and after bias
# adjustment; `scale='log'` is forwarded to basd's plotting helper.
pr_sim_fut_ba_loc.plot_hist(scale='log')

### Empirical CDF
This plot gives the empirical CDFs of precipitation for each of our input data, and bias adjusted result, at the given grid cell.

We can see here how the relationship between the observational and simulated historical data is indeed transferred from the simulated future data to the bias-adjusted result.

In [None]:
# Empirical CDFs of the input series and the bias-adjusted result at the
# selected cell; `log_x=True` is forwarded to basd's plotting helper
# (presumably a logarithmic x-axis — confirm in basd).
pr_sim_fut_ba_loc.plot_ecdf(log_x=True)

# Downscaling Precipitation
Using the bias-adjusted result from above, we now want to downscale to the observational data resolution.

In [None]:
# Directory holding previously bias-corrected output, and the file name of the
# bias-corrected precipitation data inside it.
bias_corrected_data_path = os.path.join(data_path, "bias-corrected")
pr_ba_path = (
    "pr_day_CESM2-WACCM_ssp370_r1i1p1f1_gn_20950101-21010101"
    "_biascorrected_ewembi.nc"
)

In [None]:
# Load the bias-corrected precipitation dataset from disk.
# NOTE: this rebinds `pr_ba`, which earlier in the notebook referred to the
# basd.Adjustment object; from here on it is an xarray Dataset.
pr_ba = xr.open_dataset(
    os.path.join(bias_corrected_data_path, pr_ba_path),
    decode_coords='all',
)

In [None]:
# Build the downscaler for 'pr' from the fine-resolution observations and the
# bias-corrected simulation, reusing the precipitation parameter set.
pr_sd = basd.Downscaler(pr_obs_hist, pr_ba, 'pr', params)

In [None]:
# Downscale a single cell, selected by integer position (index 0 along both
# lat and lon); the return value is shown as the cell output.
pr_sd.downscale_one_location({'lat': 0, 'lon': 0})

In [None]:
# Write the downscaler's coarse and fine simulation datasets to NetCDF files
# under `data_path`.
pr_coarse_out = os.path.join(data_path, 'pr_ba_coarse_reprojection.nc')
pr_fine_out = os.path.join(data_path, 'pr_ba_fine_reprojection.nc')
pr_sd.sim_coarse.to_netcdf(pr_coarse_out)
pr_sd.sim_fine.to_netcdf(pr_fine_out)

# Shortwave Radiation Data

Now we will adjust surface downwelling shortwave radiation output from MIROC's model using observational data from EWEMBI.

Again we set the paths to our data:

In [None]:
# Shortwave-radiation inputs, given relative to `data_path`.
rsds_obs_hist_path = "EWEMBI/rsds_ewembi_2011_2016.nc4"  # observations
rsds_sim_hist_path = (
    "CMIP6/MIROC6/"
    "rsds_day_MIROC6_historical_r2i1p1f1_gn_20100101-20141231.nc"
)  # simulated historical period
rsds_sim_fut_path = (
    "CMIP6/MIROC6/"
    "rsds_day_MIROC6_ssp370_r2i1p1f1_gn_20150101-20241231.nc"
)  # simulated future period

reading in with `xarray`:

In [None]:
# Open the three shortwave-radiation datasets (observed, simulated historical,
# simulated future). `decode_coords='all'` is passed to xarray for each file.
rsds_obs_hist = xr.open_dataset(
    os.path.join(data_path, rsds_obs_hist_path),
    decode_coords='all',
)
rsds_sim_hist = xr.open_dataset(
    os.path.join(data_path, rsds_sim_hist_path),
    decode_coords='all',
)
rsds_sim_fut = xr.open_dataset(
    os.path.join(data_path, rsds_sim_fut_path),
    decode_coords='all',
)

## Creating Parameter and Adjustment Objects
For `rsds` we have a slightly more involved process to set up the bias adjustment, though that is all taken care of for us when we specify parameters. This is because we are going to first scale `rsds` to the interval [0,1], at which point it is assumed to follow a Beta distribution. We'll then set our remaining parameters accordingly.

Our data is scaled to [0,1] by setting each observation to be how large the observation is compared to the largest observation in a surrounding window. We get to choose how large the window is by specifying the half width (the number of days just before or after). Here we set a half running window size of 15, thus a full window size of 31.

Again, we also want to remap the observational data to match the simulated data's resolution.


In [None]:
# Parameter set for rsds: values are treated on [0, 1] with a beta
# distribution and 'bounded' trend preservation. `halfwin_ubc=15` is the half
# width of the running window (full window of 31 days).
rsds_params = basd.Parameters(
    distribution='beta',
    trend_preservation='bounded',
    halfwin_ubc=15,
    lower_bound=0,
    lower_threshold=0.0001,
    upper_bound=1,
    upper_threshold=0.9999,
    if_all_invalid_use=0,
)

# Bias-adjustment object for 'rsds'; `remap_grid=True` asks basd to remap the
# data so the grids match in resolution.
rsds_ba = basd.Adjustment(
    rsds_obs_hist, rsds_sim_hist, rsds_sim_fut,
    'rsds',
    rsds_params,
    remap_grid=True,
)

# Adjustment at one location
Here we ask to perform bias adjustment at the 100th row and 100th column cell position. This time this happens to correspond to 51.1 degrees latitude and 140.6 degrees longitude. Again, arbitrarily selected.

In [None]:
# Bias-adjust a single grid cell, selected by integer position (index 100
# along both lat and lon).
rsds_sim_fut_ba_loc = rsds_ba.adjust_bias_one_location({'lat': 100, 'lon': 100})

# Plots II

### Histogram
Shows the distribution of surface downwelling shortwave radiation before and after bias adjustment at the chosen grid cell.

In [None]:
# Histogram of rsds at the selected cell, before and after bias adjustment
# (default plotting options).
rsds_sim_fut_ba_loc.plot_hist()

### Empirical CDF
Shows the empirical CDFs for each input data source and resulting adjustment time series, at the chosen grid cell.

In [None]:
# Empirical CDF drawn directly with plotly express from the result's
# `time_series` table: 'rsds' on the x-axis, one curve per 'Source' value.
px.ecdf(rsds_sim_fut_ba_loc.time_series, x='rsds', color='Source')

# Near-Surface Relative Humidity

Now we will adjust near-surface relative humidity (hurs) output from MIROC's model using observational data from EWEMBI.

Again we set the paths to our data:

In [None]:
# Relative-humidity inputs. The first two are relative to `data_path`; the
# bias-corrected file is read from the bias-corrected directory.
hurs_obs_hist_path = "EWEMBI/hurs_ewembi_2011_2016.nc4"  # observations
hurs_sim_fut_path = (
    "CMIP6/MIROC6/"
    "hurs_day_MIROC6_ssp370_r1i1p1f1_gn_20150101-20241231.nc"
)  # simulated future period
hurs_ba_path = (
    "hurs_day_MIROC6_ssp370_r1i1p1f1_gn_20150101-20241231"
    "_bias-corrected_ewembi.nc"
)  # previously bias-corrected output

In [None]:
# Open the relative-humidity datasets: observations and simulated future from
# `data_path`, and the bias-corrected result from `bias_corrected_data_path`.
hurs_obs_hist = xr.open_dataset(
    os.path.join(data_path, hurs_obs_hist_path),
    decode_coords='all',
)
hurs_sim_fut = xr.open_dataset(
    os.path.join(data_path, hurs_sim_fut_path),
    decode_coords='all',
)
hurs_ba = xr.open_dataset(
    os.path.join(bias_corrected_data_path, hurs_ba_path),
    decode_coords='all',
)

In [None]:
# Parameter set for hurs: bounded on [0, 100] with thresholds just inside the
# bounds, beta distribution, 'bounded' trend preservation.
# NOTE(review): `n_iterations=10` is passed through to basd; its exact effect
# on the downscaling is not visible here — confirm in basd's documentation.
hurs_params = basd.Parameters(
    distribution='beta',
    trend_preservation='bounded',
    lower_bound=0,
    lower_threshold=0.01,
    upper_bound=100,
    upper_threshold=99.99,
    if_all_invalid_use=0,
    n_iterations=10,
)

# Downscaler for 'hurs' built from the fine observations and the
# bias-corrected simulation.
hurs_ds = basd.Downscaler(hurs_obs_hist, hurs_ba, 'hurs', hurs_params)

In [None]:
# Write the downscaler's coarse and fine simulation datasets to NetCDF files
# under `data_path`.
hurs_coarse_out = os.path.join(data_path, 'hurs_ba_coarse_reprojection.nc')
hurs_fine_out = os.path.join(data_path, 'hurs_ba_fine_reprojection.nc')
hurs_ds.sim_coarse.to_netcdf(hurs_coarse_out)
hurs_ds.sim_fine.to_netcdf(hurs_fine_out)

In [None]:
# Call basd's reprojection helper on the fine observations and the coarse
# bias-corrected data; the return value is shown as the cell output.
# NOTE(review): presumably this regrids so the fine grid is an integer multiple
# of the coarse grid — confirm in basd.
basd.reproject_for_integer_factors(hurs_obs_hist, hurs_ba)

In [None]:
# Shift the observation longitudes by +180 degrees, in place.
# NOTE: this mutates `hurs_obs_hist`, so re-running the cell shifts the
# coordinate again.
hurs_obs_hist.coords['lon'] = hurs_obs_hist.coords['lon'] + 180
# Alternative kept for reference (not executed): wrap longitudes into
# [-180, 180) and re-sort.
#df.coords['lon'] = (df.coords['lon'] + 180) % 360 - 180
#df = df.sortby(df.lon)

In [None]:
# Remove the 'lat_bnds' and 'lon_bnds' coordinates from the bias-corrected
# dataset (drop=True discards them instead of turning them into data variables).
hurs_ba = hurs_ba.reset_coords(names=['lat_bnds', 'lon_bnds'], drop=True)

In [None]:
# Longitude spacing taken from the first two grid points, then shift every
# longitude by half of that spacing.
# NOTE: assigning to `.coords` mutates `hurs_ba`, so re-running the cell
# shifts the coordinate again.
lon_values = hurs_ba.coords['lon'].values
width = lon_values[1] - lon_values[0]
hurs_ba.coords['lon'] = hurs_ba.coords['lon'] + width / 2

In [None]:
# Display the observation dataset for inspection.
hurs_obs_hist

In [None]:
# Display the simulated-future dataset for inspection.
hurs_sim_fut

In [None]:
# Display the bias-corrected dataset for inspection.
hurs_ba

In [None]:
# Generic aliases used by the cells below: fine grid = observations,
# coarse grid = bias-corrected simulation. These are references to the same
# objects, not copies.
obs_fine, sim_coarse = hurs_obs_hist, hurs_ba

In [None]:
# Coordinate values of the fine (observation) grid.
fine_lats = obs_fine.coords['lat'].values
fine_lons = obs_fine.coords['lon'].values

# Coordinate values of the coarse (simulation) grid.
coarse_lats = sim_coarse.coords['lat'].values
coarse_lons = sim_coarse.coords['lon'].values

# Fine-to-coarse size ratio along each axis. True division is used, so both
# results are floats even when the ratio is a whole number.
f_lat = len(fine_lats) / len(coarse_lats)
f_lon = len(fine_lons) / len(coarse_lons)

In [None]:
# Check whether the fine grid is a whole-number multiple of the coarse grid
# along both axes. When `f_lat` / `f_lon` are computed with true division they
# are floats, so an `isinstance(..., int)` test can never succeed; test the
# values for integrality instead. The `float(...)` wrapper keeps this working
# if the factors have already been recomputed as ints.
float(f_lat).is_integer() and float(f_lon).is_integer()

In [None]:
# Integer fine-to-coarse factors along each axis (floor division).
f_lon = len(fine_lons) // len(coarse_lons)
f_lat = len(fine_lats) // len(coarse_lats)

# Assert the coordinate reference system on the coarse data, in place.
# The CRS is assumed to be EPSG:4326; it is not read from the data.
# NOTE(review): `.rio` is the rioxarray accessor, which this notebook does not
# import directly — presumably it is registered when `basd` is imported; verify.
sim_coarse.rio.write_crs(4326, inplace=True)

# Views of both datasets with spatial dims renamed to x/y and the dimension
# order set to (time, y, x, <anything else>).
sim_coarse_xy = (
    sim_coarse
    .rename({'lon': 'x', 'lat': 'y'})
    .transpose('time', 'y', 'x', ...)
)
obs_fine_xy = (
    obs_fine
    .rename({'lon': 'x', 'lat': 'y'})
    .transpose('time', 'y', 'x', ...)
)

In [None]:
# Aggregate the first time step of the fine observations onto the coarse grid:
# block-mean over `f_lat` rows first, then over `f_lon` columns.
obs_coarse = (
    obs_fine.isel(time=0)
    .coarsen(lat=f_lat).mean()
    .coarsen(lon=f_lon).mean()
)

In [None]:
# Grid shape (n_lat, n_lon) obtained by dividing the fine grid sizes by the
# integer factors.
(len(fine_lats) // f_lat, len(fine_lons) // f_lon)

In [None]:
# Display the x/y-renamed coarse dataset for inspection.
sim_coarse_xy

In [None]:
# Reproject the coarse data within EPSG:4326 onto a grid whose shape is the
# fine grid size divided by the integer factors; the result is shown as the
# cell output and not stored.
sim_coarse_xy.rio.reproject(
    dst_crs="EPSG:4326",
    shape=(
        len(fine_lats) // f_lat,
        len(fine_lons) // f_lon,
    ),
)

In [None]:
# Construct and display an empty xarray Dataset (scratch cell; result not stored).
xr.Dataset({})

In [None]:
# Evenly spaced coordinates spanning the first-to-last fine latitude and
# longitude, with as many points as the fine size divided by the factor.
latitudes = np.linspace(
    fine_lats[0], fine_lats[-1], num=len(fine_lats) // f_lat
)
longitudes = np.linspace(
    fine_lons[0], fine_lons[-1], num=len(fine_lons) // f_lon
)

# Expand to 2-D grids. With numpy's default 'xy' indexing and latitudes passed
# first, both outputs have shape (len(longitudes), len(latitudes)).
latitudes, longitudes = np.meshgrid(latitudes, longitudes)

In [None]:
# Block-mean the x/y-renamed observations onto the coarse grid: average over
# `f_lon` columns first, then over `f_lat` rows. The result is shown as the
# cell output and not stored.
obs_fine_xy.coarsen(x=f_lon).mean().coarsen(y=f_lat).mean()