In [1]:
import hvplot.xarray  # noqa
import hvplot.pandas  # noqa
import xarray as xr
import numpy as np
import pandas as pd
import panel as pn
import panel.widgets as pnw

# Throttle Panel sliders globally: for performance reasons the sliders used by
# hvplot should only re-evaluate on mouseup, not while the handle is dragged.
pn.config.throttled = True

## Bootstrap samples (bootstrap_mean, bootstrap_median, bootstrap_std) and Expected Values of original fit (expected_values)

This notebook shows how to run a bootstrap procedure. It generates multiple re-sampled timeseries by reshuffling one-year-long blocks of the residuals with respect to the expected values estimated from the original timeseries.

Note: You might need to generate the data first, e.g. by calling
```
attrici detrend \
    --gmt-file tests/data/20CRv3-ERA5_germany_ssa_gmt.nc \
    --input-file tests/data/20CRv3-ERA5_germany_obs.nc \
    --variable pr \
    --output-dir tests/data/output \
    --bootstrap-sample-count 100 \
    --overwrite
```

In [2]:
# Open every NetCDF file matched by the glob as one combined dataset.
ds = xr.open_mfdataset("../tests/data/output/bootstrap/pr/**/*.nc")
# `attrs` is a mapping, so reset it with an empty dict instead of a list
# (presumably done to keep the dataset repr below compact).
ds.attrs = {}
ds

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 350.98 kiB Shape (2, 2, 44925) (1, 1, 44925) Dask graph 4 chunks in 11 graph layers Data type float64 numpy.ndarray",44925  2  2,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 350.98 kiB Shape (2, 2, 44925) (1, 1, 44925) Dask graph 4 chunks in 11 graph layers Data type float64 numpy.ndarray",44925  2  2,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 350.98 kiB Shape (2, 2, 44925) (1, 1, 44925) Dask graph 4 chunks in 11 graph layers Data type float64 numpy.ndarray",44925  2  2,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,350.98 kiB
Shape,"(2, 2, 44925)","(1, 1, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.08 MiB,3.77 MiB
Shape,"(2, 2, 11, 44925)","(1, 1, 11, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.08 MiB 3.77 MiB Shape (2, 2, 11, 44925) (1, 1, 11, 44925) Dask graph 4 chunks in 11 graph layers Data type float64 numpy.ndarray",2  1  44925  11  2,

Unnamed: 0,Array,Chunk
Bytes,15.08 MiB,3.77 MiB
Shape,"(2, 2, 11, 44925)","(1, 1, 11, 44925)"
Dask graph,4 chunks in 11 graph layers,4 chunks in 11 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [3]:
# Adjust units to mm/day: variables given in kg m-2 s-1 are scaled by the
# number of seconds per day and relabelled. Variables that carry no `units`
# attribute are skipped instead of raising a KeyError.
SECONDS_PER_DAY = 86400
for name in ds.data_vars:
    if ds[name].attrs.get("units") == "kg m-2 s-1":
        ds[name] *= SECONDS_PER_DAY
        ds[name].attrs["units"] = "mm d-1"

In [4]:
def plot(d, **kwargs):
    """Draw ``d`` on a lon/lat map with coastlines and a padded extent.

    The padding added on each side of an axis decreases linearly with the
    coordinate span of that axis: it is 7 for a span of 0.5 and 0 for a
    span of 360.

    Parameters
    ----------
    d
        Object exposing ``lon`` and ``lat`` coordinates and an ``hvplot``
        callable.
    **kwargs
        Passed through unchanged to ``d.hvplot``.

    Returns
    -------
    The object returned by ``d.hvplot``.
    """

    def padding(span):
        # linear in the span: 7 at span == 0.5, 0 at span == 360
        return 7 * (span - 360) / (0.5 - 360)

    west, east = d.lon.min(), d.lon.max()
    south, north = d.lat.min(), d.lat.max()
    pad_x = padding(east - west)
    pad_y = padding(north - south)

    return d.hvplot(
        "lon",
        "lat",
        xlim=(west - pad_x, east + pad_x),
        ylim=(south - pad_y, north + pad_y),
        geo=True,
        coastline=True,
        **kwargs,
    )

## Time evolution

### Expected Values with 95% bootstrap range (2.5th–97.5th percentile)

In [5]:
# Grid point whose time series is inspected in the following cells.
lon = 9.25
lat = 50.75

# Collect the 2.5% / 97.5% bootstrap quantiles and the expected values at
# that point into one frame indexed by the dataset's time coordinate.
point = dict(lat=lat, lon=lon)
df = pd.DataFrame(
    {
        "lower": ds.bootstrap_quantiles.sel(quantile=0.025, **point),
        "upper": ds.bootstrap_quantiles.sel(quantile=0.975, **point),
        "expected": ds.expected_values.sel(**point),
    },
    index=ds.time,
)

#### Daily

In [6]:
# Shaded band between the lower and upper quantile, with the expected values
# overlaid as a red line.
daily_band = df.hvplot.area(y="lower", y2="upper", ylabel=ds.expected_values.units)
daily_line = df.expected.hvplot(color="red", legend=False)
daily_band * daily_line

#### Annual mean

In [7]:
# Average all columns per calendar year, then draw the band between the
# lower and upper quantile with the expected values overlaid in red.
d = df.groupby(df.index.year).mean()
annual_band = d.hvplot.area(y="lower", y2="upper", ylabel=ds.expected_values.units)
annual_line = d.expected.hvplot(color="red", legend=False)
annual_band * annual_line

### Bootstrap Std

In [8]:
# Bootstrap standard deviation, plotted with time on the x axis.
ds["bootstrap_std"].hvplot(x="time")

### Difference of Bootstrap Mean and Expected Values

In [9]:
# The difference array is given an explicit name before plotting it over time.
mean_minus_expected = ds.bootstrap_mean - ds.expected_values
mean_minus_expected.rename("bootstrap_mean - expected_values").hvplot(x="time")

## RMSE of Bootstrap Mean and Expected Values for last year

In [10]:
def rmse(d1, d2, time=None, interactive=False):
    """Root-mean-square difference of ``d1`` and ``d2`` along ``time``.

    Parameters
    ----------
    d1, d2
        Arrays with a ``time`` dimension; the difference is ``d1 - d2``.
    time
        Selector handed to ``.sel(time=...)``. When ``None``, the last year
        found in ``d1.time`` is used (as a string).
    interactive
        When true, the difference is renamed to ``"difference"`` and wrapped
        with ``.interactive()`` so that ``time`` may be an interactive value.

    Returns
    -------
    ``np.sqrt`` of the mean over ``time`` of the squared difference.
    """
    if time is None:
        # default to the last year of the per-year grouping of d1's time axis
        year_groups = d1.time.groupby("time.year").groups
        time = str(list(year_groups)[-1])

    delta = d1 - d2
    if interactive:
        # With an interactive `time`, the wrapper must be created inside this
        # function, before the mean over time; the difference also needs a name.
        delta = delta.rename("difference").interactive()

    squared = delta.sel(time=time) ** 2
    return np.sqrt(squared.mean(dim="time"))

In [11]:
# No `time` is passed, so rmse() falls back to its default (the last year).
last_year_rmse = rmse(ds.expected_values, ds.bootstrap_mean)
plot(last_year_rmse, title="RMSE (expected_values, bootstrap_mean) last year")

## Annual Mean of Bootstrap Standard Deviation

In [12]:
# Year over which the bootstrap standard deviation is averaged; it has to be
# present in the dataset's time axis.
selected_year = "2023"
plot(
    ds.bootstrap_std.sel(time=selected_year).mean(dim="time"),
    title=f"Mean of Bootstrap Std {selected_year}",
)

## With interactive year selection

In [13]:
# Slider over every year of the dataset's time axis, initially set to the
# last one.
years = list(ds.time.groupby("time.year").groups.keys())
year_labels = [str(i) for i in years]
year = pnw.DiscreteSlider(name="time", options=year_labels, value=year_labels[-1])

# Both maps take their year from the same slider.
interactive_rmse = rmse(
    ds.expected_values, ds.bootstrap_mean, time=year, interactive=True
)
rmse_plot = plot(
    interactive_rmse, title="Annual RMSE (expected_values, bootstrap_mean)"
)

interactive_std_mean = ds.bootstrap_std.interactive().sel(time=year).mean(dim="time")
mean_bs_std_plot = plot(interactive_std_mean, title="Annual mean of Bootstrap Std")

pn.Row(rmse_plot, mean_bs_std_plot)