# Compute model bias

In [None]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import rioxarray as rxr
import simplejson
import tqdm
import xarray as xr

## Reference data

### E-OBS

In [None]:
# E-OBS reference fields (file names indicate 1971-2000 aggregates on a 0.1 degree grid).
# Temperature: take the "tg" variable and add 273.15 so it is expressed in kelvin,
# drop any length-1 dimensions and tag the result with EPSG:4326.
eobs_tas = xr.open_dataset(
    "reference/tg_ens_mean_0.1deg_reg_v30.0e_REMAP_MEAN_1971-2000.nc"
)["tg"]
eobs_tas = (eobs_tas + 273.15).squeeze(drop=True).rio.write_crs(4326)

# Precipitation: take the "rr" variable as-is (no unit conversion is applied here).
eobs_pr = xr.open_dataset(
    "reference/rr_ens_mean_0.1deg_reg_v30.0e_REMAP_MEAN_YEARSUM_1971-2000.nc"
)["rr"]
eobs_pr = eobs_pr.squeeze(drop=True).rio.write_crs(4326)

### ERA5

In [None]:
# ERA5 reference fields (file names indicate 1971-2000 aggregates).
# Temperature: the "t2m" variable, used without any offset
# (NOTE(review): presumably already in kelvin - confirm against the file metadata).
era5_tas = xr.open_dataset("reference/era5_1971_2000_tas_MEAN_REMAP.nc")["t2m"]
era5_tas = era5_tas.squeeze(drop=True).rio.write_crs(4326)

# Precipitation: the "tp" variable, used without any scaling
# (NOTE(review): confirm its unit matches what bias() expects).
era5_pr = xr.open_dataset("reference/era5_1971_2000_total_precip_YEARSUM_REMAP.nc")["tp"]
era5_pr = era5_pr.squeeze(drop=True).rio.write_crs(4326)

## Model data

In [None]:
def preprocess(ds):
    """Prepare one model file so that all files can be combined along "model".

    The file name stored in ``ds.encoding["source"]`` is split on underscores;
    the 3rd, 5th and 6th fields are taken as GCM, ensemble member and RCM.
    The ``time`` and ``time_bnds`` variables are dropped, a length-1 ``model``
    dimension labelled ``"<gcm> <rcm> <ens>"`` is added, and a ``height``
    coordinate is dropped when present.

    Raises ``ValueError`` (from the unpacking) if the file name has fewer than
    six underscore-separated fields.
    """
    source_path = ds.encoding["source"]
    stem = os.path.splitext(os.path.basename(source_path))[0]
    _, _, gcm, _, ens, rcm, *_ = stem.split("_")
    label = f"{gcm} {rcm} {ens}"

    ds = ds.drop_vars(["time", "time_bnds"]).expand_dims({"model": [label]})
    # Only some files carry a "height" coordinate; remove it where it exists.
    return ds.drop_vars(["height"]) if "height" in ds.coords else ds

# Open every matching model file, run preprocess() on each so they combine along
# the "model" dimension, then squeeze length-1 dimensions, load the data into
# memory and tag the dataset with EPSG:4326.
models = xr.open_mfdataset("models-hist/*1971_2000*.nc4", preprocess=preprocess)
models = models.squeeze().load().rio.write_crs(4326)

## Zonal statistics of bias


Run the `regions-euro-cordex` notebook first to generate the regions file (`../data/regions.geojson`) that is read below.

In [None]:
# Region polygons used for the zonal statistics; the code below relies on an
# "id" column and the geometry of each row.
regions = gpd.read_file("../data/regions.geojson")

In [None]:
def bias(ref_temperature, ref_precipitation):
    """Return the model bias (model - reference) against one reference dataset.

    Reads the module-level ``models`` dataset.

    - ``tas``: absolute difference ``models["tas"] - ref_temperature``.
    - ``pr``: relative difference in percent, computed from
      ``models["pr"] * 86400`` and ``ref_precipitation``.

    NOTE(review): the factor 86400 presumably converts a per-second rate to a
    per-day amount; confirm that this matches the unit and aggregation of the
    reference precipitation.
    """
    # Small offset added to both numerator and denominator; it keeps the
    # denominator non-zero where the reference precipitation is exactly 0.
    epsilon = 0.00001

    tas_bias = models["tas"] - ref_temperature

    pr_model = models["pr"] * 86400.
    pr_bias = (pr_model - ref_precipitation + epsilon) / (ref_precipitation + epsilon) * 100.  # in percent

    return xr.Dataset({"tas": tas_bias, "pr": pr_bias})

def zonal_stats(bias, regions):
    """Compute the weighted median bias inside every region.

    Parameters
    ----------
    bias : dataset with "tas" and "pr" variables on "latitude"/"longitude"
        dimensions, clipped through its ``rio`` accessor.
    regions : table with an "id" column and a geometry per row.

    Returns
    -------
    dict mapping each region id to ``{"tas": ..., "pr": ...}``, where the
    values are the medians rounded to 3 (tas) and 2 (pr) decimals and
    converted with ``.tolist()``.
    """
    spatial_dims = ["latitude", "longitude"]
    # Output key -> number of decimals kept for that variable.
    decimals_by_variable = (("tas", 3), ("pr", 2))

    per_region = {}
    indexed_regions = regions.set_index("id")
    for region_id, row in tqdm.tqdm(indexed_regions.iterrows()):
        # all_touched=True keeps every grid cell the polygon touches.
        clipped = bias.rio.clip([row.geometry], all_touched=True)
        # Weight each cell by the cosine of its latitude.
        weights = np.cos(np.deg2rad(clipped["latitude"]))
        medians = clipped.weighted(weights).quantile(0.5, dim=spatial_dims)
        per_region[region_id] = {
            variable: medians[variable].values.round(decimals).tolist()
            for variable, decimals in decimals_by_variable
        }
    return per_region

# Regional median bias for each reference dataset, keyed by reference name.
# ERA5 is evaluated first, then E-OBS.
bias_by_ref = {
    reference: zonal_stats(bias(ref_tas, ref_pr), regions)
    for reference, (ref_tas, ref_pr) in {
        "era5": (era5_tas, era5_pr),
        "eobs": (eobs_tas, eobs_pr),
    }.items()
}

In [None]:
# Re-nest the results from reference -> region -> variable
# into variable -> "bias" -> reference -> region.
bias_restructured = {
    var_name: {
        "bias": {
            ref_name: {
                region_id: bias_by_ref[ref_name][region_id][var_name]
                for region_id in regions["id"]
            }
            for ref_name in ("eobs", "era5")
        }
    }
    for var_name in ("pr", "tas")
}

In [None]:
# Final payload written to data.json:
#   - "pr" / "tas": the regional bias values from bias_restructured,
#   - "models": one {"gcm", "rcm", "ens"} record per entry of models["model"],
#     obtained by splitting the space-separated "<gcm> <rcm> <ens>" label,
#   - "attrs": display name, unit and reference period for each variable.
out = {
    **bias_restructured,
    "models": [dict(zip(["gcm", "rcm", "ens"], m.split(" "))) for m in models["model"].values],
    "attrs": {
        "pr": {
            "name": "precipitation",
            "bias": {
                "unit": "%",
                "period": "1971-2000"
            }
        },
        "tas": {
            "name": "temperature",
            "bias": {
                # Degree sign written correctly; the previous literal was the
                # mis-decoded (mojibake) form of this same string.
                "unit": "°C",
                "period": "1971-2000"
            }
        }
    }
}

# Write the payload with sorted keys; ignore_nan=True makes simplejson emit
# null instead of NaN for missing values.
with open("../data/data.json", "w") as json_file:
    simplejson.dump(out, json_file, ignore_nan=True, sort_keys=True)