In [None]:
import xarray as xr
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import json

import unidecode
import simplejson

In [None]:
# Load the region table and derive, for every region, the punctuation-free
# ASCII form of its Latin name ("FILENAME"). That column is then used as the
# index so regions can be looked up by this name further down.
regions = gpd.read_file("../../data/regions.geojson")

# `regex=False` is passed explicitly on every call: ".", "(" and ")" are
# regular-expression metacharacters, and the default of `regex` differs
# between pandas 1.x and 2.x. Being explicit keeps the replacements literal
# on every version and avoids the pandas 1.x FutureWarning.
# The order of the calls is kept: "--" is removed only after ",", "." and ":"
# have been stripped, so e.g. "-.-" collapses to "--" first and is then
# removed as well.
regions["FILENAME"] = (
    regions["NAME_LATN"]
    .map(unidecode.unidecode)  # transliterate accented characters to ASCII
    .str.replace(",", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(":", "", regex=False)
    .str.replace("--", "", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("/", "", regex=False)
    .str.replace("'", "", regex=False)
)

# Index by FILENAME while keeping FILENAME available as a regular column.
regions = regions.set_index(regions["FILENAME"])

In [None]:
# Load the run metadata. The merge loop below iterates `metadata["models"]`
# and reads the "gcm", "rcm" and "ens" keys of each entry.
# The file is decoded as UTF-8 explicitly (the encoding JSON is specified in)
# instead of relying on the platform's default text encoding.
with open("../../data/metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)

## Uncertainty files

In [None]:
%%bash
# Remove previously merged files so the merge cell below regenerates them.
# -f: exit successfully even when no merged.nc exists yet (e.g. first run);
# a plain `rm` would fail on the unmatched glob and abort "Run All".
rm -f uncertainty/*/*/*/merged.nc

Merge all regions within each folder first.

In [None]:
def add_region_dim(ds):
    """Label a single-file dataset with the region named in its file name.

    Used as the ``preprocess`` callback of ``xr.open_mfdataset`` for the
    uncertainty files.

    The base name of ``ds.encoding["source"]`` is split on underscores; the
    fields from index 5 up to (but excluding) the last two are joined with
    spaces and transliterated to ASCII to form the region label.

    Parameters
    ----------
    ds : dataset object whose ``encoding["source"]`` holds the path it was
        read from.

    Returns
    -------
    The loaded dataset without the ``time_bnds`` / ``height`` variables
    (ignored when absent) and with a one-element ``region`` coordinate.
    """
    source_file = os.path.basename(ds.encoding["source"])
    region_words = source_file.split("_")[5:-2]
    region_label = unidecode.unidecode(" ".join(region_words))

    loaded = ds.load()
    trimmed = loaded.drop_vars(["time_bnds", "height"], errors="ignore")
    return trimmed.assign_coords({"region": [region_label]})

# For every model / scenario / variable folder, combine the per-region files
# into one `merged.nc` written next to them.
for model in metadata["models"]:
    for scenario in ["historical", "rcp26", "rcp45", "rcp85"]:
        for variable in ["tas", "pr"]:
            folder = f"uncertainty/{model['gcm']}_{model['rcm']}/{scenario}/{variable}"
            # Skip folders that do not exist, are empty, or were already merged.
            if not os.path.isdir(folder) or os.path.exists(f"{folder}/merged.nc") or not os.listdir(folder):
                print(folder, "SKIPPING")
                continue
            print(folder)
            # The dataset returned by open_mfdataset is the object that owns
            # the underlying file handles, so it is the one managed by `with`:
            # the files are released even if writing fails. `assign_coords`
            # returns a new Dataset, so calling close() on that derived object
            # is not a reliable way to release the source files.
            with xr.open_mfdataset(f"{folder}/*.nc", preprocess=add_region_dim) as merged:
                ds = merged.assign_coords({
                    "scenario": [scenario],
                    "model": ["{gcm} {rcm} {ens}".format(**model)]
                })
                # Replace the datetime axis by the plain calendar year.
                ds["time"] = ds["time"].dt.year
                ds.to_netcdf(f"{folder}/merged.nc")

Merge all per-folder `merged.nc` files into a single file.

In [None]:
# Combine every per-folder merged.nc into one in-memory dataset.
# The multi-file dataset is opened in a `with` block so its file handles are
# released once the data has been loaded; `.load()` runs inside the block,
# which makes `ds_all` independent of the files afterwards.
with xr.open_mfdataset("uncertainty/*/*/*/merged.nc") as merged_all:
    ds_all = (
        merged_all
        .squeeze()  # drop all length-1 dimensions
        # Raises KeyError if this region label is not present in the data.
        .drop_sel({"region": "Area Metropolitana de Lisboa"})
        .load()
    )

# Replace the region labels by the "id" of the matching row in `regions`
# (which is indexed by FILENAME).
ds_all["region"] = regions.loc[ds_all["region"].values, "id"].values
ds_all.to_netcdf("uncertainty.nc")

## Ensmean file

In [None]:
%%bash
# Remove previously merged files so the merge cell below regenerates them.
# -f: exit successfully even when no merged.nc exists yet (e.g. first run);
# a plain `rm` would fail on the unmatched glob and abort "Run All".
rm -f ensmean/*/*/merged.nc

Same procedure as for the uncertainty files, but without the model dimension.

In [None]:
def add_region_dim(ds):
    """Label a single-file dataset with the region named in its file name.

    Used as the ``preprocess`` callback of ``xr.open_mfdataset`` for the
    ensemble-mean files. Note that this definition replaces the earlier
    ``add_region_dim`` in the kernel; the two differ only in where the
    region starts in the file name (field 2 here).

    The base name of ``ds.encoding["source"]`` is split on underscores; the
    fields from index 2 up to (but excluding) the last two are joined with
    spaces and transliterated to ASCII to form the region label.

    Parameters
    ----------
    ds : dataset object whose ``encoding["source"]`` holds the path it was
        read from.

    Returns
    -------
    The loaded dataset without the ``time_bnds`` / ``height`` variables
    (ignored when absent) and with a one-element ``region`` coordinate.
    """
    source_file = os.path.basename(ds.encoding["source"])
    region_words = source_file.split("_")[2:-2]
    region_label = unidecode.unidecode(" ".join(region_words))

    loaded = ds.load()
    trimmed = loaded.drop_vars(["time_bnds", "height"], errors="ignore")
    return trimmed.assign_coords({"region": [region_label]})

# For every scenario / variable folder, combine the per-region files into one
# `merged.nc` written next to them.
for scenario in ["historical", "rcp26", "rcp45", "rcp85"]:
    for variable in ["tas", "pr"]:
        folder = f"ensmean/{scenario}/{variable}"
        # Skip folders that do not exist, are empty, or were already merged.
        if not os.path.isdir(folder) or os.path.exists(f"{folder}/merged.nc") or not os.listdir(folder):
            print(folder, "SKIPPING")
            continue
        print(folder)
        # The dataset returned by open_mfdataset is the object that owns the
        # underlying file handles, so it is the one managed by `with`: the
        # files are released even if writing fails. `assign_coords` returns a
        # new Dataset, so calling close() on that derived object is not a
        # reliable way to release the source files.
        with xr.open_mfdataset(os.path.join(folder, "*.nc"), preprocess=add_region_dim) as merged:
            ds = merged.assign_coords({"scenario": [scenario]})
            # Replace the datetime axis by the plain calendar year.
            ds["time"] = ds["time"].dt.year
            ds.to_netcdf(f"{folder}/merged.nc")

In [None]:
# Combine every per-folder merged.nc into one in-memory dataset.
# The multi-file dataset is opened in a `with` block so its file handles are
# released once the data has been loaded; `.load()` runs inside the block,
# which makes `ds_all` independent of the files afterwards.
with xr.open_mfdataset("ensmean/*/*/merged.nc") as merged_all:
    ds_all = (
        merged_all
        .squeeze()  # drop all length-1 dimensions
        # Raises KeyError if this region label is not present in the data.
        .drop_sel({"region": "Area Metropolitana de Lisboa"})
        .load()
    )

# Replace the region labels by the "id" of the matching row in `regions`
# (which is indexed by FILENAME).
ds_all["region"] = regions.loc[ds_all["region"].values, "id"].values
ds_all.to_netcdf("ensmean.nc")