In [1]:
from pathlib import Path
import asyncio
import json
import fsspec
from collections import defaultdict

from kerchunk.combine import MultiZarrToZarr, merge_vars, auto_dask
from kerchunk.zarr import ZarrToZarr

import numpy as np
import xarray as xr
import pvdeg

# Root of the model-output results; load_model_outs_zarrs scans it for
# <subdir>/<conf>/*.zarr stores.
MODEL_OUTS_DIR = Path("/projects/inspire/PySAM-MAPS/v1.1/model-outs/")
# Root of the postprocessing results; load_postprocessing_zarrs scans it for
# <subdir>/*.zarr stores.
POSTPROCESS_OUTS_DIR = Path("/projects/inspire/PySAM-MAPS/v1.1/postprocess/")

In [None]:
# Start dask through pvdeg's helper.
# NOTE(review): what the argument 32 configures is not visible in this
# notebook — confirm against the pvdeg.geospatial.start_dask signature.
pvdeg.geospatial.start_dask(32)

In [2]:
def load_model_outs_zarrs(confs: list[str]) -> dict[str, list[xr.Dataset]]:
    """
    Find and open every model-output zarr store for the requested configs.

    A store is any ``*.zarr`` entry found at ``MODEL_OUTS_DIR/<subdir>/<conf>/``,
    where ``<subdir>`` is any entry directly under ``MODEL_OUTS_DIR`` and
    ``<conf>`` is matched as a glob pattern.

    Parameters
    ----------
    confs : list[str]
        Config directory names (used as glob patterns), e.g. ``["01", "02"]``.

    Returns
    -------
    dict[str, list[xr.Dataset]]
        Maps each config to its datasets, as returned by ``xr.open_zarr``,
        ordered by store path. A config with no stores maps to an empty list.
    """
    # FIND all MODEL_OUTS paths by config
    model_outs_all_conf_zarrs_paths = {}
    for conf in confs:
        # Build a fresh, sorted list per config: iterdir()/glob() yield entries
        # in arbitrary order, and a config repeated in `confs` must not have
        # its stores collected twice.
        model_outs_all_conf_zarrs_paths[conf] = sorted(
            zarr_path
            for top_dir in MODEL_OUTS_DIR.iterdir()
            for conf_dir in top_dir.glob(conf)
            for zarr_path in conf_dir.glob("*.zarr")
        )
        print(f"found {len(model_outs_all_conf_zarrs_paths[conf])} zarrs for model outs conf {conf}")

    # LOAD all MODEL_OUTS zarrs by config
    model_outs_all_conf_zarrs = {}
    for conf in confs:
        print(f"loading model outs zarrs for conf {conf}")
        model_outs_all_conf_zarrs[conf] = [
            xr.open_zarr(path) for path in model_outs_all_conf_zarrs_paths[conf]
        ]
        print(f"loaded {len(model_outs_all_conf_zarrs[conf])} MODEL OUTS zarrs to for config {conf}.")

    return model_outs_all_conf_zarrs

def load_postprocessing_zarrs(confs: list[str]) -> dict[str, list[xr.Dataset]]:
    """
    Find and open every postprocessing zarr store for the requested configs.

    A store is any ``*.zarr`` entry found at ``POSTPROCESS_OUTS_DIR/<subdir>/``.
    Each opened dataset has ``drop_dims(10)`` applied before it is returned.

    Parameters
    ----------
    confs : list[str]
        Config names used as keys of the returned mapping.

    Returns
    -------
    dict[str, list[xr.Dataset]]
        Maps each config to its datasets, ordered by store path.
    """
    # find all postprocessing zarrs paths
    postprocess_all_conf_zarrs_paths = {}
    for conf in confs:
        # NOTE(review): the "*.zarr" pattern does not involve `conf`, so every
        # config is given every store under POSTPROCESS_OUTS_DIR. This looks
        # unintended (the model-outs loader filters by config) — confirm the
        # on-disk naming before narrowing the pattern.
        #
        # A fresh, sorted list per config keeps the order reproducible and
        # avoids collecting stores twice when a config is repeated in `confs`.
        postprocess_all_conf_zarrs_paths[conf] = sorted(
            conf_zarr
            for state_dir in POSTPROCESS_OUTS_DIR.iterdir()
            for conf_zarr in state_dir.glob("*.zarr")
        )

    # load all postprocessing zarrs
    postprocess_all_conf_zarrs = {}
    for conf in confs:
        print(f"loading postprocessing zarrs for conf {conf}")
        # NOTE(review): drop_dims takes dimension names; the integer 10 is
        # passed here — confirm the stores really have a dimension named 10
        # (rather than "10", or a dimension of length 10).
        postprocess_all_conf_zarrs[conf] = [
            xr.open_zarr(path).drop_dims(10)  # extra dim in dataset
            for path in postprocess_all_conf_zarrs_paths[conf]
        ]

        print(f"loaded {len(postprocess_all_conf_zarrs[conf])} POSTPROCESS zarrs to for config {conf}.")

    return postprocess_all_conf_zarrs

In [None]:
# Configs to process; only "01" is enabled, the rest are commented out.
confs = ["01",]# "02", "03", "04", "05", "06", "07", "08", "09", "10"]

# Open every store for the selected configs (dict: conf -> list of datasets).
model_outs_all_conf_zarrs = load_model_outs_zarrs(confs)
postprocess_all_conf_zarrs = load_postprocessing_zarrs(confs)

# Join the conf "01" model-output datasets along "gid".
# NOTE(review): the key '01' is hard-coded rather than taken from `confs`.
model_res = xr.concat(model_outs_all_conf_zarrs['01'], dim="gid")

found 1660 zarrs for model outs conf 01
loading model outs zarrs for conf 01
loaded 1660 MODEL OUTS zarrs to for config 01.
loading postprocessing zarrs for conf 01
