In [2]:
import xarray as xr
from ocf_blosc2 import Blosc2
from tqdm import tqdm

In [3]:
# Inclusive (first_hour, last_hour) window of hours-of-day to keep for each
# calendar month (1 = January ... 12 = December). Both bounds are compared with
# >= / <= in the loop below, so timestamps whose hour equals `last_hour` are kept.
# NOTE(review): hours are read straight from the dataset's `time` coordinate;
# presumably UTC -- confirm against the source data.
month_to_times = {
    1: (8, 16),
    2: (8, 17),
    3: (7, 18),
    4: (7, 19),
    5: (6, 20),
    6: (5, 20),
    7: (5, 20),
    8: (6, 20),
    9: (7, 19),
    10: (7, 18),
    11: (7, 16),
    12: (8, 16)
}
# NOTE(review): `type` shadows the Python builtin for the rest of the kernel
# session. The name is left unchanged because later cells may read it; prefer a
# name such as DATA_KIND once all users can be updated together.
type = "weather"
# Variables copied into the processed store; everything else is discarded.
NWP_FEATURES = ["t_500", "clcl", "alb_rad", "tot_prec", "ww", "relhum_2m", "h_snow", "aswdir_s", "td_2m", "omega_1000"]

# For every month of 2021: open the raw store, keep only the selected features
# and the timestamps inside that month's hour window, rechunk, and write the
# result to the matching path under the "<type>_proc" directory.
for month in tqdm(range(1, 12 + 1)):
    print('opening dataset')
    # The context manager closes the zip-backed store once the month has been
    # written, instead of leaving twelve open handles behind.
    with xr.open_dataset(
        f"/data/{type}/2021/{month}.zarr.zip",
        engine="zarr",
        consolidated=True,
        chunks={"time": "auto"}
    ) as month_ds:
        start, stop = month_to_times[month]
        print('filtering')
        # Pick the wanted variables first so the filter and rechunk below are
        # only built for data that will actually be written.
        month_features = month_ds[NWP_FEATURES]
        hour_of_day = month_ds['time'].dt.hour
        in_window = ((hour_of_day >= start) & (hour_of_day <= stop)).values
        # Boolean positional selection along `time`. Unlike
        # `where(..., drop=True)` this never NaN-masks, so variable dtypes are
        # preserved and variables are not broadcast against the condition.
        filtered_dataset_lazy = month_features.isel(time=in_window)
        print('chunking')
        filtered_dataset_lazy = filtered_dataset_lazy.chunk("auto")
        print('writing')
        # Must run inside the `with` block: the data is lazy and is only read
        # from the source store while this write executes.
        # NOTE(review): with the default write mode this is expected to fail if
        # the target store already exists -- confirm before re-running the cell.
        filtered_dataset_lazy.to_zarr(f"/data/{type}_proc/2021/{month}.zarr")



  0%|          | 0/12 [00:00<?, ?it/s]

opening dataset
filtering
chunking
writing


  8%|▊         | 1/12 [00:07<01:23,  7.60s/it]

opening dataset
filtering
chunking
writing


 17%|█▋        | 2/12 [00:19<01:38,  9.85s/it]

opening dataset
filtering
chunking
writing


 25%|██▌       | 3/12 [00:31<01:37, 10.87s/it]

opening dataset
filtering
chunking
writing


 33%|███▎      | 4/12 [00:44<01:33, 11.67s/it]

opening dataset
filtering
chunking
writing


 42%|████▏     | 5/12 [01:10<01:57, 16.85s/it]

opening dataset
filtering
chunking
writing


 50%|█████     | 6/12 [01:29<01:45, 17.59s/it]

opening dataset
filtering
chunking
writing


 58%|█████▊    | 7/12 [01:48<01:31, 18.35s/it]

opening dataset
filtering
chunking
writing


 67%|██████▋   | 8/12 [02:05<01:11, 17.77s/it]

opening dataset
filtering
chunking
writing


 75%|███████▌  | 9/12 [02:20<00:50, 16.82s/it]

opening dataset
filtering
chunking
writing


 83%|████████▎ | 10/12 [02:37<00:33, 16.88s/it]

opening dataset
filtering
chunking
writing


 92%|█████████▏| 11/12 [02:51<00:16, 16.20s/it]

opening dataset
filtering
chunking
writing


100%|██████████| 12/12 [03:06<00:00, 15.56s/it]
