In [1]:
# Extract 1-hourly 500 hPa Temperature from regridded vertical velocity (hur) data of UM zoom 6 model
# Extract 6-ourly 500 hPa Temperature from regridded vertical velocity (hur) data of ICON zoom 6 model
import xarray as xr
from pathlib import Path
import easygems.healpix as egh
import healpy as hp
import numpy as np
from metpy.calc import vertical_velocity_pressure
from metpy.units import units
import pandas as pd

p = 500 # hPa; level of interest
data = "/g/data/qx55/uk_node/glm.n2560_RAL3p3/data.healpix.PT1H.z6.zarr" #PT1H is hourly data
data = "/g/data/qx55/germany_node/d3hp003.zarr/PT6H_point_z6_atm.zarr" #PT6H is 6-hourly data
ds = xr.open_zarr(data)

vname = 'tas'
r = ds[vname]
#r = ds['hur'].sel(pressure=p*100)

def get_nn_lon_lat_index(nside, lons, lats):
    """
    nside: integer, power of 2. The return of hp.get_nside()
    lons: uniques values of longitudes
    lats: uniques values of latitudes
    returns: array with the HEALPix cells that are closest to the lon/lat grid
    """
    lons2, lats2 = np.meshgrid(lons, lats)
    return xr.DataArray(
        hp.ang2pix(nside, lons2, lats2, nest = True, lonlat = True),
        coords=[("latitude", lats), ("longitude", lons)],
    )

KeyError: "No variable named 'tas'. Did you mean one of ('ts', 'ta', 'vas', 'uas')?"

In [None]:
this_nside = hp.get_nside(r)
res = 1  # resolution
#MC lon: 100 - 149E, lat: 13S to 13N
lon = np.arange(100, 149 + res, res)
lat = np.arange(-13, 13 + res, res)

cells = get_nn_lon_lat_index(this_nside, lon, lat) 
r_regrid = r.isel(cell=cells)

In [None]:
# Create time selection (no need to modify)
full_range = pd.date_range(start="2020-03-01", end="2021-02-28", freq="3h")
full_range = pd.date_range(start="2020-03-01", end="2021-02-28", freq="6h")
date_str_list = full_range.strftime("%Y-%m-%dT%H:%M:%S").tolist()

#exclude_start = pd.Timestamp("2020-05-01 00:00:00")
#exclude_end = pd.Timestamp("2020-09-30 00:00:00")
#filtered_range = full_range[(full_range < exclude_start) | (full_range > exclude_end)]
#date_str_list = filtered_range.strftime("%Y-%m-%dT%H:%M:%S").tolist()

dir_out = '/scratch/nf33/hk25_LSP/UK/MC_chunk/'
#dir_out = '/scratch/nf33/hk25_LSP/Germany/MC_chunk/'
Path(dir_out).mkdir(exist_ok=True)

# ✅ Start looping only over the gridded lat/lon (efficient)
grid_id = 1
for i in range(0, len(lat) - 1, 2): 
    for j in range(0, len(lon) - 1, 2):  
        lat_box = slice(lat[i], lat[i+1])
        lon_box = slice(lon[j], lon[j+1])

        try:
            r_box = r_regrid.sel(latitude=lat_box, longitude=lon_box)

            #r500_box = r_box.sel(time=date_str_list).mean(dim=("latitude", "longitude"))
            r500_box = r_box.sel(time=date_str_list)#.mean(dim=("latitude", "longitude"))
            r500_box = r500_box.to_dataset(name=vname)

            fname = f'{dir_out}{vname}_3hr_{grid_id:03d}.nc'
            fname = f'{dir_out}{vname}_6hr_{grid_id:03d}.nc'
            r500_box.to_netcdf(fname)
            print(f"Saved {fname}")

        except Exception as e:
            print(f"Skipped grid {grid_id:03d} due to error: {e}")

        grid_id += 1
