# ERA5-Land monthly to OGGM model inputs (flattened HMA)

This notebook converts ERA5-Land monthly datasets (temperature, precipitation, invariant) into flattened regional NetCDF files compatible with OGGM (`ecmwf.process_ecmwf_data`). All parameters are configured at the top.


## Configuration


In [1]:
# Paths and domain
BASE_DIR = r'E:\revised_NCC_data\OGGM model\subregion_modeling\ERA5_Land data'
OUT_DIR = BASE_DIR  # write next to inputs by default

# HMA domain (lon: 67-104, lat: 46-26)
LON_MIN, LON_MAX = 66.0, 105.0
LAT_MAX, LAT_MIN = 47.0, 25.0  # note: decreasing with .sel

# Input filenames
INV_DIR = 'ERA5_Land_invariant'
T2M_FILE = 'ERA5_Land_temp_1950to2025.nc'
PRCP_FILE = 'ERA5_Land_prep_1950to2025.nc'

# Output filenames
INV_OUT = 'era5_land_invariant_flat_HMA.nc'
T2M_OUT = 'era5_land_monthly_t2m_1950-2025_flat_HMA.nc'
PRCP_OUT = 'era5_land_monthly_prcp_1950-2025_flat_HMA.nc'


In [2]:
# Imports
import os
import numpy as np
import xarray as xr

# Display options
xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x25d20ce8ec0>

## Invariant (geopotential, land-sea mask) → flattened HMA


In [19]:
# Load and crop invariant
files = os.listdir(os.path.join(BASE_DIR, INV_DIR))
paths = [os.path.join(BASE_DIR, INV_DIR, f) for f in files]

dsi = xr.open_mfdataset(paths)
dsi = dsi.sel(longitude=slice(LON_MIN, LON_MAX), 
                    latitude=slice(LAT_MAX, LAT_MIN)).isel(time=0)

# Stack to 2D points and set time to reference
inv_flat = dsi.stack(points=('latitude', 'longitude')).reset_index(('points'))
inv_flat = inv_flat.expand_dims({'time': [np.datetime64('1950-01-01')]})

# Save
inv_flat.to_netcdf(os.path.join(OUT_DIR, INV_OUT))
inv_flat

Unnamed: 0,Array,Chunk
Bytes,675.09 kiB,675.09 kiB
Shape,"(1, 86411)","(1, 86411)"
Dask graph,1 chunks in 17 graph layers,1 chunks in 17 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 675.09 kiB 675.09 kiB Shape (1, 86411) (1, 86411) Dask graph 1 chunks in 17 graph layers Data type float64 numpy.ndarray",86411  1,

Unnamed: 0,Array,Chunk
Bytes,675.09 kiB,675.09 kiB
Shape,"(1, 86411)","(1, 86411)"
Dask graph,1 chunks in 17 graph layers,1 chunks in 17 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,675.09 kiB,675.09 kiB
Shape,"(1, 86411)","(1, 86411)"
Dask graph,1 chunks in 17 graph layers,1 chunks in 17 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 675.09 kiB 675.09 kiB Shape (1, 86411) (1, 86411) Dask graph 1 chunks in 17 graph layers Data type float64 numpy.ndarray",86411  1,

Unnamed: 0,Array,Chunk
Bytes,675.09 kiB,675.09 kiB
Shape,"(1, 86411)","(1, 86411)"
Dask graph,1 chunks in 17 graph layers,1 chunks in 17 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


## Temperature (t2m) → flattened HMA time series


In [3]:
# Load, crop and drop unneeded vars
ds_t = xr.open_dataset(os.path.join(BASE_DIR, T2M_FILE))
ds_t = ds_t.sel(longitude=slice(LON_MIN, LON_MAX), latitude=slice(LAT_MAX, LAT_MIN), 
                valid_time=slice('1950-01-01', '2024-12-01'))
if 'expver' in ds_t.variables:
    ds_t = ds_t.drop_vars('expver')

# Stack all vars over lat/lon, then rename valid_time -> time
flat = ds_t.stack(points=('latitude', 'longitude')).reset_index(('points'))
flat = flat.rename({'valid_time': 'time'})
flat = flat.where(np.isfinite(flat['t2m']), drop=True)

# Save
flat.to_netcdf(os.path.join(OUT_DIR, T2M_OUT))

# Preview
xr.open_dataset(os.path.join(OUT_DIR, T2M_OUT))


## Precipitation (tp) → flattened HMA time series


In [4]:
# Load and crop precipitation
ds_p = xr.open_dataset(os.path.join(BASE_DIR, PRCP_FILE))
ds_p = ds_p.sel(longitude=slice(LON_MIN, LON_MAX), latitude=slice(LAT_MAX, LAT_MIN),
                valid_time=slice('1950-01-01', '2024-12-01'))
if 'expver' in ds_p.variables:
    ds_p = ds_p.drop_vars('expver')

#ds_p['tp'] = ds_p['tp'] * 1000 * ds_p['valid_time.daysinmonth'] # to mm/month
#ds_p['tp'].attrs['units'] = 'mm/month'

flat = ds_p.stack(points=('latitude', 'longitude')).reset_index(('points'))
flat = flat.rename({'valid_time': 'time'})
flat = flat.where(np.isfinite(flat['tp']), drop=True)

# Keep encoding simple (OGGM reads values)
flat.to_netcdf(os.path.join(OUT_DIR, PRCP_OUT))

xr.open_dataset(os.path.join(OUT_DIR, PRCP_OUT))


## Notes
- `z` in invariant is geopotential (m²/s²). OGGM expects geometric height, which it computes as `z/cfg.G`.
- `tp` is total precipitation (m/day monthly mean); OGGM converts to monthly totals internally using `daysinmonth`.
- The flattened outputs match the “latest” HMA layout expected by OGGM: invariant, t2m, tp.
