# ERA5: process data

Process the raw ERA5 data (0.25°, 1-hourly), which is too large to use directly. Compute:
- annual TXx
- annual TNn
- daily T mean

**NOTE**: use the iacpy3_2023 kernel for a faster flox version

In [None]:
from pathlib import Path
import os

from utils import era5
import xarray as xr

In [None]:
# one output folder per post-processed variable; existing folders are left alone
output_dirs = (
    "../data/era5/tnn/",
    "../data/era5/txx/",
    "../data/era5/tmean/",
)

for folder in output_dirs:
    Path(folder).mkdir(parents=True, exist_ok=True)

## Get TXx and TNn from hourly t2m data

In [None]:
# Reduce each file of hourly t2m data to its annual maximum (TXx) and annual
# minimum (TNn) and write the two results to separate netCDF files.
fc = era5.files_orig.find_files(time_res="1h", variable="t2m")

for filename, meta in fc:
    year = meta["year"]
    fN_out_txx = era5.files_post.create_full_name(variable="txx", year=year)
    fN_out_tnn = era5.files_post.create_full_name(variable="tnn", year=year)

    # skip years for which both outputs already exist
    if os.path.isfile(fN_out_txx) and os.path.isfile(fN_out_tnn):
        continue

    print(f"- {year}")
    print(f"  - src: {filename}")
    print(f"  - dst: {fN_out_txx}")
    print(f"  - dst: {fN_out_tnn}")

    # the context manager closes the source file again; without it one open
    # file handle per processed year is kept until the kernel is shut down
    with xr.open_dataset(filename) as ds_raw:
        ds = ds_raw.rename(longitude="lon", latitude="lat")

        # remove poles
        ds = ds.sel(lat=era5.SEL_LAT)

        # load into memory before the file is closed; both reductions below
        # operate on this in-memory copy
        ds = ds.load()

    # compute annual max
    # NOTE(review): recent pandas versions deprecate the "A" alias in favour of
    # "YE" - confirm against the installed pandas/xarray before changing it
    txx = ds.resample(time="A").max()
    txx.to_netcdf(fN_out_txx)

    # compute annual min
    tnn = ds.resample(time="A").min()
    tnn.to_netcdf(fN_out_tnn)

## Get daily t mean from hourly t2m data

In [None]:
# Reduce each file of hourly t2m data to daily means and write the result to a
# netCDF file.
fc = era5.files_orig.find_files(time_res="1h", variable="t2m")

for filename, meta in fc:
    year = meta["year"]
    fN_out = era5.files_post.create_full_name(variable="tmean", year=year)

    # skip years that were already processed
    if os.path.isfile(fN_out):
        continue

    print(f"- {year}")
    print(f"  - src: {filename}")
    print(f"  - dst: {fN_out}")

    # the data is not loaded explicitly, so the source file has to stay open
    # until the result is written; the context manager closes it afterwards
    with xr.open_dataset(filename) as ds_raw:
        ds = ds_raw.rename(longitude="lon", latitude="lat")

        # remove poles
        ds = ds.sel(lat=era5.SEL_LAT)

        # "D" is the canonical alias for daily frequency (the lowercase
        # spelling is not accepted consistently across pandas versions)
        tmean = ds.resample(time="D").mean()
        tmean.to_netcdf(fN_out)