#### The ERA5 data is preprocessed (GeoTIFF to NetCDF file) and downloaded from the GEE platform
Note: the data is downloaded from the GEE (Google Earth Engine) platform. Downloading by API request has problems for the ERA5-Land data.


In [2]:
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import numpy as np
import rasterio
from glob import glob
import re

In [3]:
# Collect the yearly GeoTIFF paths for each variable: 2 m temperature (t2m),
# total precipitation (tp) and total evaporation (te).
# glob() gives no ordering guarantee, so the lists are not sorted by year.
file_list_t2m, file_list_tp, file_list_te = (
  glob(pattern)
  for pattern in (
    'data/climate/era5_from_gee/*t2m*.tif',
    'data/climate/era5_from_gee/*tp*.tif',
    'data/climate/era5_from_gee/*te*.tif',
  )
)
file_list_te

['data/climate/era5_from_gee/era5_land_yearly_te_2020.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2015.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2000.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2010.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2005.tif']

#### For easier manipulation, we convert the GeoTIFF images to a NetCDF (.nc) file.


In [4]:
# Function to extract year from file name
def extract_year(file_name):
  """Return the four-digit year embedded in a file name, or None.

  The base name (last path component) is searched first so that digits in a
  parent directory, e.g. ``data/2023_export/te_2020.tif``, are not mistaken
  for the data year. If the base name holds no run of four digits, the whole
  path is searched as a fallback, which keeps the previous result for paths
  whose year only appears in a directory name.

  Parameters
  ----------
  file_name : str
    File name or path to inspect.

  Returns
  -------
  int or None
    The first run of four digits found, as an integer, or None when neither
    the base name nor the full path contains one.
  """
  import os.path  # local import: the notebook's import cell does not import os

  for candidate in (os.path.basename(file_name), file_name):
    match = re.search(r'\d{4}', candidate)
    if match:
      return int(match.group())
  return None

def tiff_to_dataarray(file_path):
  """Read band 1 of a GeoTIFF into a 2-D ``xarray.DataArray``.

  The ``latitude`` / ``longitude`` coordinates are derived from the raster
  bounds and pixel size and label the *centre* of each pixel. The raster
  bounds are the outer edges of the grid, so half a pixel is added to move
  from the edge of the first cell to its centre; without that offset every
  coordinate would be shifted by half a cell towards the upper-left corner.

  Parameters
  ----------
  file_path : str
    Path of the GeoTIFF to read.

  Returns
  -------
  xarray.DataArray
    Band 1 with dims ``('latitude', 'longitude')``; latitude runs from the
    top row of the raster downwards.

  Notes
  -----
  Assumes a north-up raster (no rotation/shear in the transform) whose CRS
  units are degrees -- TODO confirm for the exported files.
  NOTE(review): nodata pixels are returned as stored; they are not masked.
  """
  with rasterio.open(file_path) as src:
    band = src.read(1)
    x_res, y_res = src.res  # (pixel width, pixel height)
    # Offsets, in pixels, from the raster edge to each pixel centre.
    row_centres = np.arange(src.height) + 0.5
    col_centres = np.arange(src.width) + 0.5
    coords = {
      'latitude': src.bounds.top - y_res * row_centres,
      'longitude': src.bounds.left + x_res * col_centres,
    }
  # The band is fully read into memory, so the file can be closed before
  # the DataArray is built.
  return xr.DataArray(band, dims=['latitude', 'longitude'], coords=coords)
def _load_yearly_stack(file_list):
  """Load yearly GeoTIFFs and stack them along a year-sorted ``time`` axis.

  glob() returns paths in arbitrary order, so the files are sorted by the
  year parsed from their names before stacking. This keeps the ``time``
  coordinate monotonic and identical across variables.

  Parameters
  ----------
  file_list : list of str
    Paths of the yearly GeoTIFFs of one variable.

  Returns
  -------
  tuple
    ``(dataarrays, stacked)``: the per-year 2-D DataArrays in year order and
    their concatenation along ``time`` (labelled with the integer years).

  Raises
  ------
  ValueError
    If ``file_list`` is empty or a file name contains no four-digit year.
  """
  if not file_list:
    raise ValueError('no GeoTIFF files found for this variable')
  year_of = {path: extract_year(path) for path in file_list}
  unparsed = [path for path, year in year_of.items() if year is None]
  if unparsed:
    # A None label would silently corrupt the time coordinate.
    raise ValueError(f'cannot find a four-digit year in: {unparsed}')
  ordered = sorted(file_list, key=year_of.get)
  dataarrays = [tiff_to_dataarray(path) for path in ordered]
  stacked = xr.concat(dataarrays, dim='time')
  stacked['time'] = [year_of[path] for path in ordered]
  return dataarrays, stacked


### 2m temperature data
dataarrays_t2m, era5_t2m = _load_yearly_stack(file_list_t2m)
### total precipitation data
dataarrays_tp, era5_tp = _load_yearly_stack(file_list_tp)
### total evaporation data
dataarrays_te, era5_te = _load_yearly_stack(file_list_te)
# Merge the three variables into a single Dataset
dset_era5 = xr.merge([era5_t2m.rename('t2m'), era5_tp.rename('tp'), era5_te.rename('te')])
### add attributes
# NOTE(review): the units below are only labels; no conversion happens here.
# Presumably the GEE export already converted the data -- confirm.
dset_era5['t2m'].attrs['units'] = 'Celsius'
dset_era5['t2m'].attrs['long_name'] = '2 metre temperature'
dset_era5['tp'].attrs['units'] = 'mm'
dset_era5['tp'].attrs['long_name'] = 'Total precipitation'
dset_era5['te'].attrs['units'] = 'mm'
# Spelling fixed ('evoporation' -> 'evaporation'); this string ends up in the
# NetCDF metadata.
dset_era5['te'].attrs['long_name'] = 'Total evaporation'
# dset_era5.to_netcdf('data/climate/era5_from_gee/era5_hma.nc')
dset_era5
