#### The era5 data is downloaded from the gee platform (geotif format)
Note: Data download by official API request has preblems for the ERA5-Land data.   



In [None]:
import rasterio
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
import rioxarray
from glob import glob
import warnings
warnings.filterwarnings('ignore')


In [10]:
path_hma_gtng = 'data/hma-extent/HMA/gtng_202307_hma_subregions.gpkg'
file_list_t2m = glob('data/climate/era5_from_gee/*t2m*.tif')
file_list_tp = glob('data/climate/era5_from_gee/*tp*.tif')
file_list_te = glob('data/climate/era5_from_gee/*te*.tif')
file_list_te


['data/climate/era5_from_gee/era5_land_yearly_te_2000.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2005.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2010.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2020.tif',
 'data/climate/era5_from_gee/era5_land_yearly_te_2015.tif']

In [11]:
hma_gtng = gpd.read_file(path_hma_gtng)


#### 1. Convert the geotiff image to .nc file. 


In [12]:
def read_merge_tiffs(file_list):
    dataarrays = []
    file_time = [file.split('.')[0][-4:] for file in file_list]
    for file in file_list:
        with rasterio.open(file) as src:
            data = src.read(1)
            coords = {
                'lat': src.bounds.top - (src.res[1] * np.arange(src.height)),
                'lon': src.bounds.left + (src.res[0] * np.arange(src.width))}
            dataarrays.append(xr.DataArray(data, dims=['lat', 'lon'], coords=coords))
    da_merged = xr.concat(dataarrays, dim='time').assign_coords(time=file_time)
    return da_merged
# xda = read_merge_tiffs(file_list_t2m)
# xda


In [13]:
era5_t2m = read_merge_tiffs(file_list_t2m)   # temperature 2m data
era5_tp = read_merge_tiffs(file_list_tp)     # precipitation data
era5_te = read_merge_tiffs(file_list_te)     # evaporation data
# Merge the Datasets into a single Dataset
era5_xds = xr.merge([era5_t2m.rename('t2m'), era5_tp.rename('tp'), era5_te.rename('te')])
### add attributes
era5_xds['t2m'].attrs['units'] = 'Celsius'
era5_xds['t2m'].attrs['long_name'] = '2m temperature'
era5_xds['tp'].attrs['units'] = 'mm'
era5_xds['tp'].attrs['long_name'] = 'Total precipitation'
era5_xds['te'].attrs['units'] = 'mm'
era5_xds['te'].attrs['long_name'] = 'Total evoporation'
# era5_xds.to_netcdf('data/climate/era5_from_gee/era5_hma.nc')
era5_xds


#### 2. Clip the dataset to the HMA region and calculate statistics

In [14]:
era5_xds = era5_xds.rio.write_crs("EPSG:4326")
era5_xds = era5_xds.rio.set_spatial_dims(x_dim='lon', y_dim='lat')


In [None]:
era5_hma_xds = era5_xds.rio.clip(hma_gtng.geometry, hma_gtng.crs, drop=True)    
era5_hma_mean_xds = era5_hma_xds.mean(dim=['lat', 'lon'])
era5_hma_std_xds = era5_hma_xds.std(dim=['lat', 'lon'])
era5_hma_mean_xds
era5_hma_stats_df = pd.DataFrame({
  'year': era5_hma_mean_xds['time'].values,
  't2m_mean': era5_hma_mean_xds['t2m'].values,
  't2m_std': era5_hma_std_xds['t2m'].values,
  'tp_mean': era5_hma_mean_xds['tp'].values,
  'tp_std': era5_hma_std_xds['tp'].values,
  'te_mean': era5_hma_mean_xds['te'].values,
  'te_std': era5_hma_std_xds['te'].values
  })
# era5_hma_stats_df.to_csv('data/climate/era5_from_gee/era5_hma_stats.csv', index=False)
era5_hma_stats_df


Unnamed: 0,year,t2m_mean,t2m_std,tp_mean,tp_std,te_mean,te_std
0,2000,-0.110297,7.929097,823.269042,864.480223,-392.335179,188.038721
1,2005,0.118795,7.994581,788.953745,747.157039,-388.390714,197.308452
2,2010,0.873627,7.824923,871.234751,961.16821,-392.977058,188.886095
3,2015,0.741264,7.955596,775.524573,799.487095,-398.918661,198.086796
4,2020,0.794922,7.691664,825.91009,954.954189,-382.970528,201.36948


#### 3. Clip era5 to hma subregions and calculate statistics


In [78]:
### calculate statistics for hma subregions and save to a dictionary
### key: region name, value: clipped xarray dataset
era5_hma_subregion = {}
for idx, row in hma_gtng.iterrows():
    region_name = row['full_name']
    geometry = [row['geometry']]
    clipped = era5_xds.rio.clip(geometry, hma_gtng.crs, drop=True)
    era5_hma_subregion[region_name] = clipped
era5_hma_subregion[region_name]

### Calculate mean for each year and each subregion, store as a DataFrame
hma_subregions_stats = []
for region_name, xds in era5_hma_subregion.items():  # for each subregion
    for year in xds['time'].values:        # for each year in the subregion
        mean_t2m = float(xds['t2m'].sel(time=year).mean().values)
        mean_tp = float(xds['tp'].sel(time=year).mean().values)
        mean_te = float(xds['te'].sel(time=year).mean().values)
        geometry = hma_gtng.loc[hma_gtng['full_name'] == region_name, 'geometry'].values[0]
        hma_subregions_stats.append({
            'region': region_name,
            'year': year,
            't2m_mean': mean_t2m,
            'tp_mean': mean_tp,
            'te_mean': mean_te,
            'geometry': geometry
            })

hma_subregions_stat_df = gpd.GeoDataFrame(hma_subregions_stats, \
                                            geometry='geometry', crs=hma_gtng.crs)
hma_subregions_stat_df.to_csv('data/climate/era5_from_gee/era5_hma_subregions_stats.csv', index=False)
hma_subregions_stat_df.head(6)


Unnamed: 0,region,year,t2m_mean,tp_mean,te_mean,geometry
0,Hissar Alay,2000,5.977831,798.537703,-423.39884,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
1,Hissar Alay,2005,5.969801,817.264665,-443.917581,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
2,Hissar Alay,2010,6.965905,945.949682,-452.230907,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
3,Hissar Alay,2015,6.712568,906.098206,-438.339262,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
4,Hissar Alay,2020,6.198827,818.928195,-444.369027,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
5,Pamir (Safed Khirs / West Tarim),2000,-6.286692,607.587301,-300.975995,"MULTIPOLYGON (((74.35547 39.80418, 74.37581 39..."
