# Workflow Example with ERA5-Land input convertor
Here we illustrate a complete workflow example using the ERA5 convertor

## Imports

In [1]:
from pathlib import Path

import xarray as xr
import pandas as pd

import valenspy as vp #The Valenspy package
from valenspy.inputconverter_functions import _non_convertor, ERA5Land_to_CF

from yaml import safe_load
from typing import Union, List

In [2]:
#Import Converter - This input converter will not do anything to the data.
ic = vp.InputConverter(ERA5Land_to_CF)


## 1. Convert the data

### Get the filenames of ERA5 data corresponding to CORDEX variable name


In [3]:
machine = "hortense"

# User defined variable, here we look at 2m temperature. 
variable = "hfls"
dataset = "ERA5-Land"
region = "belgium" # "belgium"
time_freq = "daily" ## Important to account here are the postprocessed files at daily time resolution and their naming (eg max daily temperature does not correspond to ERA5 variable name as defined!!)

### Generate the path and filename of obs file

In [4]:
# Get the current file directory and load the CORDEX variables.yml file

# files = Path(__file__).resolve().parent -- this is not working in notebook
src_path = Path("../src/valenspy") ## -- to be removed. 

with open(src_path / "ancilliary_data" / "CORDEX_variables.yml") as file:
    CORDEX_VARIABLES = safe_load(file)

# ERA5-Land uses same lookup table as ERA5
with open(src_path / "ancilliary_data" / Path("ERA5_lookup.yml")) as file:
    obs_LOOKUP = safe_load(file)

In [5]:
# alternative to PATH generator. 

# get path of observational datasets from path settings .yml
with open(src_path / "ancilliary_data" / Path("dataset_PATHS.yml")) as file:
    dataset_PATHS = safe_load(file)

directory = dataset_PATHS[machine][dataset]

# get ERA5 variable corresponding to the requested variable using its look-up table
obs_var = obs_LOOKUP[variable]['obs_name']

# get ERA5 subdirectory
subdirectory = Path(directory + '/' + region +'/'+time_freq+'/'+obs_LOOKUP[variable]['obs_long_name']+'/')

# open the EOBS file for the corresponding variable
files = list(subdirectory.glob("*-"+time_freq+"-*"+obs_LOOKUP[variable]['obs_long_name']+"*20*.nc")) #Select all the netCDF files in the directory
files

[PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2007.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2008.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2018.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2012.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2021.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200

In [6]:
# test of ERA5 convertor function
ERA5Land_ds = ic.convert_input(files) #Convert the input to the correct format
ERA5Land_ds

The file is NOT ValEnsPy CF compliant.
0.00% of the variables are ValEnsPy CF compliant
NOT ValEnsPy CF compliant: ['hfls']
Unknown to ValEnsPy: ['time_bnds']
The following attributes are missing or incorrect for the variable hfls:
Attribute       Actual                    Expected                 
-----------------------------------------------------------------
units           kg m-2 s-1                W m-2                    


Unnamed: 0,Array,Chunk
Bytes,125.56 kiB,5.72 kiB
Shape,"(8036, 2)","(366, 2)"
Dask graph,22 chunks in 45 graph layers,22 chunks in 45 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 125.56 kiB 5.72 kiB Shape (8036, 2) (366, 2) Dask graph 22 chunks in 45 graph layers Data type datetime64[ns] numpy.ndarray",2  8036,

Unnamed: 0,Array,Chunk
Bytes,125.56 kiB,5.72 kiB
Shape,"(8036, 2)","(366, 2)"
Dask graph,22 chunks in 45 graph layers,22 chunks in 45 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,62.84 MiB,2.86 MiB
Shape,"(8036, 25, 41)","(366, 25, 41)"
Dask graph,22 chunks in 47 graph layers,22 chunks in 47 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 62.84 MiB 2.86 MiB Shape (8036, 25, 41) (366, 25, 41) Dask graph 22 chunks in 47 graph layers Data type float64 numpy.ndarray",41  25  8036,

Unnamed: 0,Array,Chunk
Bytes,62.84 MiB,2.86 MiB
Shape,"(8036, 25, 41)","(366, 25, 41)"
Dask graph,22 chunks in 47 graph layers,22 chunks in 47 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [6]:
#Multiple variables test.

variables = ["hfls", "tas"]
files=[]
for variable in variables:
    # get ERA5 subdirectory
    subdirectory = Path(directory + '/' + region +'/'+time_freq+'/'+obs_LOOKUP[variable]['obs_long_name']+'/')
    files+=list(subdirectory.glob("*-"+time_freq+"-*"+obs_LOOKUP[variable]['obs_long_name']+"*20*.nc")) #Select all the netCDF files in the directory
files

[PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2007.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2008.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2018.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2012.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5-land/belgium/daily/surface_latent_heat_flux/era5-land-daily-belgium-surface_latent_heat_flux-2021.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200

In [7]:
ERA5Land_ds = ic.convert_input(files) #Convert the input to the correct format
ERA5Land_ds

ValueError: Cannot specify both coords='different' and compat='override'.