# Workflow Example with ERA5 input convertor
Here we illustrate a complete workflow example using the ERA5 convertor

## Imports

In [1]:
from pathlib import Path

import xarray as xr
import pandas as pd

import valenspy as vp #The Valenspy package
from valenspy.inputconverter_functions import ERA5_to_CF

from yaml import safe_load
from typing import Union, List

In [2]:
#Import Converter - This input converter will not do anything to the data.
ic = vp.InputConverter(ERA5_to_CF)


## 1. Convert the data

### Get the filenames of ERA5 data corresponding to CORDEX variable name


In [3]:
machine = "hortense"

# User defined variable, here we look at 2m temperature. 
variable = "tas"
dataset = "ERA5"
region = "europe" # "belgium"
time_freq = "daily" ## Important to account here are the postprocessed files at daily time resolution and their naming (eg max daily temperature does not correspond to ERA5 variable name as defined!!)

### Generate the path and filename of obs file

In [4]:
# Get the current file directory and load the CORDEX variables.yml file

# files = Path(__file__).resolve().parent -- this is not working in notebook
src_path = Path("../src/valenspy") ## -- to be removed. 

with open(src_path / "ancilliary_data" / "CORDEX_variables.yml") as file:
    CORDEX_VARIABLES = safe_load(file)

with open(src_path / "ancilliary_data" / Path("ERA5_lookup.yml")) as file:
    obs_LOOKUP = safe_load(file)

In [5]:
# this will be part of the PATH generator. 

# get path of observational datasets from path settings .yml
with open(src_path / "ancilliary_data" / Path("dataset_PATHS.yml")) as file:
    dataset_PATHS = safe_load(file)
directory = dataset_PATHS[machine][dataset]

# get ERA5 variable corresponding to the requested variable using its look-up table
obs_var = obs_LOOKUP[variable]['obs_name']

# get ERA5 subdirectory
subdirectory = Path(directory + '/' + region +'/'+time_freq+'/'+obs_LOOKUP[variable]['obs_long_name']+'/')

# open the EOBS file for the corresponding variable
files = list(subdirectory.glob("*-"+time_freq+"-*"+obs_LOOKUP[variable]['obs_long_name']+"*20*.nc")) #Select all the netCDF files in the directory
files

[PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2013.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2010.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2017.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2006.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2014.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2019.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/

In [6]:
# pass user defined metatadata
metadata_info = {'region' : "europe", 'freq' : 'daily', 'spatial_resolution': '0.1deg' }
# test of ERA5 convertor function
ERA5_ds = ic.convert_input(files, metadata_info) #Convert the input to the correct format
ERA5_ds

The file is ValEnsPy CF compliant.
50.00% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas']
Unknown to ValEnsPy: ['time_bnds']


Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 41 graph layers,20 chunks in 41 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 114.14 kiB 5.72 kiB Shape (7305, 2) (366, 2) Dask graph 20 chunks in 41 graph layers Data type datetime64[ns] numpy.ndarray",2  7305,

Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 41 graph layers,20 chunks in 41 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.56 GiB 127.17 MiB Shape (7305, 163, 289) (362, 161, 286) Dask graph 160 chunks in 41 graph layers Data type float64 numpy.ndarray",289  163  7305,

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
# Example without metadata passed 

# test of ERA5 convertor function
ERA5_ds = ic.convert_input(files) #Convert the input to the correct format
ERA5_ds

The file is ValEnsPy CF compliant.
50.00% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas']
Unknown to ValEnsPy: ['time_bnds']


Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 41 graph layers,20 chunks in 41 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 114.14 kiB 5.72 kiB Shape (7305, 2) (366, 2) Dask graph 20 chunks in 41 graph layers Data type datetime64[ns] numpy.ndarray",2  7305,

Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 41 graph layers,20 chunks in 41 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.56 GiB 127.17 MiB Shape (7305, 163, 289) (362, 161, 286) Dask graph 160 chunks in 41 graph layers Data type float64 numpy.ndarray",289  163  7305,

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [8]:
#Multiple variables test.

variables = ["tas", "pr"]
files=[]
for variable in variables:
    # get ERA5 subdirectory
    subdirectory = Path(directory + '/' + region +'/'+time_freq+'/'+obs_LOOKUP[variable]['obs_long_name']+'/')
    files+=list(subdirectory.glob("*-"+time_freq+"-*"+obs_LOOKUP[variable]['obs_long_name']+"*20*.nc")) #Select all the netCDF files in the directory
files

[PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2013.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2010.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2017.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2006.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2014.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/project_input/External/observations/era5/europe/daily/2m_temperature/era5-daily-europe-2m_temperature-2019.nc'),
 PosixPath('/dodrio/scratch/projects/2022_200/

In [9]:
ERA5_ds = ic.convert_input(files) #Convert the input to the correct format
ERA5_ds

The file is ValEnsPy CF compliant.
66.67% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas', 'pr']
Unknown to ValEnsPy: ['time_bnds']


Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 46 graph layers,20 chunks in 46 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 114.14 kiB 5.72 kiB Shape (7305, 2) (366, 2) Dask graph 20 chunks in 46 graph layers Data type datetime64[ns] numpy.ndarray",2  7305,

Unnamed: 0,Array,Chunk
Bytes,114.14 kiB,5.72 kiB
Shape,"(7305, 2)","(366, 2)"
Dask graph,20 chunks in 46 graph layers,20 chunks in 46 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.56 GiB 127.17 MiB Shape (7305, 163, 289) (362, 161, 286) Dask graph 160 chunks in 41 graph layers Data type float64 numpy.ndarray",289  163  7305,

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 43 graph layers,160 chunks in 43 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.56 GiB 127.17 MiB Shape (7305, 163, 289) (362, 161, 286) Dask graph 160 chunks in 43 graph layers Data type float64 numpy.ndarray",289  163  7305,

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 43 graph layers,160 chunks in 43 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
ERA5_ds['tas']

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.56 GiB 127.17 MiB Shape (7305, 163, 289) (362, 161, 286) Dask graph 160 chunks in 41 graph layers Data type float64 numpy.ndarray",289  163  7305,

Unnamed: 0,Array,Chunk
Bytes,2.56 GiB,127.17 MiB
Shape,"(7305, 163, 289)","(362, 161, 286)"
Dask graph,160 chunks in 41 graph layers,160 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
