# Workflow Example with CCLM input converter
Here we illustrate a complete workflow example using the CCLM input converter.

## Imports

In [1]:
from pathlib import Path

import xarray as xr
import pandas as pd

import valenspy as vp #The Valenspy package
from valenspy.inputconverter_functions import CCLM_to_CF

from yaml import safe_load
from typing import Union, List

In [2]:
import valenspy as vp #The Valenspy package

CPU times: user 0 ns, sys: 5 µs, total: 5 µs
Wall time: 8.34 µs


## 1. Convert the data

### Get the file names of the CCLM data corresponding to the CORDEX variable name


In [3]:
# CORDEX name of the variable to work with; the manual file lookup further down
# translates it to the CCLM name through the model's lookup table.
variable = 'tas'

### 1.1(a) Get the file names of the model data - with the input manager

In [5]:
# Input manager configured for the 'hortense' machine; it resolves the file
# paths of the requested data set (see the "File paths found" output below).
manager = vp.InputManager(machine='hortense')

# Load the daily CCLM data for the variable chosen above instead of repeating
# the hard-coded name.
# NOTE(review): path_identifiers look like substrings that must occur in the
# file path (experiment name, "mean" as in "daymean") - confirm against the
# InputManager documentation.
ds = manager.load_data(
    "CCLM",
    [variable],
    freq="daily",
    path_identifiers=["EUR11_NU_TT_EC_TSO", "mean"],
)
ds

File paths found:
/dodrio/scratch/projects/2022_200/RCS/CORDEXBE2/postprocessing/EUR11_NU_TT_EC_TSO/T_2M/daily/T_2M_daymean.nc
Variable metadata is missing or incorrect
The file is NOT ValEnsPy CF compliant.
33.33% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas']
Unknown to ValEnsPy: ['time_bnds', 'rotated_pole']


Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 kiB,1.92 kiB
Shape,"(123, 2)","(123, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 1.92 kiB 1.92 kiB Shape (123, 2) (123, 2) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2  123,

Unnamed: 0,Array,Chunk
Bytes,1.92 kiB,1.92 kiB
Shape,"(123, 2)","(123, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.82 MiB,90.82 MiB
Shape,"(123, 434, 446)","(123, 434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 90.82 MiB 90.82 MiB Shape (123, 434, 446) (123, 434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434  123,

Unnamed: 0,Array,Chunk
Bytes,90.82 MiB,90.82 MiB
Shape,"(123, 434, 446)","(123, 434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [3]:
# Locate the CCLM file for `variable` by hand, using the model's lookup table.

# Location of the valenspy sources (holds the lookup tables)
src_path = Path("../src/valenspy")

# Base directory of the post-processed CCLM output
postproc_base_dir = "/dodrio/scratch/projects/2022_200/RCS/CORDEXBE2/postprocessing/"

# Name of the CCLM experiment to read
experiment = "EUR11_CO_TA_GC"

# Model whose lookup table translates the variable name
model_name = 'CCLM'

# Read the lookup table of the model
with open(src_path / "ancilliary_data" / f"{model_name}_lookup.yml") as file:
    mod_LOOKUP = safe_load(file)

# CCLM name of the requested variable. The key is 'raw_name', the same key the
# later copy of this cell reads; the previous 'mod_name' was inconsistent with it.
mod_var = mod_LOOKUP[variable]['raw_name']

# Directory of that variable: <base dir>/<experiment>/<CCLM variable name>
directory = Path(postproc_base_dir) / experiment / mod_var

# Only the daily-mean file of the variable matches this pattern
files = list(directory.glob(mod_var + "_daymean.nc"))

files

[PosixPath('/dodrio/scratch/projects/2022_200/RCS/CORDEXBE2/postprocessing/EUR11_CO_TA_GC/T_2M/T_2M_daymean.nc')]

### 1.2 Use the input converter to load the data

In [4]:
# Input converter built around the CCLM_to_CF function imported at the top.
# NOTE(review): the original comment stated that this converter does not do
# anything to the data - confirm against the CCLM_to_CF implementation.
ic = vp.InputConverter(CCLM_to_CF)

In [5]:
# Run the converter on the file list found above. The report printed below
# (CF compliance of the variables) is emitted during this call.
cclm_ds = ic.convert_input(files) # Convert the input to the correct format

Variable metadata is missing or incorrect
The file is NOT ValEnsPy CF compliant.
33.33% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas']
Unknown to ValEnsPy: ['time_bnds', 'rotated_pole']


### 1.1 Get the file names of the model data 
(eventually, this will be done by the input manager)

In [14]:
# Manual lookup of the CCLM file that holds `variable`.

# Where the valenspy sources (and their lookup tables) live
src_path = Path("../src/valenspy")

# Root of the post-processed CCLM output and the experiment to read from it
postproc_base_dir = "/dodrio/scratch/projects/2022_200/RCS/CORDEXBE2/postprocessing/"
experiment = "EUR11_CO_TA_GC"

# Model whose lookup table is consulted
model_name = 'CCLM'

# Parse the model's lookup table
lookup_path = src_path / "ancilliary_data" / Path(model_name + "_lookup.yml")
with open(lookup_path) as file:
    mod_LOOKUP = safe_load(file)

# Name the model itself uses for the requested variable
mod_var = mod_LOOKUP[variable]['raw_name']

# Directory of that variable inside the experiment
directory = Path(f"{postproc_base_dir}{experiment}/{mod_var}/")

# Collect the daily-mean file(s) of the variable (pattern: <mod_var>_daymean.nc)
files = [*directory.glob(f"{mod_var}_daymean.nc")]

files

[PosixPath('/dodrio/scratch/projects/2022_200/RCS/CORDEXBE2/postprocessing/EUR11_CO_TA_GC/T_2M/T_2M_daymean.nc')]

### 1.2 Use the input converter to load the data

In [15]:
# Input converter built around CCLM_to_CF. The function is already imported in
# the import cell at the top of the notebook, so it is not re-imported here.
# NOTE(review): the original comment stated that this converter does not do
# anything to the data - confirm against the CCLM_to_CF implementation.
ic = vp.InputConverter(CCLM_to_CF)

In [16]:
# Run the converter on the file list found above. The report printed below
# (CF compliance of the variables) is emitted during this call.
cclm_ds = ic.convert_input(files) # Convert the input to the correct format

Variable metadata is missing or incorrect
The file is NOT ValEnsPy CF compliant.
33.33% of the variables are ValEnsPy CF compliant
ValEnsPy CF compliant: ['tas']
Unknown to ValEnsPy: ['time_bnds', 'rotated_pole']


In [17]:
# Display the dataset loaded through the input manager.
# NOTE(review): the converter result of this section is stored in `cclm_ds`,
# not `ds` - confirm which of the two is meant to be inspected here.
ds

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 756.11 kiB 756.11 kiB Shape (434, 446) (434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434,

Unnamed: 0,Array,Chunk
Bytes,756.11 kiB,756.11 kiB
Shape,"(434, 446)","(434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.92 kiB,1.92 kiB
Shape,"(123, 2)","(123, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 1.92 kiB 1.92 kiB Shape (123, 2) (123, 2) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2  123,

Unnamed: 0,Array,Chunk
Bytes,1.92 kiB,1.92 kiB
Shape,"(123, 2)","(123, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.82 MiB,90.82 MiB
Shape,"(123, 434, 446)","(123, 434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 90.82 MiB 90.82 MiB Shape (123, 434, 446) (123, 434, 446) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",446  434  123,

Unnamed: 0,Array,Chunk
Bytes,90.82 MiB,90.82 MiB
Shape,"(123, 434, 446)","(123, 434, 446)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
# Average tas over the time dimension and plot the remaining 2-D field
tas_time_mean = ds['tas'].mean(dim='time')
tas_time_mean.plot()

NameError: name 'cclm_ds' is not defined

In [9]:
# Average tas over both horizontal dimensions (rlat, rlon) and plot the
# resulting time series. This is a plain, unweighted mean over the grid.
tas_domain_mean = ds['tas'].mean(dim=('rlat', 'rlon'))
tas_domain_mean.plot();  # trailing ';' hides the [<Line2D ...>] repr

[<matplotlib.lines.Line2D at 0x14baa1db1640>]

### Check cclm for all available variables