Skip to content

Commit

Permalink
Add cmorizer scripts for NOAA-ERSST. (#1799)
Browse files Browse the repository at this point in the history
Co-authored-by: rbeucher <romain.beucher@anu.edu.au>
Co-authored-by: Felicity Chun <felicity.chun@anu.edu.au>
Co-authored-by: Lisa Bock <lisa.bock@dlr.de>
Co-authored-by: Felicity Chun <32269066+flicj191@users.noreply.github.com>
Co-authored-by: Bouwe Andela <b.andela@esciencecenter.nl>
  • Loading branch information
6 people committed Jan 25, 2024
1 parent bed6408 commit 7e9eecd
Show file tree
Hide file tree
Showing 11 changed files with 395 additions and 0 deletions.
4 changes: 4 additions & 0 deletions doc/sphinx/source/input.rst
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,10 @@ A list of the datasets for which a CMORizers is available is provided in the fol
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NOAA-CIRES-20CR-V3 | clt, clwvi, hus, prw, rlut, rlutcs, rsut, rsutcs (Amon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NOAA-ERSSTv3b | tos (Omon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NOAA-ERSSTv5 | tos (Omon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NOAA-MBL-CH4 | ch4s (Amon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NOAAGlobalTemp | tasa (Amon) | 2 | Python |
Expand Down
20 changes: 20 additions & 0 deletions esmvaltool/cmorizers/data/cmor_config/NOAA-ERSSTv3b.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# Filename
filename: 'ersst.*.nc'

# Common global attributes for Cmorizer output
attributes:
project_id: OBS6
dataset_id: NOAA-ERSSTv3b
version: 'v3b'
tier: 2
modeling_realm: reanaly
source: 'https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf/'
reference: 'ersstv3b'
comment: ''

# Variables to cmorize
variables:
tos:
mip: Omon
raw: sst
20 changes: 20 additions & 0 deletions esmvaltool/cmorizers/data/cmor_config/NOAA-ERSSTv5.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# Filename
filename: 'ersst.v5.*.nc'

# Common global attributes for Cmorizer output
attributes:
project_id: OBS6
dataset_id: NOAA-ERSSTv5
version: 'v5'
tier: 2
modeling_realm: reanaly
source: 'https://doi.org/10.7289/V5T72FNM'
reference: 'ersstv5'
comment: ''

# Variables to cmorize
variables:
tos:
mip: Omon
raw: sst
18 changes: 18 additions & 0 deletions esmvaltool/cmorizers/data/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,24 @@ datasets:
ntatFlxSI-MO/csulf.ntat.mon.mean.nc
ntatFlxSI-MO/csusf.ntat.mon.mean.nc
NOAA-ERSSTv3b:
tier: 2
source: https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf/
last_access: 2023-12-04
info: |
Download the following files:
ersst.yyyymm.nc
for years 1854 to 2020
NOAA-ERSSTv5:
tier: 2
source: https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v5/netcdf/
last_access: 2023-12-04
info: |
Download the following files:
ersst.v5.yyyymm.nc
for years 1854 onwards
NOAAGlobalTemp:
tier: 2
source: https://www.ncei.noaa.gov/data/noaa-global-surface-temperature/v5/access/
Expand Down
49 changes: 49 additions & 0 deletions esmvaltool/cmorizers/data/downloaders/datasets/noaa_ersstv3b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Script to download NOAA-ERSST-v3b."""
import logging
from datetime import datetime
from dateutil import relativedelta

from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader

logger = logging.getLogger(__name__)


def download_dataset(config, dataset, dataset_info, start_date, end_date,
                     overwrite):
    """Fetch the monthly NOAA-ERSSTv3b files, one request per month.

    Parameters
    ----------
    config : dict
        ESMValTool's user configuration
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        First month to download; ``None`` means January 1854
    end_date : datetime
        Last month to download (inclusive); ``None`` means January 2020
    overwrite : bool
        Overwrite already downloaded files
    """
    first_month = datetime(1854, 1, 1) if start_date is None else start_date
    last_month = datetime(2020, 1, 1) if end_date is None else end_date

    fetcher = WGetDownloader(
        config=config,
        dataset=dataset,
        dataset_info=dataset_info,
        overwrite=overwrite,
    )
    url_template = ("https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf"
                    "/ersst.{year}{month:02d}.nc")
    one_month = relativedelta.relativedelta(months=1)

    # NOTE(review): every URL points at a single monthly file, yet
    # ``download_folder`` is used, presumably with an empty list of extra
    # wget options - confirm this is the intended WGetDownloader method.
    current = first_month
    while current <= last_month:
        monthly_url = url_template.format(year=current.year,
                                          month=current.month)
        fetcher.download_folder(monthly_url, [])
        current += one_month
49 changes: 49 additions & 0 deletions esmvaltool/cmorizers/data/downloaders/datasets/noaa_ersstv5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Script to download NOAA-ERSST-V5."""
import logging
from datetime import datetime
from dateutil import relativedelta

from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader

logger = logging.getLogger(__name__)


def download_dataset(config, dataset, dataset_info, start_date, end_date,
                     overwrite):
    """Fetch the monthly NOAA-ERSSTv5 files, one request per month.

    Parameters
    ----------
    config : dict
        ESMValTool's user configuration
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        First month to download; ``None`` means January 1854
    end_date : datetime
        Last month to download (inclusive); ``None`` means January 2020
    overwrite : bool
        Overwrite already downloaded files
    """
    # Fall back to the default period when the caller gives no bounds.
    if start_date is None:
        start_date = datetime(1854, 1, 1)
    if end_date is None:
        end_date = datetime(2020, 1, 1)

    wget = WGetDownloader(
        config=config,
        dataset=dataset,
        dataset_info=dataset_info,
        overwrite=overwrite,
    )

    monthly_file = ("https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v5/netcdf/"
                    "ersst.v5.{year}{month:02d}.nc")
    step = relativedelta.relativedelta(months=1)

    # NOTE(review): each URL is a single file although ``download_folder``
    # is called - confirm against the WGetDownloader API.
    month_cursor = start_date
    while month_cursor <= end_date:
        url = monthly_file.format(year=month_cursor.year,
                                  month=month_cursor.month)
        wget.download_folder(url, [])
        month_cursor += step
89 changes: 89 additions & 0 deletions esmvaltool/cmorizers/data/formatters/datasets/noaa_ersstv3b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""ESMValTool CMORizer for NOAA ERSST data, version 3b.
This is the CMORizer script for the NOAA Extended Reconstructed
Sea Surface Temperature (ERSST) in its version 3b.
Tier
Tier 2: open dataset.
Source
https://doi.org/10.1175/1520-0442-16.10.1495
Last access
20200520
Download and processing instructions
The data is provided by NOAA at:
https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf/
"""

import logging
import os
import re

import iris
from cf_units import Unit

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _get_filepaths(in_dir, basename):
    """Return the paths of all entries in ``in_dir`` matching ``basename``.

    ``basename`` is compiled as a regular expression and matched against
    the start of every directory entry name; matching names are returned
    joined with ``in_dir``, in the order ``os.listdir`` yields them.
    """
    pattern = re.compile(basename)
    return [
        os.path.join(in_dir, entry)
        for entry in os.listdir(in_dir)
        if pattern.match(entry)
    ]


def _fix_time_coord(cube, _field, _filename):
    """Move every time point of ``cube`` to the 15th of its month.

    Used as a load callback; ``_field`` and ``_filename`` are ignored.
    The time coordinate is first converted to
    ``days since 1850-01-01 00:00:00`` (standard calendar) and modified
    in place.
    """
    target_unit = Unit('days since 1850-01-01 00:00:00', calendar='standard')
    coord = cube.coord('time')
    coord.convert_units(target_unit)

    # Round-trip through datetimes so that only the day-of-month changes.
    mid_month = [
        stamp.replace(day=15) for stamp in target_unit.num2date(coord.points)
    ]
    coord.points = target_unit.date2num(mid_month)


def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir):
    """Load ``raw_var`` from the input files, concatenate and save it.

    Parameters
    ----------
    raw_var : str
        Name of the variable in the raw files (passed to ``iris.load``)
    cmor_info : object
        CMOR table entry; its ``short_name`` names the output variable
    attrs : dict
        Global attributes written to the cube and passed to the saver
    filepath : list of str
        Input file(s) to load
    out_dir : str
        Output directory
    """
    short_name = cmor_info.short_name

    # The callback centres each file's time points before concatenation.
    loaded = iris.load(filepath, raw_var, _fix_time_coord)
    iris.util.equalise_attributes(loaded)
    combined = iris.util.squeeze(loaded.concatenate_cube())

    utils.fix_var_metadata(combined, cmor_info)
    utils.set_global_atts(combined, attrs)
    utils.save_variable(
        combined,
        short_name,
        out_dir,
        attrs,
        unlimited_dimensions=['time'],
    )


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """CMORize every configured variable of NOAA-ERSSTv3b.

    Parameters
    ----------
    in_dir : str
        Directory holding the raw input files
    out_dir : str
        Directory the CMORized files are written to
    cfg : dict
        CMORizer configuration; the keys ``attributes``, ``cmor_table``,
        ``filename`` and ``variables`` are read
    cfg_user : dict
        Accepted for interface compatibility; not used here
    start_date : datetime
        Accepted for interface compatibility; not used here
    end_date : datetime
        Accepted for interface compatibility; not used here

    Notes
    -----
    ``cfg['attributes']`` is updated in place with the ``mip`` of each
    variable being processed.
    """
    glob_attrs = cfg['attributes']
    cmor_table = cfg['cmor_table']

    filepaths = _get_filepaths(in_dir, cfg['filename'])

    # Without input there is nothing to load or concatenate; stop here
    # instead of failing later with an unrelated error.
    if not filepaths:
        logger.warning("No files found, basename: %s", cfg['filename'])
        return
    logger.info("Found %d input files in '%s'", len(filepaths), in_dir)

    for var, var_info in cfg['variables'].items():
        logger.info("CMORizing variable '%s'", var)
        glob_attrs['mip'] = var_info['mip']
        cmor_info = cmor_table.get_variable(var_info['mip'], var)
        # The raw variable name defaults to the CMOR short name.
        raw_var = var_info.get('raw', var)
        _extract_variable(raw_var, cmor_info, glob_attrs, filepaths, out_dir)
105 changes: 105 additions & 0 deletions esmvaltool/cmorizers/data/formatters/datasets/noaa_ersstv5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""ESMValTool CMORizer for NOAA ERSST data, version 5.
This is the CMORizer script for the NOAA Extended Reconstructed Sea Surface
Temperature (ERSST) data of version 5.
Tier
Tier 2: open dataset.
Source
https://doi.org/10.7289/V5T72FNM
Last access
20200520
Download and processing instructions
The data is provided by NOAA at:
https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v5/netcdf/
"""

import logging
import os
import re

import iris
import cf_units

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _get_filepaths(in_dir, basename):
    """Collect matching input files, split at the year 2008.

    ``basename`` is compiled as a regular expression and matched against
    the start of every entry name in ``in_dir``. The year is read from
    the third dot-separated field of the name (``ersst.v5.<yyyy><mm>.nc``).

    Returns
    -------
    tuple of (list, list)
        Paths of files before 2008, and paths of files from 2008 onwards.
        Two lists are returned because the files differ from 2008.
    """
    pattern = re.compile(basename)
    before_2008, from_2008 = [], []

    for entry in os.listdir(in_dir):
        if not pattern.match(entry):
            continue
        timestamp = entry.split('.')[2]
        bucket = before_2008 if int(timestamp[:4]) < 2008 else from_2008
        bucket.append(os.path.join(in_dir, entry))

    return before_2008, from_2008


def _fix_time_coord(cube, _, _filename):
    """Centre time points on day 15 and standardise the time units.

    Used as a load callback; the second and third arguments are ignored.
    The time coordinate of ``cube`` is modified in place: its points are
    moved to the 15th of their month and stored as float64, its unit is
    rebuilt from the same origin with the ``standard`` calendar, and its
    long name is set to ``Time``.
    """
    time_coord = cube.coord('time')
    original_unit = time_coord.units

    dates = original_unit.num2date(time_coord.points)
    centred = [date.replace(day=15) for date in dates]
    time_coord.points = original_unit.date2num(centred).astype('float64')

    # Keep the origin, but force the calendar to 'standard'.
    time_coord.units = cf_units.Unit(time_coord.units.origin,
                                     calendar='standard')
    time_coord.long_name = 'Time'


def _extract_variable(raw_var, cmor_info, attrs, filepaths, out_dir):
    """Load ``raw_var`` from the monthly files, join them and save.

    Parameters
    ----------
    raw_var : str
        Name of the variable in the raw files (passed to ``iris.load``)
    cmor_info : object
        CMOR table entry; its ``short_name`` names the output variable
    attrs : dict
        Global attributes written to the cube and passed to the saver
    filepaths : list of str
        Monthly input files to concatenate
    out_dir : str
        Output directory
    """
    short_name = cmor_info.short_name

    # Harmonise attributes and time units so the monthly cubes concatenate.
    monthly_cubes = iris.load(filepaths, raw_var, _fix_time_coord)
    iris.util.equalise_attributes(monthly_cubes)
    iris.util.unify_time_units(monthly_cubes)
    joined = iris.util.squeeze(monthly_cubes.concatenate_cube())

    utils.fix_var_metadata(joined, cmor_info)
    utils.fix_coords(joined)

    utils.set_global_atts(joined, attrs)
    utils.save_variable(
        joined,
        short_name,
        out_dir,
        attrs,
        unlimited_dimensions=['time'],
    )


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """CMORize every configured variable of NOAA-ERSSTv5.

    The input files are handled in two groups (before 2008 and from 2008
    onwards), each processed and saved separately. A group without files
    is skipped, so input covering only one of the periods still works.

    Parameters
    ----------
    in_dir : str
        Directory holding the raw input files
    out_dir : str
        Directory the CMORized files are written to
    cfg : dict
        CMORizer configuration; the keys ``attributes``, ``cmor_table``,
        ``filename`` and ``variables`` are read
    cfg_user : dict
        Accepted for interface compatibility; not used here
    start_date : datetime
        Accepted for interface compatibility; not used here
    end_date : datetime
        Accepted for interface compatibility; not used here

    Notes
    -----
    ``cfg['attributes']`` is updated in place with the ``mip`` of each
    variable being processed.
    """
    glob_attrs = cfg['attributes']
    cmor_table = cfg['cmor_table']

    files_before_2008, files_from_2008 = _get_filepaths(
        in_dir, cfg['filename'])
    total_files = len(files_before_2008) + len(files_from_2008)

    # Without input there is nothing to load or concatenate; stop here
    # instead of failing later with an unrelated error.
    if not total_files:
        logger.warning("No files found, basename: %s", cfg['filename'])
        return
    logger.info("%d files before 2008", len(files_before_2008))
    logger.info("Found %d input files in '%s'", total_files, in_dir)

    # Run the cmorization
    for var, var_info in cfg['variables'].items():
        logger.info("CMORizing variable '%s'", var)
        glob_attrs['mip'] = var_info['mip']
        cmor_info = cmor_table.get_variable(var_info['mip'], var)
        # The raw variable name defaults to the CMOR short name.
        raw_var = var_info.get('raw', var)
        for file_group in (files_before_2008, files_from_2008):
            # An empty group cannot be concatenated, so skip it.
            if file_group:
                _extract_variable(raw_var, cmor_info, glob_attrs,
                                  file_group, out_dir)
18 changes: 18 additions & 0 deletions esmvaltool/recipes/examples/recipe_check_obs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,24 @@ diagnostics:
type: reanaly, version: v2, start_year: 1871, end_year: 2012}
scripts: null

NOAA-ERSSTv5:
description: NOAA-ERSSTv5 check
variables:
tos:
additional_datasets:
- {dataset: NOAA-ERSSTv5, project: OBS6, mip: Omon, tier: 2,
type: reanaly, version: v5, start_year: 1854, end_year: 2000}
scripts: null

NOAA-ERSSTv3b:
description: NOAA-ERSSTv3b check
variables:
tos:
additional_datasets:
- {dataset: NOAA-ERSSTv3b, project: OBS6, mip: Omon, tier: 2,
type: reanaly, version: v3b, start_year: 1854, end_year: 2019}
scripts: null

NOAA-MBL-CH4:
description: NOAA marine boundary layer CH4 check
variables:
Expand Down

0 comments on commit 7e9eecd

Please sign in to comment.