From 654edae5b3417e36ae677e6a1f2a85b2925776d5 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 11 Jul 2023 15:00:17 +0200 Subject: [PATCH 1/4] provide aerosol data via pooch --- data/README.md | 3 ++ .../isaod_gl.dat => data/isaod_gl_2022.dat | 34 +++++++++++++++++-- environment.yml | 1 + mesmer/calibrate_mesmer/train_gt.py | 18 ++++++---- mesmer/core/_data.py | 25 ++++++++++++++ mesmer/io/load_obs.py | 20 ++++++++--- setup.cfg | 1 + 7 files changed, 87 insertions(+), 15 deletions(-) create mode 100644 data/README.md rename tests/test-data/calibrate-coarse-grid/observations/aerosols/isaod_gl.dat => data/isaod_gl_2022.dat (98%) mode change 100755 => 100644 create mode 100644 mesmer/core/_data.py diff --git a/data/README.md b/data/README.md new file mode 100644 index 00000000..af0cfe09 --- /dev/null +++ b/data/README.md @@ -0,0 +1,3 @@ +# mesmer data + +This folder contains auxiliary data for mesmer. They are downloaded on demand using [pooch](https://www.fatiando.org/pooch/latest/). diff --git a/tests/test-data/calibrate-coarse-grid/observations/aerosols/isaod_gl.dat b/data/isaod_gl_2022.dat old mode 100755 new mode 100644 similarity index 98% rename from tests/test-data/calibrate-coarse-grid/observations/aerosols/isaod_gl.dat rename to data/isaod_gl_2022.dat index 53d5f09a..d5988b55 --- a/tests/test-data/calibrate-coarse-grid/observations/aerosols/isaod_gl.dat +++ b/data/isaod_gl_2022.dat @@ -1,4 +1,4 @@ -# scripturl01 :: https://climexp.knmi.nl/getindices.cgi?STATION=stratospheric_AOD&TYPE=i&WMO=NASAData/saod_gl&id=$id +# scripturl01 :: http://climexp.knmi.nl/getindices.cgi?STATION=stratospheric_AOD&TYPE=i&WMO=NASAData/saod_gl&id=id # global Optical Thickness at 550 nm # from NASA/GISS # AOD [1] stratospheric aerosol optical depth @@ -7,7 +7,7 @@ # contact :: https://www.giss.nasa.gov/staff/makiko_sato.html # references :: Bourassa, A.E., A. Robock, et al. 2012: Large volcanic aerosol load in the stratosphere linked to Asian monsoon transport. 
Science 337, 78-81, doi:10.1126/science.1219371 # source_url :: https://data.giss.nasa.gov/modelforce/strataer/ -# history :: retrieved and converted 2020-08-14 17:12:30 +# history :: retrieved and converted 2023-07-10 21:49:40 # climexp_url :: https://climexp.knmi.nl/getindices.cgi?NASAData/saod_gl 1850 1 0.004 1850 2 0.004 @@ -2056,4 +2056,32 @@ 2020 5 0.000 2020 6 0.000 2020 7 0.000 - +2020 8 0.000 +2020 9 0.000 +2020 10 0.000 +2020 11 0.000 +2020 12 0.000 +2021 1 0.000 +2021 2 0.000 +2021 3 0.000 +2021 4 0.000 +2021 5 0.000 +2021 6 0.000 +2021 7 0.000 +2021 8 0.000 +2021 9 0.000 +2021 10 0.000 +2021 11 0.000 +2021 12 0.000 +2022 1 0.000 +2022 2 0.000 +2022 3 0.000 +2022 4 0.000 +2022 5 0.000 +2022 6 0.000 +2022 7 0.000 +2022 8 0.000 +2022 9 0.000 +2022 10 0.000 +2022 11 0.000 +2022 12 0.000 diff --git a/environment.yml b/environment.yml index 74fcf7d5..bcd0e2a9 100644 --- a/environment.yml +++ b/environment.yml @@ -12,6 +12,7 @@ dependencies: - numpy - packaging - pandas<2.0 + - pooch - regionmask>=0.8 - scikit-learn - sphinx diff --git a/mesmer/calibrate_mesmer/train_gt.py b/mesmer/calibrate_mesmer/train_gt.py index 3b3f19c0..47d23f39 100644 --- a/mesmer/calibrate_mesmer/train_gt.py +++ b/mesmer/calibrate_mesmer/train_gt.py @@ -6,6 +6,7 @@ Functions to train global trend module of MESMER. """ +import warnings import numpy as np import xarray as xr @@ -38,8 +39,8 @@ def train_gt(var, targ, esm, time, cfg, save_params=True): time : np.ndarray [scen] (1d array of years) - cfg : module - config file containing metadata + cfg : None + Passing cfg is no longer required. 
save_params : bool, default True determines if parameters are saved or not, default = True @@ -126,7 +127,7 @@ def train_gt(var, targ, esm, time, cfg, save_params=True): var_all = np.unique(var_all, axis=0) params_gt["saod"], params_gt["hist"] = train_gt_ic_OLSVOLC( - var_all, gt_lowess_hist, params_gt["time"]["hist"], cfg + var_all, gt_lowess_hist, params_gt["time"]["hist"] ) elif params_gt["method"] == "LOWESS": params_gt["hist"] = gt_lowess_hist @@ -201,7 +202,7 @@ def train_gt_ic_LOWESS(data): return gt_lowess, frac_lowess_name -def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg): +def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg=None): """ Derive global trend (emissions + volcanoes) parameters from single ESM ic ensemble by adding volcanic spikes to LOWESS trend. @@ -231,14 +232,17 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg): """ - # specify necessary variables from cfg file - dir_obs = cfg.dir_obs + if cfg is not None: + warnings.warn( + "Passing ``cfg`` to ``train_gt_ic_OLSVOLC`` is no longer necessary", + FutureWarning, + ) nr_runs, nr_ts = var.shape # account for volcanic eruptions in historical time period # load in observed stratospheric aerosol optical depth - aod_obs = load_strat_aod(time, dir_obs) + aod_obs = load_strat_aod(time) # drop "year" coords - aod_obs does not have coords (currently) aod_obs = aod_obs.drop_vars("year") diff --git a/mesmer/core/_data.py b/mesmer/core/_data.py new file mode 100644 index 00000000..20850410 --- /dev/null +++ b/mesmer/core/_data.py @@ -0,0 +1,25 @@ +import pooch + +import mesmer + + +def fetch_remote_data(name): + """ + uses pooch to cache files + """ + + cache_dir = pooch.os_cache("mesmer") + + REMOTE_RESSOURCE = pooch.create( + path=cache_dir, + # The remote data is on Github + base_url="https://github.com/MESMER-group/mesmer/raw/{version}/data/", + registry={ + "isaod_gl_2022.dat": "c83881a18e74912385ad578282de721cc8e866b62cbbc75446e52e7041c81cff", + }, + version=f"v{mesmer.__version__}", + 
version_dev="main", + ) + + # the file will be downloaded automatically the first time this is run. + return REMOTE_RESSOURCE.fetch(name) diff --git a/mesmer/io/load_obs.py b/mesmer/io/load_obs.py index 89c1d77c..5f012fb5 100644 --- a/mesmer/io/load_obs.py +++ b/mesmer/io/load_obs.py @@ -7,11 +7,14 @@ """ import os +import warnings import numpy as np import pandas as pd import xarray as xr +from mesmer.core._data import fetch_remote_data + def load_obs(targ, prod, lon, lat, cfg, sel_ref="native", ignore_nans=True): """Load observations which you previously downloaded. @@ -163,15 +166,15 @@ def load_obs_tblend(prod, lon, lat, cfg, sel_ref): return tblend, time -def load_strat_aod(time, dir_obs): +def load_strat_aod(time, dir_obs=None): """Load observed global stratospheric aerosol optical depth time series. Parameters ---------- time : np.ndarray 1d array of years the AOD time series is required for - dir_obs : str - pathway to observations + dir_obs : None + Deprecated. Returns ------- @@ -184,9 +187,16 @@ def load_strat_aod(time, dir_obs): cimp6, 1850 - 2005 for cmip5) """ - path_file = os.path.join(dir_obs, "aerosols", "isaod_gl.dat") + if dir_obs is not None: + warnings.warn( + "The aerosol data is now shipped with mesmer. 
Passing `dir_obs` to " + "``load_strat_aod`` is no longer necessary", + FutureWarning, + ) + + filename = fetch_remote_data("isaod_gl_2022.dat") df = pd.read_csv( - path_file, + filename, delim_whitespace=True, skiprows=11, names=("year", "month", "AOD"), diff --git a/setup.cfg b/setup.cfg index efeb355e..140c64dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,7 @@ install_requires = numpy packaging pandas < 2.0 + pooch regionmask scikit-learn statsmodels From e30fb4cf8a6e0818c114f0997d3eea48fd894d63 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 11 Jul 2023 15:06:08 +0200 Subject: [PATCH 2/4] fix hash --- mesmer/core/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesmer/core/_data.py b/mesmer/core/_data.py index 20850410..863906ed 100644 --- a/mesmer/core/_data.py +++ b/mesmer/core/_data.py @@ -15,7 +15,7 @@ def fetch_remote_data(name): # The remote data is on Github base_url="https://github.com/MESMER-group/mesmer/raw/{version}/data/", registry={ - "isaod_gl_2022.dat": "c83881a18e74912385ad578282de721cc8e866b62cbbc75446e52e7041c81cff", + "isaod_gl_2022.dat": "3d26e78bf0ee96a02c99e2a7a448dafda0ac847a5c914a75c7d9745e95fe68ee", }, version=f"v{mesmer.__version__}", version_dev="main", From 3a2f55cf5370c55abb3b5d1f092b49704ad2ff86 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 11 Jul 2023 15:11:11 +0200 Subject: [PATCH 3/4] fix correct docstring --- mesmer/calibrate_mesmer/train_gt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_gt.py b/mesmer/calibrate_mesmer/train_gt.py index 47d23f39..73550978 100644 --- a/mesmer/calibrate_mesmer/train_gt.py +++ b/mesmer/calibrate_mesmer/train_gt.py @@ -39,8 +39,8 @@ def train_gt(var, targ, esm, time, cfg, save_params=True): time : np.ndarray [scen] (1d array of years) - cfg : None - Passing cfg is no longer required. 
+ cfg : module + config file containing metadata save_params : bool, default True determines if parameters are saved or not, default = True @@ -215,8 +215,8 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg=None): 1d array of smooth global trend of variable time : np.ndarray 1d array of years - cfg : module - config file containing metadata needed to load in stratospheric AOD time series + cfg : None + Passing cfg is no longer required. Returns ------- From dfd6156b80e2067e41977fda6dd09afc4b4d7b43 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 11 Jul 2023 17:30:28 +0200 Subject: [PATCH 4/4] changelog --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 93148432..5f5b8850 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -62,6 +62,11 @@ New Features `_). By `Mathias Hauser `_. +- The aerosol data is now automatically downloaded using `pooch <https://www.fatiando.org/pooch/latest/>`__. + (`#267 <https://github.com/MESMER-group/mesmer/pull/267>`_). By `Mathias Hauser + <https://github.com/mathause>`_. + + Breaking changes ^^^^^^^^^^^^^^^^