From 8d213dc9f9f4e27adfc6ad7ff15407a248a84160 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Thu, 3 Jun 2021 14:51:58 +0200 Subject: [PATCH] Add support for LMR data to shop (#1257) --- docs/whats-new.rst | 2 + oggm/shop/gcm_climate.py | 232 ++++++++++++++++++++++++++------------ oggm/tests/test_prepro.py | 56 ++++++++- oggm/utils/_downloads.py | 2 +- 4 files changed, 220 insertions(+), 72 deletions(-) diff --git a/docs/whats-new.rst b/docs/whats-new.rst index e72316f77..7436f87ef 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -16,6 +16,8 @@ Enhancements - Added ``cook_rgidf()`` function in ``oggm.utils`` to simplify the use of a non-RGI glacier inventory in OGGM (:pull:`1251`). By `Li Fei `_ +- Added support for last millenium reanalysis data to the shop (:pull:`1257`). + By `Fabien Maussion `_ Bug fixes ~~~~~~~~~ diff --git a/oggm/shop/gcm_climate.py b/oggm/shop/gcm_climate.py index 8ec50966f..25784e73a 100644 --- a/oggm/shop/gcm_climate.py +++ b/oggm/shop/gcm_climate.py @@ -5,6 +5,7 @@ import warnings # External libs +import cftime import numpy as np import netCDF4 import xarray as xr @@ -86,79 +87,76 @@ def process_gcm_data(gdir, filesuffix='', prcp=None, temp=None, assert len(prcp) // 12 == len(prcp) / 12, 'Somehow we didn\'t get full years' assert len(temp) // 12 == len(temp) / 12, 'Somehow we didn\'t get full years' - # Get CRU to apply the anomaly to + # Get the reference data to apply the anomaly to fpath = gdir.get_filepath('climate_historical') - ds_cru = xr.open_dataset(fpath) + with xr.open_dataset(fpath) as ds_ref: + + ds_ref = ds_ref.sel(time=slice(*year_range)) + + # compute monthly anomalies + # of temp + if scale_stddev: + # This is a bit more arithmetic + ts_tmp_sel = temp.sel(time=slice(*year_range)) + ts_tmp_std = ts_tmp_sel.groupby('time.month').std(dim='time') + std_fac = ds_ref.temp.groupby('time.month').std(dim='time') / ts_tmp_std + std_fac = std_fac.roll(month=13-sm, roll_coords=True) + std_fac = np.tile(std_fac.data, 
len(temp) // 12) + # We need an even number of years for this to work + if ((len(ts_tmp_sel) // 12) % 2) == 1: + raise InvalidParamsError('We need an even number of years ' + 'for this to work') + win_size = len(ts_tmp_sel) + 1 + + def roll_func(x, axis=None): + x = x[:, ::12] + n = len(x[0, :]) // 2 + xm = np.nanmean(x, axis=axis) + return xm + (x[:, n] - xm) * std_fac + + temp = temp.rolling(time=win_size, center=True, + min_periods=1).reduce(roll_func) - # Add CRU clim - dscru = ds_cru.sel(time=slice(*year_range)) - - # compute monthly anomalies - # of temp - if scale_stddev: - # This is a bit more arithmetic ts_tmp_sel = temp.sel(time=slice(*year_range)) - ts_tmp_std = ts_tmp_sel.groupby('time.month').std(dim='time') - std_fac = dscru.temp.groupby('time.month').std(dim='time') / ts_tmp_std - std_fac = std_fac.roll(month=13-sm, roll_coords=True) - std_fac = np.tile(std_fac.data, len(temp) // 12) - # We need an even number of years for this to work - if ((len(ts_tmp_sel) // 12) % 2) == 1: - raise InvalidParamsError('We need an even number of years ' - 'for this to work') - win_size = len(ts_tmp_sel) + 1 - - def roll_func(x, axis=None): - x = x[:, ::12] - n = len(x[0, :]) // 2 - xm = np.nanmean(x, axis=axis) - return xm + (x[:, n] - xm) * std_fac - - temp = temp.rolling(time=win_size, center=True, - min_periods=1).reduce(roll_func) - - ts_tmp_sel = temp.sel(time=slice(*year_range)) - ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time') - ts_tmp = temp.groupby('time.month') - ts_tmp_avg - # of precip -- scaled anomalies - ts_pre_avg = prcp.sel(time=slice(*year_range)) - ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time') - ts_pre_ano = prcp.groupby('time.month') - ts_pre_avg - # scaled anomalies is the default. 
Standard anomalies above - # are used later for where ts_pre_avg == 0 - ts_pre = prcp.groupby('time.month') / ts_pre_avg - - # for temp - loc_tmp = dscru.temp.groupby('time.month').mean() - ts_tmp = ts_tmp.groupby('time.month') + loc_tmp - - # for prcp - loc_pre = dscru.prcp.groupby('time.month').mean() - # scaled anomalies - ts_pre = ts_pre.groupby('time.month') * loc_pre - # standard anomalies - ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre - # Correct infinite values with standard anomalies - ts_pre.values = np.where(np.isfinite(ts_pre.values), - ts_pre.values, - ts_pre_ano.values) - # The previous step might create negative values (unlikely). Clip them - ts_pre.values = utils.clip_min(ts_pre.values, 0) - - assert np.all(np.isfinite(ts_pre.values)) - assert np.all(np.isfinite(ts_tmp.values)) - - gdir.write_monthly_climate_file(temp.time.values, - ts_pre.values, ts_tmp.values, - float(dscru.ref_hgt), - prcp.lon.values, prcp.lat.values, - time_unit=time_unit, - calendar=calendar, - file_name='gcm_data', - source=source, - filesuffix=filesuffix) - - ds_cru.close() + ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time') + ts_tmp = temp.groupby('time.month') - ts_tmp_avg + # of precip -- scaled anomalies + ts_pre_avg = prcp.sel(time=slice(*year_range)) + ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time') + ts_pre_ano = prcp.groupby('time.month') - ts_pre_avg + # scaled anomalies is the default. 
Standard anomalies above + # are used later for where ts_pre_avg == 0 + ts_pre = prcp.groupby('time.month') / ts_pre_avg + + # for temp + loc_tmp = ds_ref.temp.groupby('time.month').mean() + ts_tmp = ts_tmp.groupby('time.month') + loc_tmp + + # for prcp + loc_pre = ds_ref.prcp.groupby('time.month').mean() + # scaled anomalies + ts_pre = ts_pre.groupby('time.month') * loc_pre + # standard anomalies + ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre + # Correct infinite values with standard anomalies + ts_pre.values = np.where(np.isfinite(ts_pre.values), + ts_pre.values, + ts_pre_ano.values) + # The previous step might create negative values (unlikely). Clip them + ts_pre.values = utils.clip_min(ts_pre.values, 0) + + assert np.all(np.isfinite(ts_pre.values)) + assert np.all(np.isfinite(ts_tmp.values)) + + gdir.write_monthly_climate_file(temp.time.values, + ts_pre.values, ts_tmp.values, + float(ds_ref.ref_hgt), + prcp.lon.values, prcp.lat.values, + time_unit=time_unit, + calendar=calendar, + file_name='gcm_data', + source=source, + filesuffix=filesuffix) @entity_task(log, writes=['gcm_data']) @@ -332,3 +330,97 @@ def process_cmip_data(gdir, filesuffix='', fpath_temp=None, process_gcm_data(gdir, filesuffix=filesuffix, prcp=precip, temp=temp, source=filesuffix, **kwargs) + + +@entity_task(log, writes=['gcm_data']) +def process_lmr_data(gdir, fpath_temp=None, fpath_precip=None, + year_range=('1951', '1980'), filesuffix='', **kwargs): + """Read, process and store the Last Millennium Reanalysis (LMR) data for this glacier. + + LMR data: https://atmos.washington.edu/~hakim/lmr/LMRv2/ + + LMR data is annualised in anomaly format relative to 1951-1980. We + create synthetic timeseries from the reference data. + + It stores the data in a format that can be used by the OGGM mass balance + model and in the glacier directory. 
+ + Parameters + ---------- + fpath_temp : str + path to the temp file (default: LMR v2.1 from server above) + fpath_precip : str + path to the precip file (default: LMR v2.1 from server above) + year_range : tuple of str + the year range for which you want to compute the anomalies. Default + for LMR is `('1951', '1980')` + filesuffix : str + append a suffix to the filename (useful for ensemble experiments). + + **kwargs: any kwarg to be passed to ref:`process_gcm_data` + """ + + # Get the path of GCM temperature & precipitation data + base_url = 'https://atmos.washington.edu/%7Ehakim/lmr/LMRv2/' + if fpath_temp is None: + fpath_temp = utils.file_downloader(base_url + 'air_MCruns_ensemble_mean_LMRv2.1.nc') + if fpath_precip is None: + fpath_precip = utils.file_downloader(base_url + 'prate_MCruns_ensemble_mean_LMRv2.1.nc') + + # Glacier location + glon = gdir.cenlon + glat = gdir.cenlat + + # Read the GCM files + with xr.open_dataset(fpath_temp, use_cftime=True) as tempds, \ + xr.open_dataset(fpath_precip, use_cftime=True) as precipds: + + # Check longitude conventions + if tempds.lon.min() >= 0 and glon <= 0: + glon += 360 + + # Take the closest to the glacier + # Should we consider GCM interpolation? + temp = tempds.air.sel(lat=glat, lon=glon, method='nearest') + precip = precipds.prate.sel(lat=glat, lon=glon, method='nearest') + + # Currently we just take the mean of the ensemble, although + # this is probably not advised. 
The GCM climate will correct + # anyways + temp = temp.mean(dim='MCrun') + precip = precip.mean(dim='MCrun') + + # Precip unit is kg/m^2/s we convert to mm month since we apply the anomaly after + precip = precip * 30.5 * (60 * 60 * 24) + + # Back to [-180, 180] for OGGM + temp.lon.values = temp.lon if temp.lon <= 180 else temp.lon - 360 + precip.lon.values = precip.lon if precip.lon <= 180 else precip.lon - 360 + + # OK now we have to turn these annual timeseries in monthly data + # We take the ref climate + fpath = gdir.get_filepath('climate_historical') + with xr.open_dataset(fpath) as ds_ref: + ds_ref = ds_ref.sel(time=slice(*year_range)) + + loc_tmp = ds_ref.temp.groupby('time.month').mean() + loc_pre = ds_ref.prcp.groupby('time.month').mean() + + # Make time coord + t = np.cumsum([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] * len(temp)) + t = cftime.num2date(np.append([0], t[:-1]), 'days since 0000-01-01 00:00:00', + calendar='noleap') + + temp = xr.DataArray((loc_tmp.data + temp.data[:, np.newaxis]).flatten(), + coords={'time': t, 'lon': temp.lon, 'lat': temp.lat}, + dims=('time',)) + + # For precip the std dev is very small - lets keep it as is for now but + # this is a bit ridiculous. We clip to zero here to be sure + precip = utils.clip_min((loc_pre.data + precip.data[:, np.newaxis]).flatten(), 0) + precip = xr.DataArray(precip, dims=('time',), + coords={'time': t, 'lon': temp.lon, 'lat': temp.lat}) + + process_gcm_data(gdir, filesuffix=filesuffix, prcp=precip, temp=temp, + year_range=year_range, calendar='noleap', + source='lmr', **kwargs) diff --git a/oggm/tests/test_prepro.py b/oggm/tests/test_prepro.py index 65158d649..ab3b08ad8 100644 --- a/oggm/tests/test_prepro.py +++ b/oggm/tests/test_prepro.py @@ -3427,7 +3427,8 @@ def test_process_cmip5(self): scesm.prcp.mean(), rtol=1e-3) - # Here no std dev! + # Here also std dev! 
But it's not perfect
But it's not perfect