Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor lowess smoothing #193

Merged
merged 2 commits into from Sep 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -26,6 +26,9 @@ New Features
- Extract function to select the order of the auto regressive model: ``mesmer.core.auto_regression._select_ar_order_xr``
(`#176 <https://github.com/MESMER-group/mesmer/pull/176>`_).
By `Mathias Hauser <https://github.com/mathause>`_.
- Refactor the LOWESS smooting for xarray objects: :py:func:`mesmer.core.smooting.lowess`.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

smooting --> smoothing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Fixed in #194

(`#193 <https://github.com/MESMER-group/mesmer/pull/193>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions docs/source/api.rst
Expand Up @@ -25,6 +25,7 @@ Statistical core functions
~core.auto_regression._select_ar_order_xr
~core.auto_regression._fit_auto_regression_xr
~core.auto_regression._draw_auto_regression_correlated_np
~core.smooting.lowess
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

smooting --> smoothing


Computation
-----------
Expand Down
19 changes: 9 additions & 10 deletions mesmer/calibrate_mesmer/train_gt.py
Expand Up @@ -12,9 +12,9 @@
import joblib
import numpy as np
import xarray as xr
from statsmodels.nonparametric.smoothers_lowess import lowess

from mesmer.core.linear_regression import LinearRegression
from mesmer.core.smoothing import lowess
from mesmer.io import load_strat_aod


Expand Down Expand Up @@ -163,14 +163,14 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
return params_gt


def train_gt_ic_LOWESS(var):
def train_gt_ic_LOWESS(data):
"""
Derive smooth global trend of variable from single ESM ic ensemble with LOWESS
smoother.

Parameters
----------
var : np.ndarray
data : np.ndarray
2d array (run, time) of globally-averaged time series

Returns
Expand All @@ -181,22 +181,21 @@ def train_gt_ic_LOWESS(var):
fraction of the data used when estimating each y-value
"""

# number time steps
nr_ts = var.shape[1]
data = xr.DataArray(data, dims=("ensemble", "time"))

# average across all runs to get a first smoothing
av_var = np.mean(var, axis=0)
data = data.mean("ensemble")

dim = "time"

# apply lowess smoother to further smooth the Tglob time series
# rather arbitrarily chosen value that gives a smooth enough trend,
frac_lowess = 50 / nr_ts
frac = 50 / data.sizes[dim]

# open to changes but if much smaller, var trend ends up very wiggly
frac_lowess_name = "50/nr_ts"

gt_lowess = lowess(
av_var, np.arange(nr_ts), return_sorted=False, frac=frac_lowess, it=0
)
gt_lowess = lowess(data, dim=dim, frac=frac)

return gt_lowess, frac_lowess_name

Expand Down
49 changes: 49 additions & 0 deletions mesmer/core/smoothing.py
@@ -0,0 +1,49 @@
import numpy as np
import xarray as xr

from mesmer.core.utils import _check_dataarray_form


def lowess(data, dim, *, frac, use_coords_as_x=False, it=0):
"""LOWESS (Locally Weighted Scatterplot Smoothing) for xarray objects

Parameters
----------
data : xr.DataArray
Data to smooth (y-values).
dim : str
Dimension along which to smooth (x-dimension)
frac : float
Between 0 and 1. The fraction of the data used when estimating each y-value.
use_coords_as_x : boolean, default: False
If True uses ``data[dim]`` as x-values else uses ``np.arange(data[dim.size)``
(useful if ``dim`` are time coordinates).
it : int, default: 0
The number of residual-based reweightings to perform.
"""

from statsmodels.nonparametric.smoothers_lowess import lowess

if not isinstance(dim, str):
raise ValueError("Can only pass a single dimension.")

coords = data[dim]
_check_dataarray_form(coords, name=dim, ndim=1)

if use_coords_as_x:
x = coords
else:
x = xr.ones_like(coords)
x.data = np.arange(coords.size)

out = xr.apply_ufunc(
lowess,
data,
x,
input_core_dims=[[dim], [dim]],
output_core_dims=[[dim]],
vectorize=True,
kwargs={"frac": frac, "it": it, "return_sorted": False},
)

return out
61 changes: 61 additions & 0 deletions tests/integration/test_smoothing.py
@@ -0,0 +1,61 @@
import pytest
import xarray as xr
from statsmodels.nonparametric.smoothers_lowess import lowess

import mesmer.core.smoothing
from mesmer.core.utils import _check_dataarray_form

from .utils import trend_data_1D, trend_data_2D


def test_lowess_errors():
data = trend_data_2D()

with pytest.raises(ValueError, match="Can only pass a single dimension."):
mesmer.core.smoothing.lowess(data, ("lat", "lon"), frac=0.3)

with pytest.raises(ValueError, match="data should be 1-dimensional"):
mesmer.core.smoothing.lowess(data.to_dataset(), "data", frac=0.3)


@pytest.mark.parametrize("it", [0, 3])
@pytest.mark.parametrize("frac", [0.3, 0.5])
def test_lowess(it, frac):

data = trend_data_1D()

result = mesmer.core.smoothing.lowess(data, "time", frac=frac, it=it)

expected = lowess(
data.values, data.time.values, frac=frac, it=it, return_sorted=False
)
expected = xr.DataArray(expected, dims="time", coords={"time": data.time})

xr.testing.assert_allclose(result, expected)


def test_lowess_dataset():

data = trend_data_1D()

result = mesmer.core.smoothing.lowess(data.to_dataset(), "time", frac=0.3)

expected = lowess(
data.values, data.time.values, frac=0.3, it=0, return_sorted=False
)
expected = xr.DataArray(
expected, dims="time", coords={"time": data.time}, name="data"
)
expected = expected.to_dataset()

xr.testing.assert_allclose(result, expected)


def test_lowess_2D():
data = trend_data_2D()

result = mesmer.core.smoothing.lowess(data, "time", frac=0.3)

_check_dataarray_form(
result, "result", ndim=2, required_dims=("time", "cells"), shape=data.shape
)
4 changes: 2 additions & 2 deletions tests/integration/utils.py
Expand Up @@ -38,7 +38,7 @@ def trend_data_1D(n_timesteps=30, intercept=0, slope=1, scale=1):

data = intercept + slope * time + scatter

return xr.DataArray(data, dims=("time"), coords={"time": time})
return xr.DataArray(data, dims=("time"), coords={"time": time}, name="data")


def trend_data_2D(n_timesteps=30, n_lat=3, n_lon=2, intercept=0, slope=1, scale=1):
Expand All @@ -58,7 +58,7 @@ def trend_data_2D(n_timesteps=30, n_lat=3, n_lon=2, intercept=0, slope=1, scale=
"lat": ("cells", LAT.flatten()),
}

return xr.DataArray(data, dims=("cells", "time"), coords=coords)
return xr.DataArray(data, dims=("cells", "time"), coords=coords, name="data")


def trend_data_3D(n_timesteps=30, n_lat=3, n_lon=2, intercept=0, slope=1, scale=1):
Expand Down