New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add _fit_auto_regression functions #139
Changes from 2 commits
1f01350
a7b8d1a
bb12838
2d91015
1dd2846
92adbbf
439bac3
7d286f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,11 @@ | |
import joblib | ||
import numpy as np | ||
import statsmodels.api as sm | ||
import xarray as xr | ||
from packaging.version import Version | ||
from statsmodels.tsa.ar_model import AutoReg, ar_select_order | ||
from statsmodels.tsa.ar_model import ar_select_order | ||
|
||
from mesmer.core.auto_regression import _fit_auto_regression_xr | ||
|
||
|
||
def train_gv(gv, targ, esm, cfg, save_params=True, **kwargs): | ||
|
@@ -193,29 +196,26 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit): | |
AR_order_sel = int(np.percentile(AR_order_scens_tmp, **pct_kwargs)) | ||
|
||
# determine the AR params for the selected AR order | ||
params_gv["AR_int"] = 0 | ||
params_gv["AR_coefs"] = np.zeros(AR_order_sel) | ||
params_gv["AR_order_sel"] = AR_order_sel | ||
params_gv["AR_std_innovs"] = 0 | ||
|
||
res = list() | ||
for scen_idx, scen in enumerate(gv.keys()): | ||
nr_runs = gv[scen].shape[0] | ||
AR_order_runs_tmp = np.zeros(nr_runs) | ||
AR_int_tmp = 0 | ||
AR_coefs_tmp = np.zeros(AR_order_sel) | ||
AR_std_innovs_tmp = 0 | ||
data = gv[scen] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we split out a function for this inner loop, we might be able to make the pattern look more like linear regression does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Although I guess it's almost as small as it can be already, the inner function would have to be called something like (Side note, what does 'run' mean here? Is that ensemble member?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is definitely a good idea. However, I am still not sure what our "outer" data structure should be (I have a prototype of a run is an ensemble member. I just followed Lea's terminology. But I agree we should standardize this stuff... |
||
|
||
for run in np.arange(nr_runs): | ||
AR_model_tmp = AutoReg( | ||
gv[scen][run], lags=AR_order_sel, old_names=False | ||
).fit() | ||
AR_int_tmp += AR_model_tmp.params[0] / nr_runs | ||
AR_coefs_tmp += AR_model_tmp.params[1:] / nr_runs | ||
AR_std_innovs_tmp += np.sqrt(AR_model_tmp.sigma2) / nr_runs | ||
|
||
params_gv["AR_int"] += AR_int_tmp / nr_scens | ||
params_gv["AR_coefs"] += AR_coefs_tmp / nr_scens | ||
params_gv["AR_std_innovs"] += AR_std_innovs_tmp / nr_scens | ||
# create temporary DataArray | ||
data = xr.DataArray(data, dims=("run", "time")) | ||
|
||
params = _fit_auto_regression_xr(data, dim="time", lags=AR_order_sel) | ||
params = params.mean("run") | ||
|
||
res.append(params) | ||
|
||
res = xr.concat(res, dim="scen") | ||
res = res.mean("scen") | ||
|
||
# TODO: remove np.float64(...) (only here so the tests pass) | ||
params_gv["AR_order_sel"] = AR_order_sel | ||
params_gv["AR_int"] = np.float64(res.trend.values) | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
params_gv["AR_coefs"] = res.coeffs.values.squeeze() | ||
params_gv["AR_std_innovs"] = np.float64(res.standard_deviation.values) | ||
|
||
# check if fitted AR process is stationary | ||
# (highly unlikely this test will ever fail but better safe than sorry) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import numpy as np | ||
import xarray as xr | ||
|
||
|
||
def _fit_auto_regression_xr(data, dim, lags): | ||
""" | ||
fit an auto regression - xarray wrapper | ||
|
||
Parameters | ||
---------- | ||
data : xr.DataArray | ||
A ``xr.DataArray`` to estimate the auto regression over. | ||
dim : str | ||
Dimension along which to fit the auto regression over. | ||
lags : int | ||
The number of lags to include in the model. | ||
|
||
Returns | ||
------- | ||
:obj:`xr.Dataset` | ||
Dataset containing the estimated parameters of the ``trend``, the AR ``coeffs`` | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
and the ``standard_deviation`` of the residuals. | ||
""" | ||
|
||
if not isinstance(data, xr.DataArray): | ||
raise TypeError(f"Expected a `xr.DataArray`, got {type(data)}") | ||
|
||
trend, coeffs, std = xr.apply_ufunc( | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_fit_auto_regression_np, | ||
data, | ||
input_core_dims=[[dim]], | ||
output_core_dims=((), ("lags",), ()), | ||
vectorize=True, | ||
output_dtypes=[float, float, float], | ||
kwargs={"lags": lags}, | ||
) | ||
|
||
# TODO: are the names appropriate? | ||
data_vars = {"trend": trend, "coeffs": coeffs, "standard_deviation": std} | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# TODO: add coords for lags? | ||
return xr.Dataset(data_vars) | ||
|
||
|
||
def _fit_auto_regression_np(data, lags): | ||
""" | ||
fit an auto regression - numpy wrapper | ||
|
||
Parameters | ||
---------- | ||
data : np.array | ||
A numpy array to estimate the auto regression over. Must be 1D. | ||
lags : int | ||
The number of lags to include in the model. | ||
|
||
Returns | ||
------- | ||
trend : :obj:`np.array` | ||
Trend part of the fited AR model. | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
coeffs : :obj:`np.array` | ||
Coefficients if the AR model. Will have as many entries as ``lags``. | ||
std :obj:`np.array` | ||
Standard deviation of the residuals. | ||
""" | ||
|
||
from statsmodels.tsa.ar_model import AutoReg | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why here and not wrapped in try except in top level? Is that an xarray pattern? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did this for the linear regression because the sklearn class has the same name as ours ( |
||
|
||
AR_model = AutoReg(data, lags=lags, old_names=False) | ||
AR_result = AR_model.fit() | ||
|
||
trend = AR_result.params[0] | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
coeffs = AR_result.params[1:] | ||
|
||
# sqrt of variance = standard deviation | ||
std = np.sqrt(AR_result.sigma2) | ||
|
||
return trend, coeffs, std | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from unittest import mock | ||
|
||
import numpy as np | ||
import pytest | ||
import xarray as xr | ||
|
||
import mesmer.core.auto_regression | ||
from mesmer.core.utils import _check_dataarray_form, _check_dataset_form | ||
|
||
from .utils import trend_data_1D, trend_data_2D | ||
|
||
|
||
@pytest.mark.parametrize("obj", [xr.Dataset(), None]) | ||
def test_auto_regression_xr_errors(obj): | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
with pytest.raises(TypeError, match="Expected a `xr.DataArray`"): | ||
mesmer.core.auto_regression._fit_auto_regression_xr(obj, "dim", lags=1) | ||
|
||
|
||
@pytest.mark.parametrize("lags", [1, 2]) | ||
def test_auto_regression_xr_1D(lags): | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
data = trend_data_1D() | ||
res = mesmer.core.auto_regression._fit_auto_regression_xr(data, "time", lags=lags) | ||
|
||
_check_dataset_form( | ||
res, | ||
"_fit_auto_regression_result", | ||
required_vars=["trend", "coeffs", "standard_deviation"], | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
_check_dataarray_form(res.trend, "trend", ndim=0, shape=()) | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_check_dataarray_form( | ||
res.coeffs, "coeffs", ndim=1, required_dims={"lags"}, shape=(lags,) | ||
) | ||
_check_dataarray_form( | ||
res.standard_deviation, "standard_deviation", ndim=0, shape=() | ||
) | ||
|
||
|
||
@pytest.mark.parametrize("lags", [1, 2]) | ||
def test_auto_regression_xr_2D(lags): | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
data = trend_data_2D() | ||
res = mesmer.core.auto_regression._fit_auto_regression_xr(data, "time", lags=lags) | ||
|
||
(n_cells,) = data.cells.shape | ||
|
||
_check_dataset_form( | ||
res, | ||
"_fit_auto_regression_result", | ||
required_vars=["trend", "coeffs", "standard_deviation"], | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
_check_dataarray_form(res.trend, "trend", ndim=1, shape=(n_cells,)) | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_check_dataarray_form( | ||
res.coeffs, | ||
"coeffs", | ||
ndim=2, | ||
required_dims={"cells", "lags"}, | ||
shape=(n_cells, lags), | ||
) | ||
_check_dataarray_form( | ||
res.standard_deviation, "standard_deviation", ndim=1, shape=(n_cells,) | ||
) | ||
|
||
|
||
@pytest.mark.parametrize("lags", [1, 2]) | ||
def test_auto_regression_np(lags): | ||
mathause marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
data = np.array([0, 1, 3.14]) | ||
|
||
mock_auto_regressor = mock.Mock() | ||
mock_auto_regressor.params = np.array([0.1, 0.25]) | ||
mock_auto_regressor.sigma2 = 3.14 | ||
|
||
with mock.patch( | ||
"statsmodels.tsa.ar_model.AutoReg" | ||
) as mocked_auto_regression, mock.patch( | ||
"statsmodels.tsa.ar_model.AutoRegResults" | ||
) as mocked_auto_regression_result: | ||
|
||
mocked_auto_regression.return_value = mocked_auto_regression_result | ||
mocked_auto_regression_result.return_value = mock_auto_regressor | ||
|
||
mesmer.core.auto_regression._fit_auto_regression_np(data, lags=lags) | ||
|
||
mocked_auto_regression.assert_called_once() | ||
mocked_auto_regression.assert_called_with(data, lags=lags, old_names=False) | ||
|
||
mocked_auto_regression_result.fit.assert_called_once() | ||
mocked_auto_regression_result.fit.assert_called_with() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rename to
result
?res
might stand for residuals.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep although if the function is small enough it will be clear from the context