Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extract select_ar_order #176

Merged
merged 9 commits into from Jul 12, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 19 additions & 26 deletions mesmer/calibrate_mesmer/train_gv.py
Expand Up @@ -14,9 +14,8 @@
import statsmodels.api as sm
import xarray as xr
from packaging.version import Version
from statsmodels.tsa.ar_model import ar_select_order

from mesmer.core.auto_regression import _fit_auto_regression_xr
from mesmer.core.auto_regression import _fit_auto_regression_xr, _select_ar_order_xr


def train_gv(gv, targ, esm, cfg, save_params=True, **kwargs):
Expand Down Expand Up @@ -167,33 +166,27 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit):
params_gv["max_lag"] = max_lag
params_gv["sel_crit"] = sel_crit

if Version(xr.__version__) >= Version("2022.03.0"):
method = "method"
else:
method = "interpolation"

# select the AR Order
nr_scens = len(gv.keys())
AR_order_scens_tmp = np.zeros(nr_scens)
AR_order_scen = list()
for scen in gv.keys():

# np.percentile renamed the keyword in numpy v1.22
if Version(np.__version__) >= Version("1.22.0"):
pct_kwargs = {"q": 50, "method": "nearest"}
else:
pct_kwargs = {"q": 50, "interpolation": "nearest"}
# create temporary DataArray
data = xr.DataArray(gv[scen], dims=["run", "time"])

for scen_idx, scen in enumerate(gv.keys()):
nr_runs = gv[scen].shape[0]
AR_order_runs_tmp = np.zeros(nr_runs)

for run in np.arange(nr_runs):
run_ar_lags = ar_select_order(
gv[scen][run], maxlag=max_lag, ic=sel_crit, old_names=False
).ar_lags
# if order > 0 is selected,add selected order to vector
if len(run_ar_lags) > 0:
AR_order_runs_tmp[run] = run_ar_lags[-1]

# interpolation is not a good way to go here because it could lead to an AR
# order that wasn't chosen by run -> avoid it by just taking nearest
AR_order_scens_tmp[scen_idx] = np.percentile(AR_order_runs_tmp, **pct_kwargs)

AR_order_sel = int(np.percentile(AR_order_scens_tmp, **pct_kwargs))
AR_order = _select_ar_order_xr(data, "time", maxlag=max_lag, ic=sel_crit)
mathause marked this conversation as resolved.
Show resolved Hide resolved

# median over all ensemble members ("nearest" ensures an 'existing' lag is selected)
AR_order = AR_order.quantile(q=0.5, **{method: "nearest"})
AR_order_scen.append(AR_order)

# median over all scenarios
AR_order_scen = xr.concat(AR_order_scen, dim="scen")
AR_order_sel = int(AR_order.quantile(q=0.5, **{method: "nearest"}).item())

# determine the AR params for the selected AR order
params_scen = list()
Expand Down
70 changes: 70 additions & 0 deletions mesmer/core/auto_regression.py
Expand Up @@ -2,6 +2,76 @@
import xarray as xr


def _select_ar_order_xr(data, dim, maxlag, ic="bic"):
mathause marked this conversation as resolved.
Show resolved Hide resolved
"""Select the order of an autoregressive AR-X(p) process - xarray wrapper
mathause marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
data : array_like
A 1-d endogenous response variable. The independent variable.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To update

dim : str
Dimension along which to determine the order.
maxlag : int
The maximum lag to consider.
ic : {'aic', 'hqic', 'bic'}, default 'bic'
The information criterion to use in the selection.

Returns
-------
selected_order : DataArray
Array indicating the selected order with the same size as the input but ``dim``
removed.

Notes
-----
Only full models can be selected.
"""

selected_order = xr.apply_ufunc(
_select_order_np,
data,
input_core_dims=[[dim]],
output_core_dims=((),),
vectorize=True,
output_dtypes=[int],
kwargs={"maxlag": maxlag, "ic": ic},
)

# remove zeros
selected_order[selected_order == 0] = np.NaN

return selected_order


def _select_order_np(data, maxlag, ic="bic"):
"""Select the order of an autoregressive AR-X(p) process - numpy wrapper
mathause marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
data : array_like
A 1-d endogenous response variable. The independent variable.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To update

maxlag : int
The maximum lag to consider.
ic : {'aic', 'hqic', 'bic'}, default 'bic'
The information criterion to use in the selection.

Returns
-------
selected_order : int
The selected order.

Notes
-----
Only full models can be selected.
"""

from statsmodels.tsa.ar_model import ar_select_order
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to have the import of "ar_select_order" just after the import of xarray? To have it once and for all?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right it is more common to have the imports at the beginning of the file. However, I will probably keep it here to avoid confusion because the name would be similar to select_ar_order. Tangentially relevant: #177

Just as a side remark - imports are cached in python so there is no time penalty to have it in a function as compared to the top.


ar_lags = ar_select_order(data, maxlag=maxlag, ic=ic, old_names=False).ar_lags

return ar_lags[-1]


def _draw_auto_regression_correlated_np(
*, intercept, coefs, covariance, n_samples, n_ts, seed, buffer
):
Expand Down