Skip to content

Commit

Permalink
Merge pull request statsmodels#1039 from jseabold/fix-arima-int-name
Browse files Browse the repository at this point in the history
BUG: Fix ARIMA bugs for small data and data with integer names. Closes statsmodels#1038.
  • Loading branch information
jseabold committed Aug 14, 2013
2 parents 966e85f + 98afc4d commit 2d5503a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 0 deletions.
7 changes: 7 additions & 0 deletions statsmodels/tsa/arima_model.py
Expand Up @@ -399,6 +399,13 @@ def _fit_start_params_hr(self, order):
armod = AR(endog).fit(ic='bic', trend='nc')
arcoefs_tmp = armod.params
p_tmp = armod.k_ar
# it's possible in small samples that optimal lag-order
# doesn't leave enough obs. No consistent way to fix.
if p_tmp + q >= len(endog):
raise ValueError("Proper starting parameters cannot"
" be found for this order with this number "
"of observations. Use the start_params "
"argument.")
resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp,
trim='both'), arcoefs_tmp)
if p < p_tmp + q:
Expand Down
26 changes: 26 additions & 0 deletions statsmodels/tsa/tests/test_arima.py
Expand Up @@ -1796,6 +1796,32 @@ def test_bad_start_params():
arima_mod = ARIMA(np.log(inv), (1,1,2))
assert_raises(ValueError, mod.fit)

def test_arima_small_data_bug():
    """Fitting must raise ValueError when there are too few observations.

    Regression test for issue 1038: an order of (2, 0, 2) is requested on a
    series of only eight values, wrapped in a DataFrame with a date index
    that starts at 1990q1.
    """
    import statsmodels.api as sm

    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474]

    dr = dates_from_range("1990q1", length=len(vals))
    # pandas.Series is used instead of the deprecated pandas.TimeSeries
    # alias; the resulting object is the same date-indexed series.
    ts = pandas.Series(vals, index=dr)
    df = pandas.DataFrame(ts)
    mod = sm.tsa.ARIMA(df, (2, 0, 2))
    assert_raises(ValueError, mod.fit)

def test_arima_dataframe_integer_name():
    """Smoke test for issue 1038: build an ARIMA model from a DataFrame.

    The DataFrame is created from an unnamed, date-indexed series, so its
    column label is presumably the integer 0 -- the "integer name" case
    this test is named after. The test passes if model construction does
    not raise.
    """
    import statsmodels.api as sm

    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474,
            94.0, 96.5, 93.3, 97.5, 96.3, 92.]

    dr = dates_from_range("1990q1", length=len(vals))
    # pandas.Series is used instead of the deprecated pandas.TimeSeries
    # alias; the resulting object is the same date-indexed series.
    ts = pandas.Series(vals, index=dr)
    df = pandas.DataFrame(ts)
    # NOTE(review): only construction is exercised here; confirm whether
    # fit() should also be called to reach the lag-name building code.
    mod = sm.tsa.ARIMA(df, (2, 0, 2))

if __name__ == "__main__":
    # Run this module's tests through nose when executed as a script:
    # verbose output, stop at the first failure, drop into pdb on errors.
    import nose

    nose_argv = [__file__, '-vvs', '-x', '--pdb']
    nose.runmodule(argv=nose_argv, exit=False)
2 changes: 2 additions & 0 deletions statsmodels/tsa/vector_ar/util.py
Expand Up @@ -60,6 +60,8 @@ def make_lag_names(names, lag_order, trendorder=1):
# take care of lagged endogenous names
for i in range(1, lag_order + 1):
for name in names:
if not isinstance(name, basestring):
name = str(name) # will need consistent unicode handling
lag_names.append('L'+str(i)+'.'+name)

# handle the constant name
Expand Down

0 comments on commit 2d5503a

Please sign in to comment.