Patch summary (three files):
  * statsmodels/tsa/arima_model.py: in _fit_start_params_hr, raise ValueError
    when p_tmp + q >= len(endog) rather than going on to compute resid.
  * statsmodels/tsa/tests/test_arima.py: add test_arima_small_data_bug
    (expects ValueError from mod.fit with 8 observations and order (2, 0, 2))
    and test_arima_dataframe_integer_name (builds the model only; no fit call).
  * statsmodels/tsa/vector_ar/util.py: in make_lag_names, pass any name that is
    not a basestring through str() before building the 'L<i>.<name>' label.
NOTE(review): leading whitespace on context lines was reconstructed from the
Python block structure; confirm against the target tree before applying
(or apply with whitespace-insensitive matching).

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index 5c06a19b9cf..0320a1f38cf 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -399,6 +399,13 @@ def _fit_start_params_hr(self, order):
                 armod = AR(endog).fit(ic='bic', trend='nc')
                 arcoefs_tmp = armod.params
                 p_tmp = armod.k_ar
+                # it's possible in small samples that optimal lag-order
+                # doesn't leave enough obs. No consistent way to fix.
+                if p_tmp + q >= len(endog):
+                    raise ValueError("Proper starting parameters cannot"
+                                     " be found for this order with this number "
+                                     "of observations. Use the start_params "
+                                     "argument.")
                 resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp,
                                                       trim='both'), arcoefs_tmp)
                 if p < p_tmp + q:
diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py
index bc879401c54..efaf701b41b 100644
--- a/statsmodels/tsa/tests/test_arima.py
+++ b/statsmodels/tsa/tests/test_arima.py
@@ -1796,6 +1796,32 @@ def test_bad_start_params():
     arima_mod = ARIMA(np.log(inv), (1,1,2))
     assert_raises(ValueError, mod.fit)
 
+def test_arima_small_data_bug():
+    # Issue 1038, too few observations with given order
+    from datetime import datetime
+    import statsmodels.api as sm
+
+    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474]
+
+    dr = dates_from_range("1990q1", length=len(vals))
+    ts = pandas.TimeSeries(vals, index=dr)
+    df = pandas.DataFrame(ts)
+    mod = sm.tsa.ARIMA(df, (2, 0, 2))
+    assert_raises(ValueError, mod.fit)
+
+def test_arima_dataframe_integer_name():
+    # Smoke Test for Issue 1038
+    from datetime import datetime
+    import statsmodels.api as sm
+
+    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474,
+            94.0, 96.5, 93.3, 97.5, 96.3, 92.]
+
+    dr = dates_from_range("1990q1", length=len(vals))
+    ts = pandas.TimeSeries(vals, index=dr)
+    df = pandas.DataFrame(ts)
+    mod = sm.tsa.ARIMA(df, (2, 0, 2))
+
 if __name__ == "__main__":
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb'], exit=False)
diff --git a/statsmodels/tsa/vector_ar/util.py b/statsmodels/tsa/vector_ar/util.py
index 30b3bdee132..450a84bcf44 100644
--- a/statsmodels/tsa/vector_ar/util.py
+++ b/statsmodels/tsa/vector_ar/util.py
@@ -60,6 +60,8 @@ def make_lag_names(names, lag_order, trendorder=1):
     # take care of lagged endogenous names
     for i in range(1, lag_order + 1):
         for name in names:
+            if not isinstance(name, basestring):
+                name = str(name) # will need consistent unicode handling
             lag_names.append('L'+str(i)+'.'+name)
 
     # handle the constant name