From 9653fe998424f0e878c4ccd2158f4f8bdc59b682 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 12 Aug 2013 13:51:45 -0400 Subject: [PATCH 1/4] TST: Test for issue 1038. --- statsmodels/tsa/tests/test_arima.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py index bc879401c54..a1ac6d506a3 100644 --- a/statsmodels/tsa/tests/test_arima.py +++ b/statsmodels/tsa/tests/test_arima.py @@ -1796,6 +1796,34 @@ def test_bad_start_params(): arima_mod = ARIMA(np.log(inv), (1,1,2)) assert_raises(ValueError, mod.fit) +def test_arima_small_data_bug(): + # Issue 1038, too few observations with given order + from datetime import datetime + import pandas as pd + import statsmodels.api as sm + + vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474] + + dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS') + ts = pd.TimeSeries(vals, index=dr) + df = pd.DataFrame(ts) + mod = sm.tsa.ARIMA(df, (2, 0, 2)) + assert_raises(ValueError, mod.fit) + +def test_arima_dataframe_integer_name(): + # Smoke Test for Issue 1038 + from datetime import datetime + import pandas as pd + import statsmodels.api as sm + + vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474, + 94.0, 96.5, 93.3, 97.5, 96.3, 92.] + + dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS') + ts = pd.TimeSeries(vals, index=dr) + df = pd.DataFrame(ts) + mod = sm.tsa.ARIMA(df, (2, 0, 2)) + if __name__ == "__main__": import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb'], exit=False) From bfc928681fefdc25fcfe1568f7289e3a53c3ae2d Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 12 Aug 2013 13:52:37 -0400 Subject: [PATCH 2/4] BUG: Handle non-string names in lag name making. --- statsmodels/tsa/vector_ar/util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/statsmodels/tsa/vector_ar/util.py b/statsmodels/tsa/vector_ar/util.py index 30b3bdee132..450a84bcf44 100644 --- a/statsmodels/tsa/vector_ar/util.py +++ b/statsmodels/tsa/vector_ar/util.py @@ -60,6 +60,8 @@ def make_lag_names(names, lag_order, trendorder=1): # take care of lagged endogenous names for i in range(1, lag_order + 1): for name in names: + if not isinstance(name, basestring): + name = str(name) # will need consistent unicode handling lag_names.append('L'+str(i)+'.'+name) # handle the constant name From 34f526f2a8f1758e11270b23c3df99422258e436 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 12 Aug 2013 13:53:02 -0400 Subject: [PATCH 3/4] BUG: Fail gracefully when not enough obs given for order. --- statsmodels/tsa/arima_model.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py index 5c06a19b9cf..0320a1f38cf 100644 --- a/statsmodels/tsa/arima_model.py +++ b/statsmodels/tsa/arima_model.py @@ -399,6 +399,13 @@ def _fit_start_params_hr(self, order): armod = AR(endog).fit(ic='bic', trend='nc') arcoefs_tmp = armod.params p_tmp = armod.k_ar + # it's possible in small samples that optimal lag-order + # doesn't leave enough obs. No consistent way to fix. + if p_tmp + q >= len(endog): + raise ValueError("Proper starting parameters cannot" + " be found for this order with this number " + "of observations. Use the start_params " + "argument.") resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp, trim='both'), arcoefs_tmp) if p < p_tmp + q: From 98afc4d6be07b86be3d77d3f7fd249ec7dab1e56 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 12 Aug 2013 14:10:47 -0400 Subject: [PATCH 4/4] TST: Make test compatible with pandas 0.7.x --- statsmodels/tsa/tests/test_arima.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py index a1ac6d506a3..efaf701b41b 100644 --- a/statsmodels/tsa/tests/test_arima.py +++ b/statsmodels/tsa/tests/test_arima.py @@ -1799,29 +1799,27 @@ def test_bad_start_params(): def test_arima_small_data_bug(): # Issue 1038, too few observations with given order from datetime import datetime - import pandas as pd import statsmodels.api as sm vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474] - dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS') - ts = pd.TimeSeries(vals, index=dr) - df = pd.DataFrame(ts) + dr = dates_from_range("1990q1", length=len(vals)) + ts = pandas.TimeSeries(vals, index=dr) + df = pandas.DataFrame(ts) mod = sm.tsa.ARIMA(df, (2, 0, 2)) assert_raises(ValueError, mod.fit) def test_arima_dataframe_integer_name(): # Smoke Test for Issue 1038 from datetime import datetime - import pandas as pd import statsmodels.api as sm vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474, 94.0, 96.5, 93.3, 97.5, 96.3, 92.] - dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS') - ts = pd.TimeSeries(vals, index=dr) - df = pd.DataFrame(ts) + dr = dates_from_range("1990q1", length=len(vals)) + ts = pandas.TimeSeries(vals, index=dr) + df = pandas.DataFrame(ts) mod = sm.tsa.ARIMA(df, (2, 0, 2)) if __name__ == "__main__":