From 9653fe998424f0e878c4ccd2158f4f8bdc59b682 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Mon, 12 Aug 2013 13:51:45 -0400
Subject: [PATCH 1/4] TST: Test for issue 1038.

---
 statsmodels/tsa/tests/test_arima.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py
index bc879401c54..a1ac6d506a3 100644
--- a/statsmodels/tsa/tests/test_arima.py
+++ b/statsmodels/tsa/tests/test_arima.py
@@ -1796,6 +1796,34 @@ def test_bad_start_params():
     arima_mod = ARIMA(np.log(inv), (1,1,2))
     assert_raises(ValueError, mod.fit)
 
+def test_arima_small_data_bug():
+    # Issue 1038, too few observations with given order
+    from datetime import datetime
+    import pandas as pd
+    import statsmodels.api as sm
+
+    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474]
+
+    dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS')
+    ts = pd.TimeSeries(vals, index=dr)
+    df = pd.DataFrame(ts)
+    mod = sm.tsa.ARIMA(df, (2, 0, 2))
+    assert_raises(ValueError, mod.fit)
+
+def test_arima_dataframe_integer_name():
+    # Smoke Test for Issue 1038
+    from datetime import datetime
+    import pandas as pd
+    import statsmodels.api as sm
+
+    vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474,
+            94.0, 96.5, 93.3, 97.5, 96.3, 92.]
+
+    dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS')
+    ts = pd.TimeSeries(vals, index=dr)
+    df = pd.DataFrame(ts)
+    mod = sm.tsa.ARIMA(df, (2, 0, 2))
+
 if __name__ == "__main__":
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb'], exit=False)

From bfc928681fefdc25fcfe1568f7289e3a53c3ae2d Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Mon, 12 Aug 2013 13:52:37 -0400
Subject: [PATCH 2/4] BUG: Handle non-string names in lag name making.

---
 statsmodels/tsa/vector_ar/util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/statsmodels/tsa/vector_ar/util.py b/statsmodels/tsa/vector_ar/util.py
index 30b3bdee132..450a84bcf44 100644
--- a/statsmodels/tsa/vector_ar/util.py
+++ b/statsmodels/tsa/vector_ar/util.py
@@ -60,6 +60,8 @@ def make_lag_names(names, lag_order, trendorder=1):
     # take care of lagged endogenous names
     for i in range(1, lag_order + 1):
         for name in names:
+            if not isinstance(name, basestring):
+                name = str(name) # will need consistent unicode handling
             lag_names.append('L'+str(i)+'.'+name)
 
     # handle the constant name

From 34f526f2a8f1758e11270b23c3df99422258e436 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Mon, 12 Aug 2013 13:53:02 -0400
Subject: [PATCH 3/4] BUG: Fail gracefully when not enough obs given for order.

---
 statsmodels/tsa/arima_model.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index 5c06a19b9cf..0320a1f38cf 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -399,6 +399,13 @@ def _fit_start_params_hr(self, order):
                 armod = AR(endog).fit(ic='bic', trend='nc')
                 arcoefs_tmp = armod.params
                 p_tmp = armod.k_ar
+                # In small samples the BIC-selected AR order (p_tmp) can leave
+                # too few obs for the q MA lags. No consistent way to fix.
+                if p_tmp + q >= len(endog):
+                    raise ValueError("Proper starting parameters cannot"
+                            " be found for this order with this number "
+                            "of observations. Use the start_params "
+                            "argument.")
                 resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp,
                                 trim='both'), arcoefs_tmp)
                 if p < p_tmp + q:

From 98afc4d6be07b86be3d77d3f7fd249ec7dab1e56 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Mon, 12 Aug 2013 14:10:47 -0400
Subject: [PATCH 4/4] TST: Make test compatible with pandas 0.7.x

---
 statsmodels/tsa/tests/test_arima.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py
index a1ac6d506a3..efaf701b41b 100644
--- a/statsmodels/tsa/tests/test_arima.py
+++ b/statsmodels/tsa/tests/test_arima.py
@@ -1799,29 +1799,25 @@ def test_bad_start_params():
 def test_arima_small_data_bug():
     # Issue 1038, too few observations with given order
-    from datetime import datetime
-    import pandas as pd
     import statsmodels.api as sm
 
     vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474]
 
-    dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS')
-    ts = pd.TimeSeries(vals, index=dr)
-    df = pd.DataFrame(ts)
+    dr = dates_from_range("1990q1", length=len(vals))
+    ts = pandas.TimeSeries(vals, index=dr)
+    df = pandas.DataFrame(ts)
     mod = sm.tsa.ARIMA(df, (2, 0, 2))
     assert_raises(ValueError, mod.fit)
 
 def test_arima_dataframe_integer_name():
     # Smoke Test for Issue 1038
-    from datetime import datetime
-    import pandas as pd
     import statsmodels.api as sm
 
     vals = [96.2, 98.3, 99.1, 95.5, 94.0, 87.1, 87.9, 86.7402777504474,
             94.0, 96.5, 93.3, 97.5, 96.3, 92.]
 
-    dr = pd.date_range(datetime.today(), periods=len(vals), freq='AS')
-    ts = pd.TimeSeries(vals, index=dr)
-    df = pd.DataFrame(ts)
+    dr = dates_from_range("1990q1", length=len(vals))
+    ts = pandas.TimeSeries(vals, index=dr)
+    df = pandas.DataFrame(ts)
     mod = sm.tsa.ARIMA(df, (2, 0, 2))
 
 if __name__ == "__main__":