Merge pull request statsmodels#997 from josef-pkt/misc_13

TST/BUG: Bug fixes and tests for diagnostic and pylint fixes.
PierreBdR · Jul 31, 2013 · 942aaec · 942aaec
2 parents b47911a + 1d42b5b
commit 942aaec
Show file tree

Hide file tree

Showing 16 changed files with 132 additions and 93 deletions.
diff --git a/statsmodels/examples/try_power2.py b/statsmodels/examples/try_power2.py
@@ -48,7 +48,6 @@
     nobs_p2 = ttind_solve_power(effect_size=effect_size, nobs1=None, alpha=alpha, power=power)
     print 'nobs  ', nobs_p2
     print 'effect', ttind_solve_power(effect_size=None, nobs1=nobs_p2, alpha=alpha, power=power), effect_size
-
     print 'alpha ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=None, power=power), alpha
     print 'power  ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=alpha, power=None), power
     print 'ratio  ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=alpha, power=power, ratio=None), 1

diff --git a/statsmodels/regression/tests/test_glsar_stata.py b/statsmodels/regression/tests/test_glsar_stata.py
@@ -7,20 +7,14 @@
 """
 
 import numpy as np
-from numpy.testing import (assert_almost_equal, assert_equal, assert_,
-                           assert_approx_equal, assert_array_less)
+from numpy.testing import assert_almost_equal, assert_allclose
 
-from statsmodels.regression.linear_model import OLS, GLSAR
+from statsmodels.regression.linear_model import GLSAR
 from statsmodels.tools.tools import add_constant
 from statsmodels.datasets import macrodata
 
-def assert_allclose(actual, desired, rtol=1e-7, atol=0,
-                    err_msg='', verbose=True):
-    assert_(np.allclose(actual, desired, rtol=rtol, atol=atol))
-
-
-class CheckStataResults(object):
 
+class CheckStataResultsMixin(object):
 
     def test_params_table(self):
         res, results = self.res, self.results
@@ -30,7 +24,7 @@ def test_params_table(self):
         assert_allclose(res.tvalues, results.tvalues, atol=0, rtol=0.004)
         assert_allclose(res.pvalues, results.pvalues, atol=1e-7, rtol=0.004)
 
-class CheckStataResultsP(CheckStataResults):
+class CheckStataResultsPMixin(CheckStataResultsMixin):
 
     def test_predicted(self):
         res, results = self.res, self.results
@@ -40,7 +34,7 @@ def test_predicted(self):
         #not yet
         #assert_almost_equal(res.fittedvalues_se, results.fittedvalues_se, 4)
 
-class TestGLSARCorc(CheckStataResultsP):
+class TestGLSARCorc(CheckStataResultsPMixin):
 
     @classmethod
     def setup_class(self):
@@ -56,7 +50,7 @@ def setup_class(self):
         self.results = results
 
     def test_rho(self):
-        assert_almost_equal(self.res.model.rho, self.results.rho, 3)
+        assert_almost_equal(self.res.model.rho, self.results.rho, 3)  # pylint: disable-msg=E1101
 
 
 if __name__=="__main__":

diff --git a/statsmodels/regression/tests/test_quantile_regression.py b/statsmodels/regression/tests/test_quantile_regression.py
@@ -1,9 +1,8 @@
-import os
-import scipy
+
+import scipy.stats
 import numpy as np
-import pandas as pd
 import statsmodels.api as sm
-from numpy.testing import *
+from numpy.testing import assert_allclose, assert_equal, assert_almost_equal
 from patsy import dmatrices   # pylint: disable=E0611
 from statsmodels.regression.quantile_regression import QuantReg
 from results_quantile_regression import (

diff --git a/statsmodels/sandbox/examples/example_mle.py b/statsmodels/sandbox/examples/example_mle.py
@@ -43,7 +43,7 @@
 res = mod.fit()
 #print mod.results.params
 print 'OLS'
-print mod._results.params
+print res.params
 print 'MLE'
 #resfmin2 = optimize.fmin(f, mod.results.params*0.9, maxfun=5000, maxiter=5000, xtol=1e-10, ftol= 1e-10)
 resfmin2 = optimize.fmin(f, np.ones(7), maxfun=5000, maxiter=5000, xtol=1e-10, ftol= 1e-10)

diff --git a/statsmodels/sandbox/examples/example_pca_regression.py b/statsmodels/sandbox/examples/example_pca_regression.py
@@ -82,7 +82,7 @@
     for inidx, outidx in LeaveOneOut(len(y0)):
         resl1o = sm.OLS(y0[inidx], fact_wconst[inidx,:]).fit()
         #print data.endog[outidx], res.model.predict(data.exog[outidx,:]),
-        prederr2 += (y0[outidx] - resl1o.model.predict(fact_wconst[outidx,:]))**2.
+        prederr2 += (y0[outidx] - resl1o.predict(fact_wconst[outidx,:]))**2.
     results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2])
 
 results = np.array(results)

diff --git a/statsmodels/sandbox/examples/try_quantile_regression.py b/statsmodels/sandbox/examples/try_quantile_regression.py
@@ -7,27 +7,27 @@
 import numpy as np
 import statsmodels.api as sm
 
-from statsmodels.sandbox.regression.quantile_regression import quantilereg
-
+from statsmodels.regression.quantile_regression import QuantReg
 sige = 5
 nobs, k_vars = 500, 5
 x = np.random.randn(nobs, k_vars)
 #x[:,0] = 1
 y = x.sum(1) + sige * (np.random.randn(nobs)/2 + 1)**3
 p = 0.5
-res_qr = quantilereg(y,x,p)
+exog = np.column_stack((np.ones(nobs), x))
+res_qr = QuantReg(y, exog).fit(p)
 
-res_qr2 = quantilereg(y,x,0.25)
-res_qr3 = quantilereg(y,x,0.75)
-res_ols = sm.OLS(y, np.column_stack((np.ones(nobs), x))).fit()
+res_qr2 = QuantReg(y, exog).fit(0.25)
+res_qr3 = QuantReg(y, exog).fit(0.75)
+res_ols = sm.OLS(y, exog).fit()
 
 
 ##print 'ols ', res_ols.params
 ##print '0.25', res_qr2
 ##print '0.5 ', res_qr
 ##print '0.75', res_qr3
 
-params = [res_ols.params, res_qr2, res_qr, res_qr3]
+params = [res_ols.params, res_qr2.params, res_qr.params, res_qr3.params]
 labels = ['ols', 'qr 0.25', 'qr 0.5', 'qr 0.75']
 
 import matplotlib.pyplot as plt

diff --git a/statsmodels/sandbox/examples/try_quantile_regression1.py b/statsmodels/sandbox/examples/try_quantile_regression1.py
@@ -10,25 +10,23 @@
 from scipy import stats
 import statsmodels.api as sm
 
-from statsmodels.sandbox.regression.quantile_regression import quantilereg
+from statsmodels.regression.quantile_regression import QuantReg
 
 sige = 0.1
-nobs, k_vars = 1500, 3
+nobs, k_vars = 500, 3
 x = np.random.uniform(-1, 1, size=nobs)
 x.sort()
 exog = np.vander(x, k_vars+1)[:,::-1]
 mix = 0.1 * stats.norm.pdf(x[:,None], loc=np.linspace(-0.5, 0.75, 4), scale=0.01).sum(1)
 y = exog.sum(1) + mix + sige * (np.random.randn(nobs)/2 + 1)**3
 
 p = 0.5
-x0 = exog[:, 1:]    #quantilereg includes constant already!
-res_qr = quantilereg(y, x0, p)
-
-res_qr2 = quantilereg(y, x0, 0.1)
-res_qr3 = quantilereg(y, x0, 0.75)
+res_qr = QuantReg(y, exog).fit(p)
+res_qr2 = QuantReg(y, exog).fit(0.1)
+res_qr3 = QuantReg(y, exog).fit(0.75)
 res_ols = sm.OLS(y, exog).fit()
 
-params = [res_ols.params, res_qr2, res_qr, res_qr3]
+params = [res_ols.params, res_qr2.params, res_qr.params, res_qr3.params]
 labels = ['ols', 'qr 0.1', 'qr 0.5', 'qr 0.75']
 
 import matplotlib.pyplot as plt

diff --git a/statsmodels/stats/power.py b/statsmodels/stats/power.py
@@ -53,11 +53,20 @@ def ttest_power(effect_size, nobs, alpha, df=None, alternative='two-sided'):
     pow_ = 0
     if alternative in ['two-sided', '2s', 'larger']:
         crit_upp = stats.t.isf(alpha_, df)
+        #print crit_upp, df, d*np.sqrt(nobs)
         # use private methods, generic methods return nan with negative d
-        pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
+        if np.any(np.isnan(crit_upp)):
+            # avoid endless loop, https://github.com/scipy/scipy/issues/2667
+            pow_ = np.nan
+        else:
+            pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
     if alternative in ['two-sided', '2s', 'smaller']:
         crit_low = stats.t.ppf(alpha_, df)
-        pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
+        #print crit_low, df, d*np.sqrt(nobs)
+        if np.any(np.isnan(crit_low)):
+            pow_ = np.nan
+        else:
+            pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
     return pow_
 
 def normal_power(effect_size, nobs, alpha, alternative='two-sided', sigma=1.):
@@ -167,6 +176,8 @@ def __init__(self, **kwds):
             self.start_bqexp[key] = dict(low=1., start_upp=50.)
         for key in ['ratio']:
             self.start_bqexp[key] = dict(low=1e-8, start_upp=2)
+        for key in ['alpha']:
+            self.start_bqexp[key] = dict(low=1e-12, upp=1 - 1e-12)
 
     def power(self, *args, **kwds):
         raise NotImplementedError
@@ -207,9 +218,14 @@ def solve_power(self, **kwds):
             del kwds['power']
             return self.power(**kwds)
 
+        self._counter = 0
         def func(x):
             kwds[key] = x
             fval = self._power_identity(**kwds)
+            self._counter += 1
+            #print self._counter,
+            if self._counter > 500:
+                raise RuntimeError('possible endless loop (500 NaNs)')
             if np.isnan(fval):
                 return np.inf
             else:
@@ -224,6 +240,7 @@ def func(x):
 
         fit_kwds = self.start_bqexp[key]
         fit_res = []
+        #print vars()
         try:
             val, res = brentq_expanding(func, full_output=True, **fit_kwds)
             failed = False
@@ -240,6 +257,7 @@ def func(x):
             #TODO: check more cases to make this robust
             val, infodict, ier, msg = optimize.fsolve(func, start_value,
                                                       full_output=True) #scalar
+            #val = optimize.newton(func, start_value) #scalar
             fval = infodict['fvec']
             fit_res.append(infodict)
             if ier == 1 and np.abs(fval) < 1e-4 :
@@ -512,8 +530,8 @@ def power(self, effect_size, nobs1, alpha, ratio=1, df=None,
             df = (nobs1 - 1 + nobs2 - 1)
 
         nobs = 1./ (1. / nobs1 + 1. / nobs2)
-        return ttest_power(effect_size, nobs, alpha, df=df,
-                           alternative=alternative)
+        #print 'calling ttest power with', (effect_size, nobs, alpha, df, alternative)
+        return ttest_power(effect_size, nobs, alpha, df=df, alternative=alternative)
 
     #method is only added to have explicit keywords and docstring
     def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None,

diff --git a/statsmodels/stats/tests/test_diagnostic.py b/statsmodels/stats/tests/test_diagnostic.py
@@ -592,10 +592,10 @@ def test_influence(self):
         lsdiag = json.load(fp)
 
         #basic
-        assert_almost_equal(lsdiag['cov.scaled'],
-                            res.cov_params().ravel(), decimal=14)
-        assert_almost_equal(lsdiag['cov.unscaled'],
-                            res.normalized_cov_params.ravel(), decimal=14)
+        assert_almost_equal(np.array(lsdiag['cov.scaled']).reshape(3, 3),
+                            res.cov_params(), decimal=14)
+        assert_almost_equal(np.array(lsdiag['cov.unscaled']).reshape(3, 3),
+                            res.normalized_cov_params, decimal=14)
 
         c0, c1 = infl.cooks_distance #TODO: what's c1
 
@@ -642,6 +642,35 @@ def test_influence(self):
         '''
 
 
+class TestDiagnosticGPandas(TestDiagnosticG):
+
+    def __init__(self):
+        d = macrodata.load_pandas().data
+        #growth rates
+        d['gs_l_realinv'] = 400 * np.log(d['realinv']).diff()
+        d['gs_l_realgdp'] = 400 * np.log(d['realgdp']).diff()
+        d['lint'] = d['realint'].shift(1)
+        d['tbilrate'] = d['tbilrate'].shift(1)
+
+        d = d.dropna()
+        self.d = d
+        endogg = d['gs_l_realinv']
+        exogg = add_constant(d[['gs_l_realgdp', 'lint']])
+        exogg2 = add_constant(d[['gs_l_realgdp', 'tbilrate']])
+        exogg3 = add_constant(d[['gs_l_realgdp']])
+
+        res_ols = OLS(endogg, exogg).fit()
+        res_ols2 = OLS(endogg, exogg2).fit()
+
+        res_ols3 = OLS(endogg, exogg3).fit()
+
+        self.res = res_ols
+        self.res2 = res_ols2
+        self.res3 = res_ols3
+        self.endog = self.res.model.endog
+        self.exog = self.res.model.exog
+
+
 def grangertest():
     #> gt = grangertest(ginv, ggdp, order=4)
     #> gt
@@ -816,7 +845,7 @@ def test_outlier_test():
     res2 = np.c_[rstudent, unadj_p, bonf_p]
     res = oi.outlier_test(ndarray_mod, method='b', labels=labels, order=True)
     np.testing.assert_almost_equal(res.values, res2, 7)
-    np.testing.assert_equal(res.index.tolist(), sorted_labels)
+    np.testing.assert_equal(res.index.tolist(), sorted_labels)  # pylint: disable-msg=E1103
 
 
 if __name__ == '__main__':

diff --git a/statsmodels/stats/tests/test_groups_sw.py b/statsmodels/stats/tests/test_groups_sw.py
@@ -12,11 +12,11 @@
 import statsmodels.stats.sandwich_covariance as sw
 from statsmodels.tools.grouputils import Group, GroupSorted
 
-class CheckPanelLag(object):
+class CheckPanelLagMixin(object):
 
     def calculate(self):
-        self.g = g = GroupSorted(self.gind)
-        self.alla = [(lag, sw.lagged_groups(self.x, lag, g.groupidx))
+        self.g = g = GroupSorted(self.gind)  # pylint: disable-msg=W0201
+        self.alla = [(lag, sw.lagged_groups(self.x, lag, g.groupidx))  # pylint: disable-msg=W0201
                          for lag in range(5)]
 
     def test_values(self):
@@ -30,7 +30,7 @@ def test_raises(self):
                       self.g.groupidx)
 
 
-class TestBalanced(CheckPanelLag):
+class TestBalanced(CheckPanelLagMixin):
 
     def __init__(self):
         self.gind = np.repeat([0,1,2], 5)
@@ -50,7 +50,7 @@ def __init__(self):
             }
         self.calculate()
 
-class TestUnBalanced(CheckPanelLag):
+class TestUnBalanced(CheckPanelLagMixin):
 
     def __init__(self):
         self.gind = gind = np.repeat([0,1,2], [3, 5, 10])

diff --git a/statsmodels/stats/tests/test_multi.py b/statsmodels/stats/tests/test_multi.py
@@ -323,7 +323,6 @@ def test_tukeyhsd():
 
     m_r = [94.39167, 102.54167,  91.13333, 118.20000,  99.18333]
     myres = tukeyhsd(m_r, 6, 110.8, alpha=0.05, df=4)
-    from numpy.testing import assert_almost_equal, assert_equal
     pairs, reject, meandiffs, std_pairs, confint, q_crit = myres[:6]
     assert_almost_equal(meandiffs, res[:, 0], decimal=5)
     assert_almost_equal(confint, res[:, 1:3], decimal=2)

diff --git a/statsmodels/stats/tests/test_power.py b/statsmodels/stats/tests/test_power.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+# pylint: disable=W0231, W0142
 """Tests for statistical power calculations
 
 Note:
@@ -26,7 +27,7 @@
 try:
     import matplotlib.pyplot as plt  #makes plt available for test functions
     have_matplotlib = True
-except:
+except ImportError:
     have_matplotlib = False
 
 

diff --git a/statsmodels/tsa/mlemodel.py b/statsmodels/tsa/mlemodel.py
@@ -15,7 +15,7 @@
 
 try:
     import numdifftools as ndt
-except:
+except ImportError:
     pass
 
 from statsmodels.base.model import LikelihoodModel

diff --git a/statsmodels/tsa/tests/test_ar.py b/statsmodels/tsa/tests/test_ar.py
@@ -14,7 +14,7 @@
 DECIMAL_5 = 5
 DECIMAL_4 = 4
 
-class CheckAR(object):
+class CheckARMixin(object):
     def test_params(self):
         assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_6)
 
@@ -39,7 +39,7 @@ def test_pickle(self):
         res_unpickled = self.res1.__class__.load(fh)
         assert_(type(res_unpickled) is type(self.res1))
 
-class TestAROLSConstant(CheckAR):
+class TestAROLSConstant(CheckARMixin):
     """
     Test AR fit by OLS with a constant.
     """
@@ -76,7 +76,7 @@ def test_predict(self):
                 self.res2.FVOLSn15start312, DECIMAL_4)
 
 
-class TestAROLSNoConstant(CheckAR):
+class TestAROLSNoConstant(CheckARMixin):
     """f
     Test AR fit by OLS without a constant.
     """
@@ -252,9 +252,9 @@ def test_ar_dates():
     pred = ar_model.predict(start='2005', end='2015')
     predict_dates = sm.tsa.datetools.dates_from_range('2005', '2015')
     try:
-        from pandas import DatetimeIndex
+        from pandas import DatetimeIndex  # pylint: disable-msg=E0611
         predict_dates = DatetimeIndex(predict_dates, freq='infer')
-    except:
+    except ImportError:
         pass
     assert_equal(ar_model.data.predict_dates, predict_dates)
     assert_equal(pred.index, predict_dates)