Skip to content

Commit

Permalink
Merge pull request statsmodels#997 from josef-pkt/misc_13
Browse files Browse the repository at this point in the history
TST/BUG: Bug fixes and tests for diagnostic and pylint fixes.
  • Loading branch information
jseabold committed Jul 31, 2013
2 parents b47911a + 1d42b5b commit 942aaec
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 93 deletions.
1 change: 0 additions & 1 deletion statsmodels/examples/try_power2.py
Expand Up @@ -48,7 +48,6 @@
nobs_p2 = ttind_solve_power(effect_size=effect_size, nobs1=None, alpha=alpha, power=power)
print 'nobs ', nobs_p2
print 'effect', ttind_solve_power(effect_size=None, nobs1=nobs_p2, alpha=alpha, power=power), effect_size

print 'alpha ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=None, power=power), alpha
print 'power ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=alpha, power=None), power
print 'ratio ', ttind_solve_power(effect_size=effect_size, nobs1=nobs_p2, alpha=alpha, power=power, ratio=None), 1
Expand Down
18 changes: 6 additions & 12 deletions statsmodels/regression/tests/test_glsar_stata.py
Expand Up @@ -7,20 +7,14 @@
"""

import numpy as np
from numpy.testing import (assert_almost_equal, assert_equal, assert_,
assert_approx_equal, assert_array_less)
from numpy.testing import assert_almost_equal, assert_allclose

from statsmodels.regression.linear_model import OLS, GLSAR
from statsmodels.regression.linear_model import GLSAR
from statsmodels.tools.tools import add_constant
from statsmodels.datasets import macrodata

def assert_allclose(actual, desired, rtol=1e-7, atol=0,
err_msg='', verbose=True):
assert_(np.allclose(actual, desired, rtol=rtol, atol=atol))


class CheckStataResults(object):

class CheckStataResultsMixin(object):

def test_params_table(self):
res, results = self.res, self.results
Expand All @@ -30,7 +24,7 @@ def test_params_table(self):
assert_allclose(res.tvalues, results.tvalues, atol=0, rtol=0.004)
assert_allclose(res.pvalues, results.pvalues, atol=1e-7, rtol=0.004)

class CheckStataResultsP(CheckStataResults):
class CheckStataResultsPMixin(CheckStataResultsMixin):

def test_predicted(self):
res, results = self.res, self.results
Expand All @@ -40,7 +34,7 @@ def test_predicted(self):
#not yet
#assert_almost_equal(res.fittedvalues_se, results.fittedvalues_se, 4)

class TestGLSARCorc(CheckStataResultsP):
class TestGLSARCorc(CheckStataResultsPMixin):

@classmethod
def setup_class(self):
Expand All @@ -56,7 +50,7 @@ def setup_class(self):
self.results = results

def test_rho(self):
assert_almost_equal(self.res.model.rho, self.results.rho, 3)
assert_almost_equal(self.res.model.rho, self.results.rho, 3) # pylint: disable-msg=E1101


if __name__=="__main__":
Expand Down
7 changes: 3 additions & 4 deletions statsmodels/regression/tests/test_quantile_regression.py
@@ -1,9 +1,8 @@
import os
import scipy

import scipy.stats
import numpy as np
import pandas as pd
import statsmodels.api as sm
from numpy.testing import *
from numpy.testing import assert_allclose, assert_equal, assert_almost_equal
from patsy import dmatrices # pylint: disable=E0611
from statsmodels.regression.quantile_regression import QuantReg
from results_quantile_regression import (
Expand Down
2 changes: 1 addition & 1 deletion statsmodels/sandbox/examples/example_mle.py
Expand Up @@ -43,7 +43,7 @@
res = mod.fit()
#print mod.results.params
print 'OLS'
print mod._results.params
print res.params
print 'MLE'
#resfmin2 = optimize.fmin(f, mod.results.params*0.9, maxfun=5000, maxiter=5000, xtol=1e-10, ftol= 1e-10)
resfmin2 = optimize.fmin(f, np.ones(7), maxfun=5000, maxiter=5000, xtol=1e-10, ftol= 1e-10)
Expand Down
2 changes: 1 addition & 1 deletion statsmodels/sandbox/examples/example_pca_regression.py
Expand Up @@ -82,7 +82,7 @@
for inidx, outidx in LeaveOneOut(len(y0)):
resl1o = sm.OLS(y0[inidx], fact_wconst[inidx,:]).fit()
#print data.endog[outidx], res.model.predict(data.exog[outidx,:]),
prederr2 += (y0[outidx] - resl1o.model.predict(fact_wconst[outidx,:]))**2.
prederr2 += (y0[outidx] - resl1o.predict(fact_wconst[outidx,:]))**2.
results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2])

results = np.array(results)
Expand Down
14 changes: 7 additions & 7 deletions statsmodels/sandbox/examples/try_quantile_regression.py
Expand Up @@ -7,27 +7,27 @@
import numpy as np
import statsmodels.api as sm

from statsmodels.sandbox.regression.quantile_regression import quantilereg

from statsmodels.regression.quantile_regression import QuantReg
sige = 5
nobs, k_vars = 500, 5
x = np.random.randn(nobs, k_vars)
#x[:,0] = 1
y = x.sum(1) + sige * (np.random.randn(nobs)/2 + 1)**3
p = 0.5
res_qr = quantilereg(y,x,p)
exog = np.column_stack((np.ones(nobs), x))
res_qr = QuantReg(y, exog).fit(p)

res_qr2 = quantilereg(y,x,0.25)
res_qr3 = quantilereg(y,x,0.75)
res_ols = sm.OLS(y, np.column_stack((np.ones(nobs), x))).fit()
res_qr2 = QuantReg(y, exog).fit(0.25)
res_qr3 = QuantReg(y, exog).fit(0.75)
res_ols = sm.OLS(y, exog).fit()


##print 'ols ', res_ols.params
##print '0.25', res_qr2
##print '0.5 ', res_qr
##print '0.75', res_qr3

params = [res_ols.params, res_qr2, res_qr, res_qr3]
params = [res_ols.params, res_qr2.params, res_qr.params, res_qr3.params]
labels = ['ols', 'qr 0.25', 'qr 0.5', 'qr 0.75']

import matplotlib.pyplot as plt
Expand Down
14 changes: 6 additions & 8 deletions statsmodels/sandbox/examples/try_quantile_regression1.py
Expand Up @@ -10,25 +10,23 @@
from scipy import stats
import statsmodels.api as sm

from statsmodels.sandbox.regression.quantile_regression import quantilereg
from statsmodels.regression.quantile_regression import QuantReg

sige = 0.1
nobs, k_vars = 1500, 3
nobs, k_vars = 500, 3
x = np.random.uniform(-1, 1, size=nobs)
x.sort()
exog = np.vander(x, k_vars+1)[:,::-1]
mix = 0.1 * stats.norm.pdf(x[:,None], loc=np.linspace(-0.5, 0.75, 4), scale=0.01).sum(1)
y = exog.sum(1) + mix + sige * (np.random.randn(nobs)/2 + 1)**3

p = 0.5
x0 = exog[:, 1:] #quantilereg includes constant already!
res_qr = quantilereg(y, x0, p)

res_qr2 = quantilereg(y, x0, 0.1)
res_qr3 = quantilereg(y, x0, 0.75)
res_qr = QuantReg(y, exog).fit(p)
res_qr2 = QuantReg(y, exog).fit(0.1)
res_qr3 = QuantReg(y, exog).fit(0.75)
res_ols = sm.OLS(y, exog).fit()

params = [res_ols.params, res_qr2, res_qr, res_qr3]
params = [res_ols.params, res_qr2.params, res_qr.params, res_qr3.params]
labels = ['ols', 'qr 0.1', 'qr 0.5', 'qr 0.75']

import matplotlib.pyplot as plt
Expand Down
26 changes: 22 additions & 4 deletions statsmodels/stats/power.py
Expand Up @@ -53,11 +53,20 @@ def ttest_power(effect_size, nobs, alpha, df=None, alternative='two-sided'):
pow_ = 0
if alternative in ['two-sided', '2s', 'larger']:
crit_upp = stats.t.isf(alpha_, df)
#print crit_upp, df, d*np.sqrt(nobs)
# use private methods, generic methods return nan with negative d
pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
if np.any(np.isnan(crit_upp)):
# avoid endless loop, https://github.com/scipy/scipy/issues/2667
pow_ = np.nan
else:
pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
if alternative in ['two-sided', '2s', 'smaller']:
crit_low = stats.t.ppf(alpha_, df)
pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
#print crit_low, df, d*np.sqrt(nobs)
if np.any(np.isnan(crit_low)):
pow_ = np.nan
else:
pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
return pow_

def normal_power(effect_size, nobs, alpha, alternative='two-sided', sigma=1.):
Expand Down Expand Up @@ -167,6 +176,8 @@ def __init__(self, **kwds):
self.start_bqexp[key] = dict(low=1., start_upp=50.)
for key in ['ratio']:
self.start_bqexp[key] = dict(low=1e-8, start_upp=2)
for key in ['alpha']:
self.start_bqexp[key] = dict(low=1e-12, upp=1 - 1e-12)

def power(self, *args, **kwds):
raise NotImplementedError
Expand Down Expand Up @@ -207,9 +218,14 @@ def solve_power(self, **kwds):
del kwds['power']
return self.power(**kwds)

self._counter = 0
def func(x):
kwds[key] = x
fval = self._power_identity(**kwds)
self._counter += 1
#print self._counter,
if self._counter > 500:
raise RuntimeError('possible endless loop (500 NaNs)')
if np.isnan(fval):
return np.inf
else:
Expand All @@ -224,6 +240,7 @@ def func(x):

fit_kwds = self.start_bqexp[key]
fit_res = []
#print vars()
try:
val, res = brentq_expanding(func, full_output=True, **fit_kwds)
failed = False
Expand All @@ -240,6 +257,7 @@ def func(x):
#TODO: check more cases to make this robust
val, infodict, ier, msg = optimize.fsolve(func, start_value,
full_output=True) #scalar
#val = optimize.newton(func, start_value) #scalar
fval = infodict['fvec']
fit_res.append(infodict)
if ier == 1 and np.abs(fval) < 1e-4 :
Expand Down Expand Up @@ -512,8 +530,8 @@ def power(self, effect_size, nobs1, alpha, ratio=1, df=None,
df = (nobs1 - 1 + nobs2 - 1)

nobs = 1./ (1. / nobs1 + 1. / nobs2)
return ttest_power(effect_size, nobs, alpha, df=df,
alternative=alternative)
#print 'calling ttest power with', (effect_size, nobs, alpha, df, alternative)
return ttest_power(effect_size, nobs, alpha, df=df, alternative=alternative)

#method is only added to have explicit keywords and docstring
def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None,
Expand Down
39 changes: 34 additions & 5 deletions statsmodels/stats/tests/test_diagnostic.py
Expand Up @@ -592,10 +592,10 @@ def test_influence(self):
lsdiag = json.load(fp)

#basic
assert_almost_equal(lsdiag['cov.scaled'],
res.cov_params().ravel(), decimal=14)
assert_almost_equal(lsdiag['cov.unscaled'],
res.normalized_cov_params.ravel(), decimal=14)
assert_almost_equal(np.array(lsdiag['cov.scaled']).reshape(3, 3),
res.cov_params(), decimal=14)
assert_almost_equal(np.array(lsdiag['cov.unscaled']).reshape(3, 3),
res.normalized_cov_params, decimal=14)

c0, c1 = infl.cooks_distance #TODO: what's c1

Expand Down Expand Up @@ -642,6 +642,35 @@ def test_influence(self):
'''


class TestDiagnosticGPandas(TestDiagnosticG):

def __init__(self):
d = macrodata.load_pandas().data
#growth rates
d['gs_l_realinv'] = 400 * np.log(d['realinv']).diff()
d['gs_l_realgdp'] = 400 * np.log(d['realgdp']).diff()
d['lint'] = d['realint'].shift(1)
d['tbilrate'] = d['tbilrate'].shift(1)

d = d.dropna()
self.d = d
endogg = d['gs_l_realinv']
exogg = add_constant(d[['gs_l_realgdp', 'lint']])
exogg2 = add_constant(d[['gs_l_realgdp', 'tbilrate']])
exogg3 = add_constant(d[['gs_l_realgdp']])

res_ols = OLS(endogg, exogg).fit()
res_ols2 = OLS(endogg, exogg2).fit()

res_ols3 = OLS(endogg, exogg3).fit()

self.res = res_ols
self.res2 = res_ols2
self.res3 = res_ols3
self.endog = self.res.model.endog
self.exog = self.res.model.exog


def grangertest():
#> gt = grangertest(ginv, ggdp, order=4)
#> gt
Expand Down Expand Up @@ -816,7 +845,7 @@ def test_outlier_test():
res2 = np.c_[rstudent, unadj_p, bonf_p]
res = oi.outlier_test(ndarray_mod, method='b', labels=labels, order=True)
np.testing.assert_almost_equal(res.values, res2, 7)
np.testing.assert_equal(res.index.tolist(), sorted_labels)
np.testing.assert_equal(res.index.tolist(), sorted_labels) # pylint: disable-msg=E1103


if __name__ == '__main__':
Expand Down
10 changes: 5 additions & 5 deletions statsmodels/stats/tests/test_groups_sw.py
Expand Up @@ -12,11 +12,11 @@
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.grouputils import Group, GroupSorted

class CheckPanelLag(object):
class CheckPanelLagMixin(object):

def calculate(self):
self.g = g = GroupSorted(self.gind)
self.alla = [(lag, sw.lagged_groups(self.x, lag, g.groupidx))
self.g = g = GroupSorted(self.gind) # pylint: disable-msg=W0201
self.alla = [(lag, sw.lagged_groups(self.x, lag, g.groupidx)) # pylint: disable-msg=W0201
for lag in range(5)]

def test_values(self):
Expand All @@ -30,7 +30,7 @@ def test_raises(self):
self.g.groupidx)


class TestBalanced(CheckPanelLag):
class TestBalanced(CheckPanelLagMixin):

def __init__(self):
self.gind = np.repeat([0,1,2], 5)
Expand All @@ -50,7 +50,7 @@ def __init__(self):
}
self.calculate()

class TestUnBalanced(CheckPanelLag):
class TestUnBalanced(CheckPanelLagMixin):

def __init__(self):
self.gind = gind = np.repeat([0,1,2], [3, 5, 10])
Expand Down
1 change: 0 additions & 1 deletion statsmodels/stats/tests/test_multi.py
Expand Up @@ -323,7 +323,6 @@ def test_tukeyhsd():

m_r = [94.39167, 102.54167, 91.13333, 118.20000, 99.18333]
myres = tukeyhsd(m_r, 6, 110.8, alpha=0.05, df=4)
from numpy.testing import assert_almost_equal, assert_equal
pairs, reject, meandiffs, std_pairs, confint, q_crit = myres[:6]
assert_almost_equal(meandiffs, res[:, 0], decimal=5)
assert_almost_equal(confint, res[:, 1:3], decimal=2)
Expand Down
3 changes: 2 additions & 1 deletion statsmodels/stats/tests/test_power.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# pylint: disable=W0231, W0142
"""Tests for statistical power calculations
Note:
Expand Down Expand Up @@ -26,7 +27,7 @@
try:
import matplotlib.pyplot as plt #makes plt available for test functions
have_matplotlib = True
except:
except ImportError:
have_matplotlib = False


Expand Down
2 changes: 1 addition & 1 deletion statsmodels/tsa/mlemodel.py
Expand Up @@ -15,7 +15,7 @@

try:
import numdifftools as ndt
except:
except ImportError:
pass

from statsmodels.base.model import LikelihoodModel
Expand Down
10 changes: 5 additions & 5 deletions statsmodels/tsa/tests/test_ar.py
Expand Up @@ -14,7 +14,7 @@
DECIMAL_5 = 5
DECIMAL_4 = 4

class CheckAR(object):
class CheckARMixin(object):
def test_params(self):
assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_6)

Expand All @@ -39,7 +39,7 @@ def test_pickle(self):
res_unpickled = self.res1.__class__.load(fh)
assert_(type(res_unpickled) is type(self.res1))

class TestAROLSConstant(CheckAR):
class TestAROLSConstant(CheckARMixin):
"""
Test AR fit by OLS with a constant.
"""
Expand Down Expand Up @@ -76,7 +76,7 @@ def test_predict(self):
self.res2.FVOLSn15start312, DECIMAL_4)


class TestAROLSNoConstant(CheckAR):
class TestAROLSNoConstant(CheckARMixin):
"""f
Test AR fit by OLS without a constant.
"""
Expand Down Expand Up @@ -252,9 +252,9 @@ def test_ar_dates():
pred = ar_model.predict(start='2005', end='2015')
predict_dates = sm.tsa.datetools.dates_from_range('2005', '2015')
try:
from pandas import DatetimeIndex
from pandas import DatetimeIndex # pylint: disable-msg=E0611
predict_dates = DatetimeIndex(predict_dates, freq='infer')
except:
except ImportError:
pass
assert_equal(ar_model.data.predict_dates, predict_dates)
assert_equal(pred.index, predict_dates)
Expand Down

0 comments on commit 942aaec

Please sign in to comment.