Skip to content

Commit

Permalink
Merge pull request statsmodels#1823 from josef-pkt/gee-cat-subclass_1…
Browse files Browse the repository at this point in the history
…694_rebase

REF: Gee cat subclass 1694 rebased, closes statsmodels#1694
  • Loading branch information
josef-pkt committed Jul 11, 2014
2 parents f8e702c + 8d206bb commit 2081ff3
Show file tree
Hide file tree
Showing 4 changed files with 721 additions and 333 deletions.
8 changes: 4 additions & 4 deletions docs/source/release/version0.6.rst
Expand Up @@ -36,13 +36,13 @@ covariates.
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.dependence_structures import Independence
from statsmodels.genmod.families import Poisson
data_url = "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv"
data = pd.read_csv(data_url)
fam = Poisson()
ind = Independence()
md = GEE.from_formula("y ~ age + trt + base", data, groups=data["subject"],
md = GEE.from_formula("y ~ age + trt + base", "subject", data,
covstruct=ind, family=fam)
mdf = md.fit()
print mdf.summary()
Expand Down Expand Up @@ -96,7 +96,7 @@ We added a naive seasonal decomposition tool in the same vein as R's ``decompose
import statsmodels.api as sm
dta = sm.datasets.co2.load_pandas().data
# deal with missing values. see issue
# deal with missing values. see issue
dta.co2.interpolate(inplace=True)
res = sm.tsa.seasonal_decompose(dta.co2)
Expand Down
98 changes: 71 additions & 27 deletions statsmodels/genmod/dependence_structures/covstruct.py
@@ -1,6 +1,10 @@
from statsmodels.compat.python import iterkeys, itervalues, zip, range
from statsmodels.stats.correlation_tools import cov_nearest
import numpy as np
from scipy import linalg as spl
from statsmodels.tools.sm_exceptions import (ConvergenceWarning,
IterationLimitWarning)
import warnings


class CovStruct(object):
Expand All @@ -24,6 +28,10 @@ def __init__(self):
# Parameters describing the dependency structure
self.dep_params = None

self.cov_adjust = 0



def initialize(self, model):
"""
Called by GEE, used by implementations that need additional
Expand All @@ -49,7 +57,8 @@ def update(self, params):
raise NotImplementedError

def covariance_matrix(self, endog_expval, index):
"""Returns the working covariance or correlation matrix for a
"""
Returns the working covariance or correlation matrix for a
given cluster of data.
Parameters
Expand Down Expand Up @@ -105,6 +114,10 @@ def covariance_matrix_solve(self, expval, index, stdev, rhs):
(e.g. binomial) do not use the `stdev` parameter when forming
the covariance matrix.
If the covariance matrix is singular or not SPD, it is
projected to the nearest such matrix. These projection events
are recorded in the fit_history member of the GEE model.
Systems of linear equations with the covariance matrix as the
left hand side (LHS) are solved for different right hand sides
(RHS); the LHS is only factorized once to save time.
Expand All @@ -119,9 +132,25 @@ def covariance_matrix_solve(self, expval, index, stdev, rhs):
if is_cor:
vmat *= np.outer(stdev, stdev)

try:
vco = spl.cho_factor(vmat)
except np.linalg.LinAlgError:
# Factor, the covariance matrix. If the factorization fails,
# attempt to condition it into a factorizable matrix.
threshold = 1e-2
success = False
cov_adjust = 0
for itr in range(10):
try:
vco = spl.cho_factor(vmat)
success = True
break
except np.linalg.LinAlgError:
vmat = cov_nearest(vmat, method="nearest", threshold=threshold)
threshold *= 2
cov_adjust = 1

self.cov_adjust += cov_adjust
if success == False:
warnings.warn("Unable to condition covariance matrix to an SPD matrix",
ConvergenceWarning)
return None

soln = [spl.cho_solve(vco, x) for x in rhs]
Expand Down Expand Up @@ -163,7 +192,7 @@ def covariance_matrix_solve(self, expval, index, stdev, rhs):
covariance_matrix_solve.__doc__ = CovStruct.covariance_matrix_solve.__doc__

def summary(self):
return "Observations within a cluster are independent."
return "Observations within a cluster are modeled as being independent."


class Exchangeable(CovStruct):
Expand Down Expand Up @@ -668,10 +697,6 @@ def __init__(self, endog_type):

def initialize(self, model):

# Delay processing until model.setup_ordinal has been called.
if model.ordinal == False and model.nominal == False:
return

super(GlobalOddsRatio, self).initialize(model)

self.nlevel = len(model.endog_values)
Expand All @@ -685,19 +710,30 @@ def initialize(self, model):
ibd.append(ibd1)
self.ibd = ibd

# Need to restrict to between-subject pairs
cpp = []
for v in model.endog_li:
m = len(v) / self.ncut
jj = np.kron(np.ones(m), np.arange(self.ncut))
j1 = np.outer(jj, np.ones(len(jj)))
j2 = np.outer(np.ones(len(jj)), jj)

# Number of subjects in this group
m = int(len(v) / self.ncut)

cpp1 = {}
for k1 in range(self.ncut):
for k2 in range(k1+1):
v1, v2 = np.nonzero((j1 == k1) & (j2 == k2))
cpp1[(k2, k1)] = \
np.hstack((v2[:, None], v1[:, None]))
# Loop over distinct subject pairs
for i1 in range(m):
for i2 in range(i1):
# Loop over cut point pairs
for k1 in range(self.ncut):
for k2 in range(k1+1):
if (k2, k1) not in cpp1:
cpp1[(k2, k1)] = []
j1 = i1*self.ncut + k1
j2 = i2*self.ncut + k2
cpp1[(k2, k1)].append([j2, j1])

for k in cpp1.keys():
cpp1[k] = np.asarray(cpp1[k])
cpp.append(cpp1)

self.cpp = cpp

# Initialize the dependence parameters
Expand Down Expand Up @@ -739,12 +775,13 @@ def covariance_matrix(self, expected_value, index):
return vmat, False

def observed_crude_oddsratio(self):
"""The crude odds ratio is obtained by pooling all data
corresponding to a given pair of cut points (c,c'), then
forming the inverse variance weighted average of these odds
ratios to obtain a single OR. Since the covariate effects are
ignored, this OR will generally be greater than the stratified
OR.
"""
To obtain the crude (global) odds ratio, first pool all binary
indicators corresponding to a given pair of cut points (c,c'),
then calculate the odds ratio for this 2x2 table. The crude
odds ratio is the inverse variance weighted average of these
odds ratios. Since the covariate effects are ignored, this OR
will generally be greater than the stratified OR.
"""

cpp = self.cpp
Expand Down Expand Up @@ -779,7 +816,7 @@ def get_eyy(self, endog_expval, index):
"""
Returns a matrix V such that V[i,j] is the joint probability
that endog[i] = 1 and endog[j] = 1, based on the marginal
probabilities of endog and the odds ratio `current_or`.
probabilities of endog and the global odds ratio `current_or`.
"""

current_or = self.dep_params
Expand Down Expand Up @@ -810,8 +847,10 @@ def get_eyy(self, endog_expval, index):
return vmat

def update(self, params):
"""Update the global odds ratio based on the current value of
params."""
"""
Update the global odds ratio based on the current value of
params.
"""

endog = self.model.endog_li
cpp = self.cpp
Expand Down Expand Up @@ -846,6 +885,11 @@ def update(self, params):
cor_expval = self.pooled_odds_ratio(list(itervalues(tables)))

self.dep_params *= self.crude_or / cor_expval
if not np.isfinite(self.dep_params):
self.dep_params = 1.
warnings.warn("dep_params became inf, resetting to 1",
ConvergenceWarning)


update.__doc__ = CovStruct.update.__doc__
covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
Expand Down

0 comments on commit 2081ff3

Please sign in to comment.