statsmodels/sandbox/rls.py

"""Restricted least squares

from pandas
License: Simplified BSD
"""

import numpy as np
from statsmodels.regression.linear_model import WLS, GLS, RegressionResults

class RLS(GLS):
    """
    Restricted general least squares model that handles linear constraints

    Parameters
    ----------
    endog: array-like
        n length array containing the dependent variable
    exog: array-like
        n-by-p array of independent variables
    constr: array-like
        k-by-p array of linear constraints
    param (0.): array-like or scalar
        p-by-1 array (or scalar) of constraint parameters
    sigma (None): scalar or array-like
        The weighting matrix of the covariance. No scaling by default (OLS).
        If sigma is a scalar, then it is converted into an n-by-n diagonal
        matrix with sigma as each diagonal element.
        If sigma is an n-length array, then it is assumed to be a diagonal
        matrix with the given sigma on the diagonal (WLS).

    Notes
    -----
    endog = exog * beta + epsilon
    weights' * constr * beta = param

    See Greene and Seaks, "The Restricted Least Squares Estimator:
    A Pedagogical Note", The Review of Economics and Statistics, 1991.
    """

    def __init__(self, endog, exog, constr, param=0., sigma=None):
        N, Q = exog.shape
        constr = np.asarray(constr)
        if constr.ndim == 1:
            K, P = 1, constr.shape[0]
        else:
            K, P = constr.shape
        if Q != P:
            raise Exception('Constraints and design do not align')
        self.ncoeffs = Q
        self.nconstraint = K
        self.constraint = constr
        if np.isscalar(param) and K > 1:
            param = np.ones((K,)) * param
        self.param = param
        if sigma is None:
            sigma = 1.
        if np.isscalar(sigma):
            sigma = np.ones(N) * sigma
        sigma = np.squeeze(sigma)
        if sigma.ndim == 1:
            self.sigma = np.diag(sigma)
            self.cholsigmainv = np.diag(np.sqrt(sigma))
        else:
            self.sigma = sigma
            self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
        super(GLS, self).__init__(endog, exog)

    _rwexog = None
    @property
    def rwexog(self):
        """Whitened exogenous variables augmented with restrictions"""
        if self._rwexog is None:
            P = self.ncoeffs
            K = self.nconstraint
            design = np.zeros((P + K, P + K))
            design[:P, :P] = np.dot(self.wexog.T, self.wexog) #top left
            constr = np.reshape(self.constraint, (K, P))
            design[:P, P:] = constr.T #top right partition
            design[P:, :P] = constr #bottom left partition
            design[P:, P:] = np.zeros((K, K)) #bottom right partition
            self._rwexog = design
        return self._rwexog

    _inv_rwexog = None
    @property
    def inv_rwexog(self):
        """Inverse of self.rwexog"""
        if self._inv_rwexog is None:
            self._inv_rwexog = np.linalg.inv(self.rwexog)
        return self._inv_rwexog

    _rwendog = None
    @property
    def rwendog(self):
        """Whitened endogenous variable augmented with restriction parameters"""
        if self._rwendog is None:
            P = self.ncoeffs
            K = self.nconstraint
            response = np.zeros((P + K,))
            response[:P] = np.dot(self.wexog.T, self.wendog)
            response[P:] = self.param
            self._rwendog = response
        return self._rwendog

    _ncp = None
    @property
    def rnorm_cov_params(self):
        """Parameter covariance under restrictions"""
        if self._ncp is None:
            P = self.ncoeffs
            self._ncp = self.inv_rwexog[:P, :P]
        return self._ncp

    _wncp = None
    @property
    def wrnorm_cov_params(self):
        """
        Heteroskedasticity-consistent parameter covariance
        Used to calculate White standard errors.
        """
        if self._wncp is None:
            df = self.df_resid
            pred = np.dot(self.wexog, self.coeffs)
            eps = np.diag((self.wendog - pred) ** 2)
            sigmaSq = np.sum(eps)
            pinvX = np.dot(self.rnorm_cov_params, self.wexog.T)
            self._wncp = np.dot(np.dot(pinvX, eps), pinvX.T) * df / sigmaSq
        return self._wncp

    _coeffs = None
    @property
    def coeffs(self):
        """Estimated parameters"""
        if self._coeffs is None:
            betaLambda = np.dot(self.inv_rwexog, self.rwendog)
            self._coeffs = betaLambda[:self.ncoeffs]
        return self._coeffs

    def fit(self):
        rncp = self.wrnorm_cov_params
        lfit = RegressionResults(self, self.coeffs, normalized_cov_params=rncp)
        return lfit

if __name__=="__main__":
    import statsmodels.api as sm
    dta = np.genfromtxt('./rlsdata.txt', names=True)
    design = np.column_stack((dta['Y'],dta['Y']**2,dta[['NE','NC','W','S']].view(float).reshape(dta.shape[0],-1)))
    design = sm.add_constant(design, prepend=True)
    rls_mod = RLS(dta['G'],design, constr=[0,0,0,1,1,1,1])
    rls_fit = rls_mod.fit()
    print rls_fit.params