forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
try_mlecov.py
235 lines (188 loc) · 7.24 KB
/
try_mlecov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
'''Multivariate Normal Model with full covariance matrix
toeplitz structure is not exploited, need cholesky or inv for toeplitz
Author: josef-pktd
'''
from __future__ import print_function
import numpy as np
#from scipy import special #, stats
from scipy import linalg
from scipy.linalg import norm, toeplitz
import statsmodels.api as sm
from statsmodels.base.model import (GenericLikelihoodModel,
LikelihoodModel)
from statsmodels.tsa.arima_process import arma_acovf, arma_generate_sample
def mvn_loglike_sum(x, sigma):
    '''Concentrated multivariate normal loglikelihood.

    Copied from GLS and names adjusted.  Profiles out the error variance,
    so in general it does not agree with mvn_loglike.
    '''
    nobs = len(x)
    half_nobs = nobs / 2.0
    ssr = (x ** 2).sum()
    # concentrated likelihood plus the likelihood constant
    llf = -half_nobs * np.log(ssr)
    llf -= half_nobs * (1 + np.log(np.pi / half_nobs))
    if np.any(sigma) and sigma.ndim == 2:
        #FIXME: robust-enough check? unneeded if _det_sigma gets defined
        llf -= 0.5 * np.log(np.linalg.det(sigma))
    return llf
def mvn_loglike(x, sigma):
    '''Multivariate normal loglikelihood by the direct formula.

    Assumes ``x`` is 1d, (nobs,) and ``sigma`` is 2d, (nobs, nobs);
    inputs are not validated.  The explicit inverse and determinant
    should eventually be replaced by something more efficient.
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
    nobs = len(x)
    siginv = linalg.inv(sigma)
    quad_form = np.dot(x, np.dot(siginv, x))
    logdetsigma = np.log(np.linalg.det(sigma))
    return -0.5 * (quad_form + nobs * np.log(2 * np.pi) + logdetsigma)
def mvn_loglike_chol(x, sigma):
    '''Multivariate normal loglikelihood via Cholesky whitening.

    Assumes ``x`` is 1d, (nobs,) and ``sigma`` is 2d, (nobs, nobs);
    inputs are not validated.  Prints a scipy.stats cross-check of the
    loglikelihood evaluated on the whitened data.

    Returns
    -------
    llf : float
        loglikelihood of x under N(0, sigma)
    logdetsigma : float
        log determinant of sigma via np.linalg.det
    logdet_inv : float
        log determinant of sigma^{-1} from the Cholesky diagonal
        (equals -logdetsigma up to floating point)
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
    # whiten x with the upper-triangular Cholesky factor of sigma^{-1}
    sigmainv = np.linalg.inv(sigma)
    cholsigmainv = np.linalg.cholesky(sigmainv).T
    x_whitened = np.dot(cholsigmainv, x)
    logdetsigma = np.log(np.linalg.det(sigma))
    nobs = len(x)
    # cross-check: whitened observations should be iid standard normal
    from scipy import stats
    print('scipy.stats')
    print(np.log(stats.norm.pdf(x_whitened)).sum())
    llf = -0.5 * (np.dot(x_whitened.T, x_whitened)
                  + nobs * np.log(2 * np.pi)
                  + logdetsigma)
    return llf, logdetsigma, 2 * np.sum(np.log(np.diagonal(cholsigmainv)))
def mvn_nloglike_obs(x, sigma):
    '''Per-observation negative loglikelihood contributions, MVN.

    Assumes ``x`` is 1d, (nobs,) and ``sigma`` is 2d, (nobs, nobs);
    inputs are not validated.  The explicit inverse followed by a
    Cholesky factorization is wasteful and should be replaced by
    something more efficient.

    Returns
    -------
    llike : ndarray, (nobs,)
        negative loglikelihood contribution of each (whitened)
        observation; ``llike.sum()`` equals ``-mvn_loglike(x, sigma)``.
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
    sigmainv = np.linalg.inv(sigma)
    # upper-triangular whitening matrix: cholsigmainv @ x is iid N(0, 1)
    cholsigmainv = np.linalg.cholesky(sigmainv).T
    x_whitened = np.dot(cholsigmainv, x)
    # NOTE: the original also computed np.log(np.linalg.det(sigma)) here
    # but never used it; the log-determinant enters below through the
    # diagonal of the Cholesky factor instead (dead code removed).
    sigma2 = 1.  # error variance is included in sigma
    llike = 0.5 * (np.log(sigma2) - 2. * np.log(np.diagonal(cholsigmainv))
                   + (x_whitened ** 2) / sigma2
                   + np.log(2 * np.pi))
    return llike
def invertibleroots(ma):
    '''Transform MA polynomial coefficients to an invertible representation.

    Roots of the MA polynomial that lie inside the unit circle are
    replaced by their reciprocals, which leaves the autocorrelation
    structure unchanged but makes the process invertible.

    Parameters
    ----------
    ma : array_like
        MA coefficients including the leading 1, e.g. [1, b1, b2].

    Returns
    -------
    mainv : ndarray
        invertible MA coefficients, normalized so the leading
        coefficient is 1; equal to ``ma`` if it was already invertible
    wasinvertible : bool
        True if ``ma`` was already invertible (no roots flipped)
    '''
    # BUG FIX: the original referenced an undefined name ``pn`` (NameError)
    # and looked up ``polyroots`` on the ``numpy.polynomial`` package, which
    # only exposes the polynomial classes; use the submodule explicitly.
    import numpy.polynomial.polynomial as npoly
    pr = npoly.polyroots(ma)
    insideroots = np.abs(pr) < 1
    if insideroots.any():
        # flip roots inside the unit circle to their reciprocals
        pr[insideroots] = 1. / pr[insideroots]
        pnew = np.polynomial.Polynomial.fromroots(pr)
        # normalize so the leading (constant) coefficient is 1
        mainv = pnew.coef / pnew.coef[0]
        wasinvertible = False
    else:
        mainv = ma
        wasinvertible = True
    return mainv, wasinvertible
def getpoly(self, params):
    '''Build AR and MA polynomials from a stacked parameter vector.

    The first ``self.nar`` entries are AR coefficients (sign-flipped into
    lhs convention), the last ``self.nma`` entries are MA coefficients;
    a leading 1 is prepended to both.
    '''
    from numpy.polynomial import Polynomial
    ar_coefs = np.r_[[1], -params[:self.nar]]
    ma_coefs = np.r_[[1], params[-self.nma:]]
    return Polynomial(ar_coefs), Polynomial(ma_coefs)
class MLEGLS(GenericLikelihoodModel):
    '''ARMA model with exact loglikelihood for short time series.

    Inverts an (nobs, nobs) covariance matrix, so use only for
    nobs <= 200 or so.  This class is a pattern for small-sample
    GLS-like models; intended use is the loglikelihood of the initial
    observations of an ARMA process.

    TODO:
    This might be missing the error variance.  Does it assume the error
    is distributed N(0,1)?  Maybe extend to mean handling, or assume the
    mean has already been removed.
    '''

    def _params2cov(self, params, nobs):
        '''Build the (nobs, nobs) autocovariance matrix from ARMA parameters.

        AR parameters are assumed to use the rhs parameterization, so
        they are sign-flipped into the lhs polynomial here.
        '''
        arpoly = np.r_[[1], -params[:self.nar]]
        mapoly = np.r_[[1], params[-self.nma:]]
        acovf = arma_acovf(arpoly, mapoly, nobs=nobs)
        # something was strange, fixed in arma_acovf; keep defensive slice
        return toeplitz(acovf[:nobs])

    def loglike(self, params):
        '''Exact Gaussian loglikelihood; last entry of params is the error std.'''
        cov = self._params2cov(params[:-1], self.nobs) * params[-1] ** 2
        return mvn_loglike(self.endog, cov)

    def fit_invertible(self, *args, **kwds):
        '''Fit, then refit from flipped MA roots if the estimate is non-invertible.'''
        res = self.fit(*args, **kwds)
        macoefs = np.r_[[1], res.params[self.nar:self.nar + self.nma]]
        mainv, wasinvertible = invertibleroots(macoefs)
        if not wasinvertible:
            start_params = res.params.copy()
            start_params[self.nar:self.nar + self.nma] = mainv[1:]
            # TODO: the original args/kwds are not passed through here
            res = self.fit(start_params=start_params)
        return res
if __name__ == '__main__':
    # Example: simulate a short ARMA(2,2) series and estimate it by exact MLE.
    nobs = 50
    ar = [1.0, -0.8, 0.1]   # true AR polynomial, lhs convention
    ma = [1.0, 0.1, 0.2]    # true MA polynomial
    #ma = [1]
    np.random.seed(9875789)
    y = arma_generate_sample(ar,ma,nobs,2)
    y -= y.mean() #I haven't checked treatment of mean yet, so remove
    mod = MLEGLS(y)
    # nar/nma/nobs are set by hand because MLEGLS defines no __init__
    mod.nar, mod.nma = 2, 2 #needs to be added, no init method
    mod.nobs = len(y)
    # start_params layout: [ar1, ar2, ma1, ma2, error_std]
    res = mod.fit(start_params=[0.1, -0.8, 0.2, 0.1, 1.])
    print('DGP', ar, ma)
    print(res.params)
    # quick comparison with a Yule-Walker AR(2) fit
    from statsmodels.regression import yule_walker
    print(yule_walker(y, 2))
    #resi = mod.fit_invertible(start_params=[0.1,0,0.2,0, 0.5])
    #print(resi.params)
    arpoly, mapoly = getpoly(mod, res.params[:-1])

    # Sunspots example is loaded but the fitting below is disabled.
    data = sm.datasets.sunspots.load()
    #ys = data.endog[-100:]
##    ys = data.endog[12:]-data.endog[:-12]
##    ys -= ys.mean()
##    mods = MLEGLS(ys)
##    mods.nar, mods.nma = 13, 1   #needs to be added, no init method
##    mods.nobs = len(ys)
##    ress = mods.fit(start_params=np.r_[0.4, np.zeros(12), [0.2, 5.]],maxiter=200)
##    print(ress.params)
##    #from statsmodels.sandbox.tsa import arima as tsaa
##    #tsaa
##    import matplotlib.pyplot as plt
##    plt.plot(data.endog[1])
##    #plt.show()

    # Cross-check the different loglikelihood implementations at the MLE.
    sigma = mod._params2cov(res.params[:-1], nobs) * res.params[-1]**2
    print(mvn_loglike(y, sigma))
    llo = mvn_nloglike_obs(y, sigma)
    print(llo.sum(), llo.shape)
    print(mvn_loglike_chol(y, sigma))
    print(mvn_loglike_sum(y, sigma))