forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tut_ols_wls.py
149 lines (121 loc) · 4.15 KB
/
tut_ols_wls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
'''Compare OLS and WLS
Note: uncomment plt.show() to display graphs
'''
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std
# ---------------------------------------------------------------
# OLS example: quadratic design, homoscedastic noise
# ---------------------------------------------------------------
nsample = 50
x1 = np.linspace(0, 20, nsample)
# design matrix: [x, (x - 5)^2, constant]
X = np.c_[x1, (x1 - 5)**2, np.ones(nsample)]
sig = 0.5               # noise standard deviation
beta = [0.5, -0.0, 5.]  # true coefficients (quadratic term is zero here)
y_true2 = np.dot(X, beta)
y2 = y_true2 + sig * 1. * np.random.normal(size=nsample)

res2 = sm.OLS(y2, X).fit()
# print() calls instead of Python-2-only print statements: the
# original statements are a SyntaxError on Python 3, while the
# call form produces identical output on both versions.
print(res2.params)
print(res2.bse)
plt.figure()
plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
#@savefig tut_ols_wls_0.png
plt.plot(x1, res2.fittedvalues, 'r--')
# Example WLS: Heteroscedasticity 2 groups
# ----------------------------------------
# model assumptions:
# * identical coefficients
# * misspecification: true model is quadratic, estimate only linear
# * independent noise/error term
# * two groups for error variance, low and high variance groups
np.random.seed(0)
beta = [0.5, -0.01, 5.]
y_true2 = np.dot(X, beta)
# error-std multipliers: first 60% of observations have unit scale,
# the remaining 40% have three times the standard deviation
w = np.ones(nsample)
# floor division: plain '/' yields a float on Python 3, which is
# invalid as a slice index
w[nsample * 6 // 10:] = 3
y2 = y_true2 + sig * w * np.random.normal(size=nsample)
X2 = X[:, [0, 2]]  # linear-only design (quadratic column dropped)
# OLS estimate
# ^^^^^^^^^^^^
# unbiased parameter estimates, but biased parameter covariance /
# standard errors under heteroscedasticity
print('OLS')
res2 = sm.OLS(y2, X[:, [0, 2]]).fit()
print('OLS beta estimates')
print(res2.params)
print('OLS stddev of beta')
print(res2.bse)

# heteroscedasticity corrected standard errors for OLS
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# plain OLS standard errors are not valid with heteroscedasticity;
# use sandwich (White-type) estimators of the parameter covariance
print('heteroscedasticity corrected standard error of beta estimates')
print(res2.HC0_se)
print(res2.HC1_se)
print(res2.HC2_se)
print(res2.HC3_se)
#plt.plot(x1, res2.fittedvalues, '--')
# WLS knowing the true variance ratio of heteroscedasticity
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
print('\nWLS')
# NOTE(review): statsmodels WLS weights are proportional to 1/variance;
# with error std = sig*w that would be 1./w**2 — original's 1./w kept
# to preserve the tutorial's output, but worth confirming.
res3 = sm.WLS(y2, X[:, [0, 2]], 1. / w).fit()
print('WLS beta estimates')
print(res3.params)
print('WLS stddev of beta')
print(res3.bse)
# Detour: hand-rolled prediction standard errors
# Prediction Interval for OLS
# ---------------------------
covb = res2.cov_params()
# prediction variance per observation: mse_resid + x_i' covb x_i,
# i.e. the diagonal of X2 @ covb @ X2.T without forming the full
# n-by-n matrix
predvar = res2.mse_resid + (np.dot(covb, X2.T).T * X2).sum(axis=1)
predstd = np.sqrt(predvar)
# two-sided 95% critical value from the t distribution
tppf = stats.t.ppf(0.975, res2.df_resid)
#..Prediction Interval for WLS
#..---------------------------
#..covb = res3.cov_params()
##.. full covariance:
##..predvar = res3.mse_resid + np.diag(np.dot(X2,np.dot(covb,X2.T)))
##.. predication variance only
#..predvar = res3.mse_resid*w + (X2 * np.dot(covb,X2.T).T).sum(1)
#..predstd = np.sqrt(predvar)
#..tppf = stats.t.ppf(0.975, res3.df_resid)
#..plt.plot(x1, res3.fittedvalues, 'g--.')
#..plt.plot(x1, res3.fittedvalues + tppf * predstd, 'g--')
#..plt.plot(x1, res3.fittedvalues - tppf * predstd, 'g--')
#..plt.title('blue: true, red: OLS, green: WLS')
# prediction std and 95% interval bounds from the library helper
prstd, iv_l, iv_u = wls_prediction_std(res3)
plt.figure()
# observed data (dots) and true mean (blue line)
plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
# OLS fit with hand-computed prediction band (red)
plt.plot(x1, res2.fittedvalues, 'r--')
plt.plot(x1, res2.fittedvalues + tppf * predstd, 'r--')
plt.plot(x1, res2.fittedvalues - tppf * predstd, 'r--')
# WLS fit with wls_prediction_std band (green)
plt.plot(x1, res3.fittedvalues, 'g--.')
plt.plot(x1, iv_u, 'g--')
plt.plot(x1, iv_l, 'g--')
#@savefig tut_ols_wls_1.png
plt.title('blue: true, red: OLS, green: WLS')
# 2-stage least squares for FGLS (FWLS)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Stage 1: use OLS residuals to estimate each group's error variance.
# Stage 2: rerun as WLS with the estimated relative std deviations.
print('\n2-stage least squares for FGLS (FWLS)')
resid1 = res2.resid[w == 1.]
# ddof = number of estimated mean parameters (df_model + intercept)
var1 = resid1.var(ddof=int(res2.df_model) + 1)
resid2 = res2.resid[w != 1.]
var2 = resid2.var(ddof=int(res2.df_model) + 1)
# estimated relative std of the high-variance group
# (low-variance group normalized to 1)
west = w.copy()
west[w != 1.] = np.sqrt(var2) / np.sqrt(var1)
res3 = sm.WLS(y2, X[:, [0, 2]], 1. / west).fit()
print('feasible WLS beta estimates')
print(res3.params)
print('feasible WLS stddev of beta')
print(res3.bse)
#..plt.show()