-
Notifications
You must be signed in to change notification settings - Fork 0
/
calcVarPart.R
138 lines (86 loc) · 3.44 KB
/
calcVarPart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Gabriel Hoffman
# October 28, 2019
# Variance partition fraction for GLM with linear, logistic or probit regression
# Reports variance fraction on the linear scale (i.e. eta)
# rather than the observed scale (i.e. y)
# For linear regression with an identity link, these are the same
# but for logit/probit, the fractions are not well defined on the observed scale
# [See development code for attempts]
# The residual variance is the variance of the distribution function of the link
# logit -> logistic distribution -> variance is pi^2/3
# probit -> standard normal distribution -> variance is 1
#
# Proposed by
# McKelvey and Zavoina. A statistical model for the analysis of ordinal level dependent variables. The Journal of Mathematical Sociology 4(1) 103-120 https://doi.org/10.1080/0022250X.1975.9989847
#
# Nagelkerke's pseudo R^2 evaluates the variance explained by the full model. Instead, a variance partitioning approach evaluates the variance explained by each term in the model, so that the sum of each systematic plus random term sums to 1 (Hoffman and Schadt, 2016, Nakagawa and Schielzeth, 2012).
# Also see
# DeMaris. Explained Variance in Logistic Regression: A Monte Carlo Study of Proposed Measures. Sociological Methods & Research 2002 https://doi.org/10.1177/0049124102031001002
# Nakagawa and Schielzeth. 2012. A general and simple method for obtaining R2fromgeneralized linear mixed-effects models
#' @export
#' @rdname calcVarPart-method
#' @aliases calcVarPart,glm-method
setMethod("calcVarPart", "glm",
function(fit, adjust=NULL, adjustAll=FALSE, showWarnings=TRUE, ...){
N = nrow(fit$model)
# Compute eta for each term
# predicted value in linear space for each term
Eta = predict(fit, type="terms")
# compute residual term for each link
distVar = switch( fit$family$link ,
"logit" = (pi^2)/3,
"probit" = 1,
"identity" = var(fit$residuals) )
if( is.null(distVar) ){
stop("glm link not supported: ", fit$family$link )
}
# variance on linear scale
# get variance of each term
# append with variance due to link function
var_term = c(apply(Eta, 2, var), distVar)
# compute fraction
varFrac_linear = var_term / sum( var_term )
names(varFrac_linear) = c(colnames(Eta), "Residuals")
return( varFrac_linear )
})
# CODE FROM development
# g^{-1}( mu + eta_j) / var(Y)
# compute denominator
#####################
# # get response
# y = as.numeric(fit$model[,attr(fit$terms,"response")])
# # denominator
# var_Y = var( y )
# variance on response scale
# g_inv = fit$family$linkinv
# Y_hat = apply( Eta, 2, function(x){
# g_inv( x + mu )
# })
# # C = cov(Y_hat)
# # sum(eigen(C)$values)
# v = apply(Y_hat, 2, var)
# v / var_Y
# denom = ifelse( var_Y > sum(v), var_Y, sum(v))
# varFrac_response = apply(Y_hat, 2, var) / var_Y
# apply(Y_hat, 2, var) / var_Y
# # deviance
# deviance = anova(fit)[['Resid. Dev']]
# 1 - (fit$deviance / deviance[1])
# baseline
# mu = attr(Eta,"constant")
# par(mfrow=c(1,3))
# plot(y, rowSums(Eta) + mu)
# plot(y, predict(fit))
# plot(y, predict(fit, type="response"))
# attr(fit$terms,"term.labels")
# dcmp = svd(scale(Y_hat))
# A = sweep(dcmp$u, 2, dcmp$d^-1,FUN="*")
# cor(t(A) %*% Y_hat)
# dcmp = svd(scale(Y_hat))
# A = sweep(dcmp$v, 2, dcmp$d^-1,FUN="*")
# cor(scale(Y_hat) %*% (A))
# dcmp = svd(scale(Y_hat, scale=FALSE))
# A = sweep(dcmp$v, 2, dcmp$d^-1,FUN="*")
# cor(scale(Y_hat, scale=FALSE) %*% A)
# cv = colVars(scale(Y_hat, scale=FALSE)%*% A)
# cv