-
Notifications
You must be signed in to change notification settings - Fork 1
/
infoCriterion.r
95 lines (75 loc) · 3.21 KB
/
infoCriterion.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#' @title Function that calculates cross-validation selection criteria
#' @export
#' @keywords internal
#' @importFrom stats dbinom dpois dnorm
#' @description Function that calculates cross-validation selection criteria
#' @param ynew data matrix corresponding to the observations used as test sample.
#' @param pred predicted value of the linear predictor obtained from Xnew and the estimated parameters.
#' @param family a vector of the same length as the number of responses containing characters
#' identifying the distribution families of the dependent variables.
#' "bernoulli", "binomial", "poisson" or "gaussian" are allowed.
#' @param type information criterion used. Likelihood, aic, bic, aicc or
#' Mean Square Prediction Error (mspe) are defined. Area Under ROC Curve (auc) also defined for Bernoulli cases only.
#' @param size describes the number of trials for the binomial dependent variables.
#' A (number of statistical units * number of binomial dependent variables) matrix is expected.
#' @param npar number of parameters used for penalisation.
#' @return a matrix containing the criterion value for each dependent variable (row)
#' and each number of components (column).
infoCriterion <- function(ynew, pred, family, type, size=NULL, npar=0) {
if(type=="auc")
stop("auc not allowed as loss function!")
checkLossFunction(type)
nobs <- nrow(ynew)
ny <- ncol(ynew)
pen <- switch(type,
likelihood = 0,
aic = npar*2,
bic = npar*log(nobs),
aicc = 2*npar(npar+1)/(nobs-npar-1),
mspe = 0
)
res <- rep(0, ny)
if("bernoulli" %in% family) {
tmpy <- ynew[, which(family %in% "bernoulli"), drop=FALSE]
tmpp <- pred[, which(family %in% "bernoulli"), drop=FALSE]
if(type=="mspe") {
ldber <- apply((tmpy-tmpp)^2/(tmpp*(1-tmpp)), 2, mean)
} else {
ldber <- -2*apply(dbinom(tmpy,1,tmpp,log=TRUE), 2, sum)
}
res[which(family=="bernoulli")] <- ldber
}
if("binomial" %in% family) {
tmpy <- ynew[, which(family %in% "binomial"), drop=FALSE]*size
tmpp <- pred[, which(family %in% "binomial"), drop=FALSE]
if(type=="mspe"){
ldbin <- apply((tmpy-tmpp)^2/(tmpp*(1-tmpp)*size), 2, mean)
} else {
ldbin <- -2*apply(dbinom(tmpy, size, tmpp, log=TRUE), 2, sum)
}
res[which(family=="binomial")] <- ldbin
}
if("poisson" %in% family) {
tmpy <- ynew[, which(family %in% "poisson"), drop=FALSE]
tmpp <- pred[, which(family %in% "poisson"), drop=FALSE]
if(type=="mspe"){
ldpois <- apply((tmpy-tmpp)^2/tmpp, 2, mean)
} else {
ldpois <- -2*apply(dpois(tmpy,tmpp,log=TRUE), 2, sum)
}
res[which(family=="poisson")] <- ldpois
}
if("gaussian" %in% family) {
tmpy <- ynew[, which(family %in% "gaussian"), drop=FALSE]
tmpp <- pred[, which(family %in% "gaussian"), drop=FALSE]
if(type=="mspe"){
v <- apply(tmpy, 2, stats::var)
ldgaus <- apply((tmpy-tmpp)^2, 2, mean)/v #TODO check formula
} else {
sd <- matrix(sqrt(apply((tmpy-tmpp)^2, 2, mean)), nobs, ny, byrow=TRUE)
ldgaus <- -2*apply(dnorm(tmpy, tmpp, sd, log=TRUE), 2, sum) #TODO a corriger
}
res[which(family=="gaussian")] <- ldgaus
}
return(res+pen)
}