-
Notifications
You must be signed in to change notification settings - Fork 29
/
hierarchical-t-test.stan
149 lines (107 loc) · 4.1 KB
/
hierarchical-t-test.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*Hierarchical Bayesian model for the analysis of competing cross-validated classifiers on multiple data sets.
*/
data {
real deltaLow;
real deltaHi;
//bounds of the sigma of the higher-level distribution
real std0Low;
real std0Hi;
//bounds on the domain of the sigma of each data set
real stdLow;
real stdHi;
//number of results for each data set. Typically 100 (10 runs of 10-folds cv)
int<lower=2> Nsamples;
//number of data sets.
int<lower=1> q;
//difference of accuracy between the two classifier, on each fold of each data set.
matrix[q,Nsamples] x;
//correlation (1/(number of folds))
real rho;
/*upper and lower bound for alpha and beta, which are the parameters of the Gamma distribution
used as a prior for the degress of freedom.
As a default we suggest: lowerAlpha=0.5; upperAlpha= 5; lowerBeta=0.05; upperBeta = .15
*/
real upperAlpha;
real lowerAlpha;
real upperBeta;
real lowerBeta;
}
transformed data {
//vector of 1s appearing in the likelihood
vector[Nsamples] H;
//vector of 0s: the mean of the mvn noise
vector[Nsamples] zeroMeanVec;
/* M is the correlation matrix of the mvn noise.
invM is its inverse, detM its determinant */
matrix[Nsamples,Nsamples] invM;
real detM;
//The determinant of M is analytically known
detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1);
//build H and invM. They do not depend on the data.
for (j in 1:Nsamples){
zeroMeanVec[j]<-0;
H[j]<-1;
for (i in 1:Nsamples){
if (j==i)
invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2);
else
invM[j,i]<- -rho * pow((1-rho),Nsamples-2);
}
}
/*at this point invM contains the adjugate of M.
we divide it by det(M) to obtain the inverse of M.*/
invM <-invM/detM;
}
parameters {
//mean of the hyperprior from which we sample the delta_i
real<lower=deltaLow,upper=deltaHi> delta0;
//std of the hyperprior from which we sample the delta_i
real<lower=std0Low,upper=std0Hi> std0;
//delta_i of each data set: vector of lenght q.
vector[q] delta;
//sigma of each data set: : vector of lenght q.
vector<lower=stdLow,upper=stdHi>[q] sigma;
/* the domain of (nu - 1) starts from 0
and can be given a gamma prior*/
real<lower=0> nuMinusOne;
//parameters of the Gamma prior on nuMinusOne
real<lower=lowerAlpha,upper=upperAlpha> gammaAlpha;
real<lower=lowerBeta, upper=upperBeta> gammaBeta;
}
transformed parameters {
//degrees of freedom
real<lower=1> nu ;
/*difference between the data (x matrix) and
the vector of the q means.*/
matrix[q,Nsamples] diff;
vector[q] diagQuad;
/*vector of length q:
1 over the variance of each data set*/
vector[q] oneOverSigma2;
vector[q] logDetSigma;
vector[q] logLik;
//degrees of freedom
nu <- nuMinusOne + 1 ;
//1 over the variance of each data set
oneOverSigma2 <- rep_vector(1, q) ./ sigma;
oneOverSigma2 <- oneOverSigma2 ./ sigma;
/*the data (x) minus a matrix done as follows:
the delta vector (of lenght q) pasted side by side Nsamples times*/
diff <- x - rep_matrix(delta,Nsamples);
//efficient matrix computation of the likelihood.
diagQuad <- diagonal (quad_form (invM,diff'));
logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ;
logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283);
logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad;
}
model {
/*mu0 and std0 are not explicitly sampled here.
Stan automatically samples them: mu0 as uniform and std0 as
uniform over its domain (std0Low,std0Hi).*/
//sampling the degrees of freedom
nuMinusOne ~ gamma ( gammaAlpha, gammaBeta);
//vectorial sampling of the delta_i of each data set
delta ~ student_t(nu, delta0, std0);
//logLik is computed in the previous block
increment_log_prob(sum(logLik));
}