-
Notifications
You must be signed in to change notification settings - Fork 17
/
summary.emdi.R
269 lines (241 loc) · 10.5 KB
/
summary.emdi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#' Summarizes an emdiObject
#'
#' Additional information about the data and model in small area estimation
#' methods and components of an emdi object are extracted. The returned object
#' is suitable for printing with the \code{print.summary.emdi} method.
#' @param object an object of type "emdi", representing point and MSE
#' estimates. Objects differ depending on the estimation method: direct
#' vs. model-based.
#' @param ... additional arguments that are not used in this method.
#' @return an object of type "summary.emdi" with following
#' components:
#' \item{out_of_smp}{if model-based estimation, number of out-of-sample domains
#' equivalent to \code{N_dom_unobs} (see \code{\link{emdiObject}}).}
#' \item{in_smp}{number of in-sample domains equivalent to \code{N_dom_smp}
#' (see \code{\link{emdiObject}}).}
#' \item{size_smp}{number of units in sample equivalent to \code{N_smp}
#' (see \code{\link{emdiObject}}).}
#' \item{size_pop}{if model-based estimation, number of units in population equivalent to \code{N_pop}
#' (see \code{\link{emdiObject}}).}
#' \item{size_dom}{a data frame with rows Sample_domains and Population_domains
#' (if model-based estimation) representing summary statisitics of the sample
#' sizes across domains of sample and population data, respectively.}
#' \item{transform}{if model-based estimation, a data frame with columns
#' Transformation, Method, Optimal_lambda and Shift_parameter representing the
#' chosen transformation type and estimation method for lambda
#' as well as their results.}
#' \item{normality}{if model-based estimation, a data frame with columns Skewness,
#' Kurtosis, Shapiro_W and Shapiro_p where the latter two represent
#' the results of a Shapiro-Wilks-Test for normality.
#' Rows correspond to Pearson residuals and random effects
#' of the nested error regression model. The functions
#' \code{\link{skewness}} and \code{\link{kurtosis}} are from
#' the package \pkg{moments}. Details for the Shapiro-Wilks-Test
#' are provided by \code{\link{shapiro.test}}.}
#' \item{icc}{if model-based estimation, the value of the intraclass coefficient.}
#' \item{coeff_determ}{if model-based estimation, a data frame with colums
#' Marginal_R2 and Conditional_R2 representing two R2 measures
#' for linear mixed models from the \pkg{MuMIn} package
#' obtained by function \code{\link[MuMIn]{r.squaredGLMM}}.}
#' \item{call}{a list containing an image of the function call that produced the
#' object.}
#' @seealso \code{\link{emdiObject}}, \code{\link{direct}}, \code{\link{ebp}},
#' \code{\link[MuMIn]{r.squaredGLMM}}, \code{\link[moments]{skewness}},
#' \code{\link[moments]{kurtosis}}, \code{\link[stats]{shapiro.test}}
#' @examples
#' \dontrun{
#' # Loading data - population and sample data
#' data("eusilcA_pop")
#' data("eusilcA_smp")
#'
#' # Example with two additional indicators
#' emdi_model <- ebp(fixed = eqIncome ~ gender + eqsize + cash +
#' self_empl + unempl_ben + age_ben + surv_ben + sick_ben + dis_ben + rent +
#' fam_allow + house_allow + cap_inv + tax_adj, pop_data = eusilcA_pop,
#' pop_domains = "district", smp_data = eusilcA_smp, smp_domains = "district",
#' threshold = function(y){0.6 * median(y)}, L = 50, MSE = TRUE, B = 50,
#' custom_indicator = list( my_max = function(y, threshold){max(y)},
#' my_min = function(y, threshold){min(y)}), na.rm = TRUE, cpus = 1)
#'
#' # Receive first overview
#' summary(emdi_model)
#' }
#' @export
#' @importFrom moments skewness kurtosis
#' @importFrom MuMIn r.squaredGLMM
#'
summary.emdi <- function(object, ...) {
if(!inherits(object, "emdi")){
stop('First object needs to be of class emdi.')
}
if(inherits(object, "model")){
call_emdi <- object$call
N_dom_unobs <- object$framework$N_dom_unobs
N_dom_smp <- object$framework$N_dom_smp
smp_size <- object$framework$N_smp
pop_size <- object$framework$N_pop
smp_size_dom <- summary(as.data.frame(table(object$framework$smp_domains_vec))[,"Freq"])
pop_size_dom <- summary(as.data.frame(table(object$framework$pop_domains_vec))[,"Freq"])
sizedom_smp_pop <- rbind(Sample_domains = smp_size_dom,
Population_domains = pop_size_dom)
if (object$transformation == "box.cox") {
transform_method <- data.frame(Transformation = object$transformation,
Method = object$method,
Optimal_lambda = object$transform_param$optimal_lambda,
Shift_parameter = round(object$transform_param$shift_par,3),
row.names = ""
)
} else if (object$transformation == "log") {
transform_method <- data.frame(Transformation = object$transformation,
Shift_parameter = round(object$transform_param$shift_par,3),
row.names = ""
)
}
else if (object$transformation == "no") {
transform_method <- NULL
# data.frame(Transformation = object$transformation,
# Method = "NULL",
# Optimal_lambda = "NULL",
# Shift_parameter = "NULL",
# row.names = ""
# )
}
skewness_res <- skewness(residuals(object$model, level=0, type="pearson"))
kurtosis_res <- kurtosis(residuals(object$model, level=0, type="pearson"))
skewness_ran <- skewness(ranef(object$model)$'(Intercept)')
kurtosis_ran <- kurtosis(ranef(object$model)$'(Intercept)')
if(length(residuals(object$model, level=0, type="pearson"))>3 &
length(residuals(object$model, level=0, type="pearson")) <5000){
shapiro_p_res <-
shapiro.test(residuals(object$model, level = 0, type = "pearson"))[[2]]
shapiro_W_res <-
shapiro.test(residuals(object$model, level = 0, type = "pearson"))[[1]]
} else {
warning("Number of observations exceeds 5000 or is lower then 3 and thus the
Shapiro-Wilk test is not applicable for the residuals.")
shapiro_p_res <- NA
shapiro_W_res <- NA
}
if(length(ranef(object$model)$'(Intercept)') > 3 &
length(ranef(object$model)$'(Intercept)') < 5000){
shapiro_p_ran <- shapiro.test(ranef(object$model)$'(Intercept)')[[2]]
shapiro_W_ran <- shapiro.test(ranef(object$model)$'(Intercept)')[[1]]
} else {
warning("Number of domains exceeds 5000 or is lower then 3 and thus the
Shapiro-Wilk test is not applicable for the random effects.")
shapiro_p_ran <- NA
shapiro_W_ran <- NA
}
norm <- data.frame(Skewness = c(skewness_res,skewness_ran),
Kurtosis = c(kurtosis_res, kurtosis_ran),
Shapiro_W = c(shapiro_W_res, shapiro_W_ran),
Shapiro_p = c(shapiro_p_res, shapiro_p_ran),
row.names = c("Error", "Random_effect")
)
tempMod <- object$model
tempMod$call$fixed <- object$fixed
r_squared <- r.squaredGLMM(tempMod)
if (is.matrix(r_squared)) {
r_marginal <- r_squared[1, 1]
r_conditional <- r_squared[1, 2]
} else {
r_marginal <- r_squared[1]
r_conditional <- r_squared[2]
}
icc_mixed <- icc(object$model)
coeff_det <- data.frame(
Marginal_R2 = r_marginal,
Conditional_R2 = r_conditional,
row.names = ""
)
sum_emdi <- list(out_of_smp = N_dom_unobs,
in_smp = N_dom_smp,
size_smp = smp_size,
size_pop = pop_size,
size_dom = sizedom_smp_pop,
transform = transform_method,
normality = norm,
icc = icc_mixed,
coeff_determ = coeff_det,
call = call_emdi
)
}
if(inherits(object, "direct")){
call_emdi <- object$call
N_dom_smp <- object$framework$N_dom_smp
smp_size <- object$framework$N_smp
smp_size_tab <- table(object$framework$smp_domains_vec)
smp_size_dom <-
rbind(Sample_domains = summary(as.numeric(smp_size_tab)))
sum_emdi <- list(in_smp = N_dom_smp,
size_smp = smp_size,
size_dom = smp_size_dom,
call = call_emdi,
smp_size_tab = smp_size_tab
)
}
class(sum_emdi) <- "summary.emdi"
sum_emdi
}
#' Prints a summary.emdi object
#'
#' The elements described in summary.emdi are printed.
#' @param x an object of type "summary.emdi", generally resulting
#' from applying summary to an object of type "emdi"
#' @param ... optional arguments passed to print.default; see the documentation on
#' that method functions.
#' @seealso
#' \code{\link{summary.emdi}}
#' @export
print.summary.emdi <- function(x,...) {
if(!is.null(x$size_pop)){
cat("Empirical Best Prediction\n")
cat("\n")
cat("Call:\n ")
print(x$call)
cat("\n")
cat("Out-of-sample domains: ", x$out_of_smp, "\n")
cat("In-sample domains: ", x$in_smp, "\n")
cat("\n")
cat("Sample sizes:\n")
cat("Units in sample: ", x$size_smp, "\n")
cat("Units in population: ", x$size_pop, "\n")
print(x$size_dom)
cat("\n")
cat("Explanatory measures:\n")
print(x$coeff_determ)
cat("\n")
cat("Residual diagnostics:\n")
print(x$normality)
cat("\n")
cat("ICC: ", x$icc, "\n")
cat("\n")
if(is.null(x$transform)){
cat("Transformation: No transformation \n")
} else {
cat("Transformation:\n")
print(x$transform)
}
} else {
cat("Direct estimation\n")
cat("\n")
cat("Call:\n ")
print(x$call)
cat("\n")
cat("In-sample domains: ", x$in_smp, "\n")
cat("\n")
cat("Sample sizes:\n")
cat("Units in sample: ", x$size_smp, "\n")
print(x$size_dom)
cat("\n")
cat("Units in each Domain:")
print(x$smp_size_tab)
}
}
# Auxiliary functions (taken from Results_Mexico_neueEBP.R)---------------------
# ICC
icc <- function(model){
u <- as.numeric(VarCorr(model)[1,1])
e <- model$sigma^2
u / (u + e)
}