update progress

Alfrzlp · Feb 22, 2024 · 42ab58f · 42ab58f
1 parent a9989ad
commit 42ab58f
Show file tree

Hide file tree

Showing 62 changed files with 190 additions and 905 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: saens
 Type: Package
-Title: Small Area Estimation on the Fay-Herriot Model for Non-Sampled Area Estimation with Cluster Information.
+Title: Small Area Estimation with Cluster Information for Estimation of Non-Sampled Areas
 Version: 0.1.0
 Authors@R: c(
-      person("Ridson Al Farizal P", role = c("aut", "cre","cph"), email = "ridsonap@bps.go.id", comment = c(ORCID = "0000-0003-0617-0214")),
+      person("Ridson Al Farizal P", role = c("aut", "cre","cph"), email = "alfrzlp@gmail.com", comment = c(ORCID = "0000-0003-0617-0214")),
       person("Azka Ubaidillah", role = "aut", email = "azka@stis.ac.id", comment = c(ORCID = "0000-0002-3597-0459")) 
     )
-Description: This package provides several methods for small area estimation on the Fay-Herriot model for non-sampled area estimation with cluster information.
+Description: Implementation of small area estimation (Fay-Herriot model) with EBLUP (Empirical Best Linear Unbiased Prediction) Approach for non-sampled area estimation by adding cluster information and assuming that there are similarities among particular areas. See also Rao & Molina (2015, ISBN:978-1-118-73578-7) and Anisa et al. (2013) <doi:10.9790/5728-10121519>.
 License: MIT + file LICENSE
 URL: https://github.com/Alfrzlp/sae-ns
 BugReports: https://github.com/Alfrzlp/sae-ns/issues

diff --git a/R/AIC.R b/R/AIC.R
@@ -1,11 +1,11 @@
-#' Akaike's An Information Criterion
+#' Akaike's An Information Criterion.
 #'
 #' @description Generic function calculating Akaike's "An Information Criterion" for EBLUP model
 #'
-#' @param object EBLUP model
+#' @param object EBLUP model.
 #' @param ... further arguments passed to or from other methods.
 #'
-#' @return AIC value
+#' @return AIC value.
 #'
 #' @examples
 #' m1 <- eblupfh_cluster(y ~ x1 + x2 + x3, data = mys, vardir = "var", cluster = "clust")

diff --git a/R/autoplot.R b/R/autoplot.R
@@ -1,9 +1,9 @@
-#' Autoplot EBLUP results
+#' Autoplot EBLUP results.
 #'
-#' @param object EBLUP model
-#' @param variable variable to plot
+#' @param object EBLUP model.
+#' @param variable variable to plot.
 #' @param ... further arguments passed to or from other methods.
-#' @return plot
+#' @return plot.
 #'
 #' @examples
 #' library(saens)

diff --git a/R/coef.R b/R/coef.R
@@ -1,6 +1,6 @@
-#' Extract Model Coefficients
+#' Extract Model Coefficients.
 #'
-#' @param object EBLUP model
+#' @param object EBLUP model.
 #' @param ... further arguments passed to or from other methods.
 #'
 #' @return model coefficients

diff --git a/R/eblupfh.R b/R/eblupfh.R
@@ -1,21 +1,20 @@
 #' EBLUPs based on a Fay-Herriot Model.
 #'
-#' @description This function gives the EBLUP (or EB predictor under normality) based on a Fay-Herriot model.
+#' @description This function gives the Empirical Best Linear Unbiased Prediction (EBLUP) or Empirical Best (EB) predictor under normality based on a Fay-Herriot model.
 #'
 #' @references
 #' \enumerate{
 #'  \item Rao, J. N., & Molina, I. (2015). Small area estimation. John Wiley & Sons.
-#'  \item Anisa, R., Kurnia, A., & Indahwati, I. (2013). Cluster information of non-sampled area in small area estimation. E-Prosiding Internasional| Departemen Statistika FMIPA Universitas Padjadjaran, 1(1), 69-76.
 #'}
 #'
 #' @param formula an object of class formula that contains a description of the model to be fitted. The variables included in the formula must be contained in the data.
-#' @param data a data frame or a data frame extension (e.g. a tibble)
-#' @param vardir vector or column names from data that contain variance sampling from the direct estimator for each area
-#' @param method Fitting method can be chosen between 'ML' and 'REML'
+#' @param data a data frame or a data frame extension (e.g. a tibble).
+#' @param vardir vector or column names from data that contain variance sampling from the direct estimator for each area.
+#' @param method Fitting method can be chosen between 'ML' and 'REML'.
 #' @param maxiter maximum number of iterations allowed in the Fisher-scoring algorithm. Default is 100 iterations.
 #' @param precision convergence tolerance limit for the Fisher-scoring algorithm. Default value is 0.0001.
-#' @param scale scaling auxiliary variable or not, default value is FALSE
-#' @param print_result print coefficient or not, default value is TRUE
+#' @param scale scaling auxiliary variable or not, default value is FALSE.
+#' @param print_result print coefficient or not, default value is TRUE.
 #'
 #' @returns The function returns a list with the following objects (\code{df_res} and \code{fit}):
 #' \code{df_res} a data frame that contains the following columns: \cr
@@ -24,7 +23,6 @@
 #'    * \code{random_effect} random effect for each area \cr
 #'    * \code{vardir} variance sampling from the direct estimator for each area \cr
 #'    * \code{mse} Mean Square Error \cr
-#'    * \code{cluster} cluster information for each area \cr
 #'    * \code{rse} Relative Standart Error (%) \cr
 #'
 #' \code{fit} a list containing the following objects: \cr
@@ -70,28 +68,27 @@ eblupfh <- function(formula, data, vardir, method = "REML",
 
 
   if (any(is.na(y))) {
-    cli::cli_abort("variable y contains NA values, please use eblupfh cluster function")
+    cli::cli_abort("variable {all.names(formula[2])} contains NA values, please use eblupfh_cluster function")
   }
   datas <- data
 
+  vardir_name <- vardir
   vardir <- .get_variable(datas, vardir)
   formuladata <- stats::model.frame(formula, datas, na.action = NULL)
   X <- stats::model.matrix(formula, datas)
   y <- as.matrix(formuladata[1])
 
   if (scale) {
     X <- scale(X)
-    # my_scale <- attr(X, "scaled:scale")
-    # my_center <- attr(X, "scaled:center")
   }
 
   # Cek pilihan metode
   if (!toupper(method) %in% c("ML", "REML")) {
-    cli::cli_abort('"method" must be ML or REML, not {method}')
+    cli::cli_abort('"method" must be ML or REML, not {method}.')
   }
   # cek vardir mengandung NA atau tidak
   if (any(is.na(vardir))) {
-    cli::cli_abort("Argument vardir contains NA values.")
+    cli::cli_abort("Argument {vardir_name} contains NA values.")
   }
   # cek Auxiliary variabels mengandung NA atau tidak
   if (any(is.na(X))) {
@@ -123,15 +120,14 @@ eblupfh <- function(formula, data, vardir, method = "REML",
   R <- diag(vardir, m)
   Z <- diag(1, m)
 
+
   # Fisher scoring algorithm
   if (method == "ML") {
     while ((diff > precision) & (k < maxiter)) {
       # inisialisasi varians pengaruh acak (sigma2 u)
       G <- diag(sigma2_u[k + 1], m)
       # varians y
       V <- Z %*% G %*% t(Z) + R
-      print(Z %*% G %*% t(Z))
-      print(V)
 
       Vi <- solve(V)
       XtVi <- Xt %*% Vi

diff --git a/R/eblupfh_cluster.R b/R/eblupfh_cluster.R
@@ -1,6 +1,6 @@
 #' EBLUPs based on a Fay-Herriot Model with Cluster Information.
 #'
-#' @description This function gives the EBLUP (or EB predictor under normality) based on a Fay-Herriot model with cluster information for non-sampled areas.
+#' @description This function gives the Empirical Best Linear Unbiased Prediction (EBLUP) or Empirical Best (EB) predictor based on a Fay-Herriot model with cluster information for non-sampled areas.
 #'
 #' @references
 #' \enumerate{
@@ -9,14 +9,14 @@
 #' }
 #'
 #' @param formula an object of class formula that contains a description of the model to be fitted. The variables included in the formula must be contained in the data.
-#' @param data a data frame or a data frame extension (e.g. a tibble)
-#' @param vardir vector or column names from data that contain variance sampling from the direct estimator for each area
+#' @param data a data frame or a data frame extension (e.g. a tibble).
+#' @param vardir vector or column names from data that contain variance sampling from the direct estimator for each area.
 #' @param cluster vector or column name from data that contain cluster information.
 #' @param method Fitting method can be chosen between 'ML' and 'REML'
 #' @param maxiter maximum number of iterations allowed in the Fisher-scoring algorithm. Default is 100 iterations.
 #' @param precision convergence tolerance limit for the Fisher-scoring algorithm. Default value is 0.0001.
-#' @param scale scaling auxiliary variable or not, default value is FALSE
-#' @param print_result print coefficient or not, default value is TRUE
+#' @param scale scaling auxiliary variable or not, default value is FALSE.
+#' @param print_result print coefficient or not, default value is TRUE.
 #'
 #' @returns The function returns a list with the following objects \code{df_res} and \code{fit}:
 #' \code{df_res} a data frame that contains the following columns: \cr
@@ -71,10 +71,10 @@ eblupfh_cluster <- function(formula, data, vardir, cluster, method = "REML",
 
 
   if (any(is.na(y)) & missing(cluster)) {
-    cli::cli_abort("variable y contains NA values, cluster information must be filled")
+    cli::cli_abort("variable {all.names(formula[2])} contains NA values, cluster information must be filled")
   }
   if (!any(is.na(y)) & !missing(cluster)) {
-    cli::cli_warn("variable y does not contain na and cluster variable is filled")
+    cli::cli_warn("variable {all.names(formula[2])} does not contain na and cluster variable is filled")
   }
 
   if (!any(is.na(y))) {
@@ -84,7 +84,7 @@ eblupfh_cluster <- function(formula, data, vardir, cluster, method = "REML",
     # Extract vardir and cluster
     clust <- .get_variable(data, cluster)
     if (any(is.na(clust))) {
-      cli::cli_abort("cluster variable contains NA values.")
+      cli::cli_abort("{cluster} variable contains NA values.")
     }
     # data sampled
     datas <- data[!nonsample, ]
@@ -93,7 +93,7 @@ eblupfh_cluster <- function(formula, data, vardir, cluster, method = "REML",
     df_res$cluster <- clust
   }
 
-
+  vardir_name <- vardir
   vardir <- .get_variable(datas, vardir)
   formuladata <- stats::model.frame(formula, datas, na.action = NULL)
   X <- stats::model.matrix(formula, datas)
@@ -107,11 +107,11 @@ eblupfh_cluster <- function(formula, data, vardir, cluster, method = "REML",
 
   # Cek pilihan metode
   if (!toupper(method) %in% c("ML", "REML")) {
-    cli::cli_abort('"method" must be ML or REML, not {method}')
+    cli::cli_abort('"method" must be ML or REML, not {method}.')
   }
   # cek vardir mengandung NA atau tidak
   if (any(is.na(vardir))) {
-    cli::cli_abort("Argument vardir contains NA values.")
+    cli::cli_abort("Argument {vardir_name} contains NA values.")
   }
   # cek Auxiliary variabels mengandung NA atau tidak
   if (any(is.na(X))) {
@@ -150,8 +150,6 @@ eblupfh_cluster <- function(formula, data, vardir, cluster, method = "REML",
       G <- diag(sigma2_u[k + 1], m)
       # varians y
       V <- Z %*% G %*% t(Z) + R
-      print(Z %*% G %*% t(Z))
-      print(V)
 
       Vi <- solve(V)
       XtVi <- Xt %*% Vi

diff --git a/R/logLik.R b/R/logLik.R
@@ -1,6 +1,6 @@
-#' Extract Log-Likelihood
+#' Extract Log-Likelihood.
 #'
-#' @param object EBLUP model
+#' @param object EBLUP model.
 #' @param ... further arguments passed to or from other methods.
 #'
 #' @return Log-Likehood value

diff --git a/R/saens.R b/R/saens.R
diff --git a/R/summary.R b/R/summary.R
@@ -1,7 +1,7 @@
-#' Summarizing EBLUP Model Fits
+#' Summarizing EBLUP Model Fits.
 #'
-#' @description `summary` method for class "eblupres"
-#' @param object EBLUP model
+#' @description `summary` method for class "eblupres".
+#' @param object EBLUP model.
 #' @param ... further arguments passed to or from other methods.
 #'
 #' @return The function returns a data frame that contains the following columns: \cr

diff --git a/README.Rmd b/README.Rmd
@@ -28,7 +28,7 @@ Ridson Al Farizal P [ridsonap\@bps.go.id](mailto:ridsonap@bps.go.id)
 
 ## Description
 
-This package is provide several functions for area level of small area estimation for non sampled areas using cluster information.
+Implementation of small area estimation (Fay-Herriot model) with EBLUP (Empirical Best Linear Unbiased Prediction) Approach for non-sampled area estimation by adding cluster information and assuming that there are similarities among particular areas. See also Rao & Molina (2015, ISBN:978-1-118-73578-7) and Anisa et al. (2013) <doi:10.9790/5728-10121519>.
 
 ## Installation
 

diff --git a/README.md b/README.md
@@ -18,8 +18,12 @@ Ridson Al Farizal P <ridsonap@bps.go.id>
 
 ## Description
 
-This package is provide several functions for area level of small area
-estimation for non sampled areas using cluster information.
+Implementation of small area estimation (Fay-Herriot model) with EBLUP
+(Empirical Best Linear Unbiased Prediction) Approach for non-sampled
+area estimation by adding cluster information and assuming that there
+are similarities among particular areas. See also Rao & Molina (2015,
+<ISBN:978-1-118-73578-7>) and Anisa et al. (2013)
+<doi:10.9790/5728-10121519>.
 
 ## Installation
 

diff --git a/man/AIC.eblupres.Rd b/man/AIC.eblupres.Rd
diff --git a/man/autoplot.eblupres.Rd b/man/autoplot.eblupres.Rd
diff --git a/man/coef.eblupres.Rd b/man/coef.eblupres.Rd
diff --git a/man/eblupfh.Rd b/man/eblupfh.Rd