CRAN 1.4.0
AlexChristensen committed Oct 5, 2020
1 parent cd20f0e commit 90f2f0e
Showing 13 changed files with 651 additions and 196 deletions.
8 changes: 3 additions & 5 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: SemNetCleaner
Title: An Automated Cleaning Tool for Semantic and Linguistic Data
Version: 1.2.1
Date: 2020-08-28
Version: 1.3.0
Date: 2020-09-23
Authors@R: c(person("Alexander P.", "Christensen", email = "alexpaulchristensen@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9798-7037")))
Maintainer: Alexander P. Christensen <alexpaulchristensen@gmail.com>
Description: Implements several functions that automate the cleaning and spell-checking of text data. Also converges, finalizes, removes plurals and continuous strings, and puts text data in binary format for semantic network analysis. Uses the 'SemNetDictionaries' package to make the cleaning process more accurate, efficient, and reproducible.
@@ -12,7 +12,5 @@ NeedsCompilation: no
Encoding: UTF-8
LazyData: true
Depends: R (>= 3.6.0), SemNetDictionaries (>= 0.1.5)
Imports: stringdist, hunspell, searcher, tcltk, foreign, readxl, R.matlab, stringi
Suggests: knitr, rmarkdown, htmlTable
VignetteBuilder: knitr
Imports: stringdist, searcher, tcltk, foreign, readxl, R.matlab, stringi, rstudioapi, easycsv
RoxygenNote: 7.1.1
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -2,6 +2,7 @@

export(best.guess)
export(bin2resp)
export(convert2snafu)
export(correct.changes)
export(pluralize)
export(qwerty.dist)
@@ -21,3 +22,4 @@ importFrom(utils,read.csv)
importFrom(utils,read.table)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(utils,write.table)
8 changes: 7 additions & 1 deletion NEWS
@@ -1,11 +1,17 @@
Changes in version 1.2.1
Changes in version 1.4.0

o ADD: `convert2snafu`, a function to convert cleaned data to the format used by the Python SNAFU library

o UPDATE: `correct.changes` is now included within `textcleaner` to streamline preprocessing into a single function call

o UPDATE: an `edit` function was added within `correct.changes` to allow a more seamless interface

o UPDATE: change tracking has been added to `correct.changes` in the output (`$spellcheck$changes`)

o UPDATE: `textcleaner`'s help is now more helpful

o UPDATE: text styling (face and color) is now adaptive to OS, software (R and RStudio), and RStudio theme


Changes in version 1.2.0

141 changes: 141 additions & 0 deletions R/convert2snafu.R
@@ -0,0 +1,141 @@
#' Convert to SNAFU Format
#'
#' @description Converts cleaned verbal fluency data into a .csv file
#' formatted for the SNAFU Python library (Zemla et al., 2020)
#'
#' @param ... Matrix or data frame.
#' One or more cleaned response matrices
#'
#' @param category Character.
#' Category of verbal fluency data
#'
#' @return A .csv file formatted for SNAFU
#'
#' @details The format of the file has 7 columns:
#' \describe{
#' \item{id}{Defaults to the row names of the input data}
#'
#' \item{listnum}{The list number for the fluency category. Defaults to 0.
#' Future implementations will allow more lists}
#'
#' \item{category}{The verbal fluency category that is input into the
#' \code{category} argument}
#'
#' \item{item}{The verbal fluency responses for every participant}
#'
#' \item{RT}{Response time. Currently not implemented. Defaults to 0}
#'
#' \item{RTstart}{Start of response time. Currently not implemented. Defaults to 0}
#'
#' \item{group}{Names of groups. Defaults to the names of the objects input into
#' the function (\code{...})}
#' }
#'
#' @examples
#' # Convert data to SNAFU
#' if(interactive())
#' {convert2snafu(open.clean, category = "animals")}
#'
#' @references
#' # For SNAFU, see:
#' Zemla, J. C., Cao, K., Mueller, K. D., & Austerweil, J. L. (2020).
#' SNAFU: The Semantic Network and Fluency Utility.
#' \emph{Behavior Research Methods}, 1-19.
#' https://doi.org/10.3758/s13428-019-01343-w
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @importFrom utils write.table
#'
#' @export
# Convert data to SNAFU
# Updated 24.09.2020
convert2snafu <- function (..., category)
{
# Data list
data.list <- list(...)

# Initialize snafu matrix
snafu.mat <- matrix(0, nrow = 0, ncol = 7)
colnames(snafu.mat) <- c("id", "listnum", "category", "item", "RT", "RTstart", "group")

# Get group names from the objects passed through '...'
# (handles one or more input matrices)
name <- as.character(substitute(list(...)))
name <- name[-which(name == "list")]

for(i in 1:length(data.list))
{
# Target data
target.data <- as.matrix(data.list[[i]])

# Number of possible responses
n <- ncol(target.data)

# IDs
if(is.null(row.names(target.data)))
{id <- paste("A", 1:nrow(target.data), sep = "")
}else{id <- paste("A", formatC(as.numeric(row.names(target.data)), digits = 2, format = "d", flag = 0), sep = "")}

for(j in 1:nrow(target.data))
{
# Target participant
target.part <- target.data[j,]

# Item
item <- na.omit(target.part)

# Target ID
target.id <- rep(id[j], length(item))

# List number
listnum <- rep(0, length(item))

# Category
category.col <- rep(category, length(item))

# RT
RT <- rep(0, length(item))

# RTstart
RTstart <- rep(0, length(item))

# Group
group <- rep(name[i], length(item))

# Bind data
target.mat <- cbind(target.id, listnum,
category.col, item,
RT, RTstart, group)

row.names(target.mat) <- NULL
colnames(target.mat) <- colnames(snafu.mat)

# Append snafu matrix
snafu.mat <- rbind(snafu.mat, target.mat)
}
}


# Choose directory
DIR <- easycsv::choose_dir()

# Get file name
FILENAME <- readline("Name of file: ")

# Set up path
PATH <- paste(DIR, FILENAME, sep = "/")
PATH <- gsub("\\\\", "/", PATH)
PATH <- paste(PATH, "csv", sep = ".")

write.table(snafu.mat, file = PATH,
quote = FALSE, sep = ",", row.names = FALSE)

# Message to user
message(paste("SNAFU formatted file was saved in: "), PATH)
}
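For reference, a minimal usage sketch of the new export function, following the roxygen example above; it assumes an interactive session because convert2snafu() prompts for an output directory and file name:

library(SemNetCleaner)

# Cleaned Animals fluency data documented later in this commit (R/open.clean.R)
data("open.clean")

# Interactive: prompts for a directory and file name, then writes a .csv with
# the columns id, listnum, category, item, RT, RTstart, and group
if(interactive()){
  convert2snafu(open.clean, category = "animals")
}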
13 changes: 7 additions & 6 deletions R/correct.changes.R
@@ -81,7 +81,7 @@
#' @export
#'
# Correct changes----
# Updated 21.08.2020
# Updated 10.09.2020
# Major update: 19.04.2020
correct.changes <- function(textcleaner.obj)
{
@@ -94,7 +94,7 @@ correct.changes <- function(textcleaner.obj)

readline("Press ENTER to continue...")

cat(colortext("\n\nThe first column of the spreadsheet corresponds to the", defaults = "message"))
cat(colortext("\nThe first column of the spreadsheet corresponds to the", defaults = "message"))
cat(colortext("\nrow number provided in the output object `$spellcheck$correspondence`", defaults = "message"))
cat(colortext("\n(see ?textcleaner for more information about this output).", defaults = "message"))

@@ -146,14 +146,15 @@ correct.changes <- function(textcleaner.obj)
for(i in 1:length(target.changes))
{
## Set up change matrix
chn.mat <- rbind(automated[target.changes[i],-1], changes[target.changes[i],-1])
colnames(chn.mat) <- rep("to", ncol(chn.mat))
row.names(chn.mat) <- c("Automated", "Corrected")
chn.mat <- rbind(automated[target.changes[i],], changes[target.changes[i],])
colnames(chn.mat)[-1] <- rep("to", ncol(chn.mat)-1)
row.names(chn.mat) <- c("Previous", "Corrected")
chn.mat <- chn.mat[,-which(apply(chn.mat, 2, function(x){all(is.na(x))}))]

track.changes[[automated[target.changes[i],1]]] <- chn.mat
}

res$spellcheck$changes <- track.changes
res$spellcheck$verified <- track.changes

## Original is used (rather than corrected) to run through same preprocessing
## as in textcleaner (far more efficient than actually changing through each
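A hedged sketch of inspecting the new change tracking after preprocessing; `clean` is a placeholder name for a textcleaner() output object (see the end-to-end sketch at the bottom of this page), and the `$spellcheck$verified` slot follows the assignment shown in this diff:

# Responses whose automated corrections were manually changed
names(clean$spellcheck$verified)

# Each entry is a small matrix comparing the "Previous" (automated) and
# "Corrected" (manually verified) versions of a response
clean$spellcheck$verified[[1]]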
26 changes: 26 additions & 0 deletions R/open.clean.R
@@ -0,0 +1,26 @@
# Cleaned Response Matrix (Openness and Verbal Fluency)----
#' Cleaned Response Matrix (Openness and Verbal Fluency)
#'
#' Cleaned response matrix for the Animals verbal fluency data (\emph{n} = 516)
#' from Christensen et al. (2018).
#'
#' @name open.clean
#'
#' @docType data
#'
#' @usage data(open.clean)
#'
#' @format open.clean (matrix, 516 x 35)
#'
#' @keywords datasets
#'
#' @references
#' Christensen, A. P., Kenett, Y. N., Cotter, K. N., Beaty, R. E., & Silvia, P. J. (2018).
#' Remotely close associations: Openness to experience and semantic memory structure.
#' \emph{European Journal of Personality}, \emph{32}, 480-492.
#' doi:\href{https://doi.org/10.1002/per.2157}{10.1002/per.2157}
#'
#' @examples
#' data("open.clean")
NULL
#----
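A quick check of the documented format (a 516 x 35 response matrix):

library(SemNetCleaner)
data("open.clean")
dim(open.clean)   # 516 rows (participants) by 35 response columns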
74 changes: 57 additions & 17 deletions R/textcleaner.R
@@ -26,18 +26,18 @@
#' Use \code{dictionaries()} or \code{find.dictionaries()} for more options
#' (See \code{\link{SemNetDictionaries}} for more details)
#'
#' @param add.path Character.
#' Path to additional dictionaries to be found.
#' DOES NOT search recursively (through all folders in path)
#' to avoid a time-intensive search.
#' Set to \code{"choose"} to open an interactive directory explorer
#'
#' @param continue List.
#' An unfinished result from a previous run that still needs to be completed.
#' Allows you to continue manually spell-checking your data
#' after you've closed R or encountered an error.
#' Defaults to \code{NULL}
#'
#' @param walkthrough Boolean.
#' Whether a walkthrough should be provided (recommended for first-time users).
#' Defaults to \code{NULL}, which will ask whether you would like a walkthrough.
#' Set to \code{TRUE} to do the walkthrough.
#' Set to \code{FALSE} to skip the walkthrough
#'
#' @return This function returns a list containing the following objects:
#'
#' \item{binary}{A matrix of responses where each row represents a participant
@@ -48,11 +48,11 @@
#'
#' \itemize{
#'
#' \item{clean}
#' \item{\code{clean}}
#' {A response matrix that has been spell-checked and de-pluralized with duplicates removed.
#' This can be used as a final dataset for analyses (e.g., fluency of responses)}
#'
#' \item{original}
#' \item{\code{original}}
#' {The original response matrix with white spaces removed before and
#' after each response. Also converts all upper-case letters to lower case}
#'
@@ -116,13 +116,13 @@
#'
#' @export
# Text Cleaner----
# Updated 21.08.2020
# Updated 08.09.2020
# Major update: 19.04.2020
textcleaner <- function(data = NULL, miss = 99,
partBY = c("row","col"),
dictionary = NULL,
continue = NULL,
walkthrough = NULL)
dictionary = NULL, add.path = NULL,
continue = NULL#, walkthrough = NULL
)
{
# Check if user is continuing from a previous point
if(is.null(continue))
@@ -182,7 +182,9 @@ textcleaner <- function(data = NULL, miss = 99,
spell.check <- try(
spellcheck.dictionary(uniq.resp = uniq.resp,
dictionary = dictionary,
data = data, walkthrough = walkthrough),
add.path = add.path,
data = data#, walkthrough = walkthrough
),
silent = TRUE
)

@@ -251,7 +253,6 @@

## Make sure to replace faux "NA" with real NA
corrected$corrected[which(corrected$corrected == "NA")] <- NA
res$responses$checked <- as.data.frame(corrected$corrected, stringsAsFactors = FALSE)

## Cleaned responses (no intrusions or perseverations)
cleaned.list <- apply(corrected$corrected, 1, function(x){unique(na.omit(x))})
@@ -270,7 +271,6 @@

res$responses$clean <- cleaned.matrix


# Convert to binary response matrix (error catch)
res$responses$binary <- try(
resp2bin(corrected$corrected),
@@ -292,12 +292,52 @@
class(res) <- "textcleaner"

# Correct auto-corrections
res <- correct.changes(res)
res <- try(correct.changes(res), silent = TRUE)

if(any(class(res) == "try-error"))
{
error.fun(res, "correct.changes", "textcleaner")

return(res)
}

# Let user know spell-check is complete
Sys.sleep(1)
message("\nPreprocessing complete.\n")
Sys.sleep(1)
Sys.sleep(2)

# Let user know where to send their dictionaries and monikers
dictionary.output <- paste(
textsymbol("bullet"),
"Dictionary output: `OBJECT_NAME$dictionary`",
sep = " "
)

moniker.output <- paste(
textsymbol("bullet"),
"Moniker output: `OBJECT_NAME$spellcheck$manual`",
sep = " "
)

cat(

colortext(

paste(
"Consider submitting your dictionary and spelling corrections (i.e., monikers) to:\n\n",
"https://github.com/AlexChristensen/SemNetDictionaries/issues/new/choose\n\n",
dictionary.output, "\n\n",
moniker.output, "\n\n"
),

defaults = "message"

)

)

Sys.sleep(2)


return(res)
}
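Putting the 1.4.0 changes together, a hedged end-to-end sketch; `raw.data`, the "animals" dictionary choice, and the object name `clean` are placeholders rather than part of this commit:

library(SemNetCleaner)

# Single call now covers spell-checking, manual verification (correct.changes),
# and change tracking
clean <- textcleaner(data = raw.data, miss = 99,
                     partBY = "row", dictionary = "animals")

# Cleaned and binary response matrices for semantic network analysis
clean$responses$clean
clean$responses$binary

# Outputs referenced in the closing message, for submission to SemNetDictionaries
clean$dictionary          # dictionary output
clean$spellcheck$manual   # moniker output

# Export the cleaned responses for SNAFU (interactive: prompts for a directory
# and file name)
if(interactive()){
  convert2snafu(clean$responses$clean, category = "animals")
}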
