CRAN 1.4.0
AlexChristensen committed Oct 5, 2020
1 parent cd20f0e commit 90f2f0e
Showing 13 changed files with 651 additions and 196 deletions.
8 changes: 3 additions & 5 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: SemNetCleaner
Title: An Automated Cleaning Tool for Semantic and Linguistic Data
Version: 1.2.1
Date: 2020-08-28
Version: 1.3.0
Date: 2020-09-23
Authors@R: c(person("Alexander P.", "Christensen", email = "alexpaulchristensen@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9798-7037")))
Maintainer: Alexander P. Christensen <alexpaulchristensen@gmail.com>
Description: Implements several functions that automate the cleaning and spell-checking of text data. Also converges, finalizes, removes plurals and continuous strings, and puts text data in binary format for semantic network analysis. Uses the 'SemNetDictionaries' package to make the cleaning process more accurate, efficient, and reproducible.
@@ -12,7 +12,5 @@ NeedsCompilation: no
Encoding: UTF-8
LazyData: true
Depends: R (>= 3.6.0), SemNetDictionaries (>= 0.1.5)
Imports: stringdist, hunspell, searcher, tcltk, foreign, readxl, R.matlab, stringi
Suggests: knitr, rmarkdown, htmlTable
VignetteBuilder: knitr
Imports: stringdist, searcher, tcltk, foreign, readxl, R.matlab, stringi, rstudioapi, easycsv
RoxygenNote: 7.1.1
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -2,6 +2,7 @@

export(best.guess)
export(bin2resp)
export(convert2snafu)
export(correct.changes)
export(pluralize)
export(qwerty.dist)
@@ -21,3 +22,4 @@ importFrom(utils,read.csv)
importFrom(utils,read.table)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(utils,write.table)
8 changes: 7 additions & 1 deletion NEWS
@@ -1,11 +1,17 @@
Changes in version 1.2.1
Changes in version 1.4.0

o ADD: `convert2snafu`, a function to convert cleaned data to the format used by the Python SNAFU library

o UPDATE: `correct.changes` is now included within `textcleaner` to streamline preprocessing into a single function call

o UPDATE: an `edit` function was added within `correct.changes` to allow a more seamless interface

o UPDATE: change tracking has been added to `correct.changes` in the output (`$spellcheck$changes`)

o UPDATE: `textcleaner`'s help is now more helpful

o UPDATE: text styling (face and color) is now adaptive to OS, software (R and RStudio), and RStudio theme


Changes in version 1.2.0

141 changes: 141 additions & 0 deletions R/convert2snafu.R
@@ -0,0 +1,141 @@
#' Convert to SNAFU Format
#'
#' @description Converts cleaned verbal fluency data into a .csv file
#' formatted for the SNAFU Python library (Zemla et al., 2020)
#'
#' @param ... Matrix or data frame.
#' One or more cleaned response matrices
#'
#' @param category Character.
#' Category of verbal fluency data
#'
#' @return A .csv file formatted for SNAFU
#'
#' @details The format of the file has 7 columns:
#' \describe{
#' \item{id}{Defaults to the row names of the input data}
#'
#' \item{listnum}{The list number for the fluency category. Defaults to 0.
#' Future implementations will allow more lists}
#'
#' \item{category}{The verbal fluency category that is input into the
#' \code{category} argument}
#'
#' \item{item}{The verbal fluency responses for every participant}
#'
#' \item{RT}{Response time. Currently not implemented. Defaults to 0}
#'
#' \item{RTstart}{Start of response time. Currently not implemented. Defaults to 0}
#'
#' \item{group}{Names of groups. Defaults to the names of the objects input into
#' the function (\code{...})}
#' }
#'
#' @examples
#' # Convert data to SNAFU
#' if(interactive())
#' {convert2snafu(open.clean, category = "animals")}
#'
#' @references
#' # For SNAFU, see:
#' Zemla, J. C., Cao, K., Mueller, K. D., & Austerweil, J. L. (2020).
#' SNAFU: The Semantic Network and Fluency Utility.
#' \emph{Behavior Research Methods}, 1-19.
#' https://doi.org/10.3758/s13428-019-01343-w
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @importFrom utils write.table
#'
#' @export
# Convert data to SNAFU
# Updated 24.09.2020
convert2snafu <- function (..., category)
{
# Data list
data.list <- list(...)

# Initialize snafu matrix
snafu.mat <- matrix(0, nrow = 0, ncol = 7)
colnames(snafu.mat) <- c("id", "listnum", "category", "item", "RT", "RTstart", "group")

# Get group names from the objects passed through '...'
# (handles one or more input matrices)
name <- as.character(substitute(list(...)))
name <- name[-which(name == "list")]

for(i in 1:length(data.list))
{
# Target data
target.data <- as.matrix(data.list[[i]])

# Number of possible responses
n <- ncol(target.data)

# IDs
if(is.null(row.names(target.data)))
{id <- paste("A", 1:nrow(target.data), sep = "")
}else{id <- paste("A", formatC(as.numeric(row.names(target.data)), digits = 2, format = "d", flag = 0), sep = "")}

for(j in 1:nrow(target.data))
{
# Target participant
target.part <- target.data[j,]

# Item
item <- na.omit(target.part)

# Target ID
target.id <- rep(id[j], length(item))

# List number
listnum <- rep(0, length(item))

# Category
category.col <- rep(category, length(item))

# RT
RT <- rep(0, length(item))

# RTstart
RTstart <- rep(0, length(item))

# Group
group <- rep(name[i], length(item))

# Bind data
target.mat <- cbind(target.id, listnum,
category.col, item,
RT, RTstart, group)

row.names(target.mat) <- NULL
colnames(target.mat) <- colnames(snafu.mat)

# Append snafu matrix
snafu.mat <- rbind(snafu.mat, target.mat)
}
}


# Choose directory
DIR <- easycsv::choose_dir()

# Get file name
FILENAME <- readline("Name of file: ")

# Set up path
PATH <- paste(DIR, FILENAME, sep = "/")
PATH <- gsub("\\\\", "/", PATH)
PATH <- paste(PATH, "csv", sep = ".")

write.table(snafu.mat, file = PATH,
quote = FALSE, sep = ",", row.names = FALSE)

# Message to user
message(paste("SNAFU formatted file was saved in: "), PATH)
}
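For reference, a minimal usage sketch of the new export function, following the roxygen example above; it assumes an interactive session because convert2snafu() prompts for an output directory and file name:

library(SemNetCleaner)

# Cleaned Animals fluency data documented later in this commit (R/open.clean.R)
data("open.clean")

# Interactive: prompts for a directory and file name, then writes a .csv with
# the columns id, listnum, category, item, RT, RTstart, and group
if(interactive()){
  convert2snafu(open.clean, category = "animals")
}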
13 changes: 7 additions & 6 deletions R/correct.changes.R
@@ -81,7 +81,7 @@
#' @export
#'
# Correct changes----
# Updated 21.08.2020
# Updated 10.09.2020
# Major update: 19.04.2020
correct.changes <- function(textcleaner.obj)
{
@@ -94,7 +94,7 @@ correct.changes <- function(textcleaner.obj)

readline("Press ENTER to continue...")

cat(colortext("\n\nThe first column of the spreadsheet corresponds to the", defaults = "message"))
cat(colortext("\nThe first column of the spreadsheet corresponds to the", defaults = "message"))
cat(colortext("\nrow number provided in the output object `$spellcheck$correspondence`", defaults = "message"))
cat(colortext("\n(see ?textcleaner for more information about this output).", defaults = "message"))

@@ -146,14 +146,15 @@ correct.changes <- function(textcleaner.obj)
for(i in 1:length(target.changes))
{
## Set up change matrix
chn.mat <- rbind(automated[target.changes[i],-1], changes[target.changes[i],-1])
colnames(chn.mat) <- rep("to", ncol(chn.mat))
row.names(chn.mat) <- c("Automated", "Corrected")
chn.mat <- rbind(automated[target.changes[i],], changes[target.changes[i],])
colnames(chn.mat)[-1] <- rep("to", ncol(chn.mat)-1)
row.names(chn.mat) <- c("Previous", "Corrected")
chn.mat <- chn.mat[,-which(apply(chn.mat, 2, function(x){all(is.na(x))}))]

track.changes[[automated[target.changes[i],1]]] <- chn.mat
}

res$spellcheck$changes <- track.changes
res$spellcheck$verified <- track.changes

## Original is used (rather than corrected) to run through same preprocessing
## as in textcleaner (far more efficient than actually changing through each
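A hedged sketch of inspecting the new change tracking after preprocessing; `clean` is a placeholder name for a textcleaner() output object (see the end-to-end sketch at the bottom of this page), and the `$spellcheck$verified` slot follows the assignment shown in this diff:

# Responses whose automated corrections were manually changed
names(clean$spellcheck$verified)

# Each entry is a small matrix comparing the "Previous" (automated) and
# "Corrected" (manually verified) versions of a response
clean$spellcheck$verified[[1]]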
26 changes: 26 additions & 0 deletions R/open.clean.R
@@ -0,0 +1,26 @@
# Cleaned Response Matrix (Openness and Verbal Fluency)----
#' Cleaned Response Matrix (Openness and Verbal Fluency)
#'
#' Cleaned response matrix for the Animals verbal fluency data (\emph{n} = 516)
#' from Christensen et al. (2018).
#'
#' @name open.clean
#'
#' @docType data
#'
#' @usage data(open.clean)
#'
#' @format open.clean (matrix, 516 x 35)
#'
#' @keywords datasets
#'
#' @references
#' Christensen, A. P., Kenett, Y. N., Cotter, K. N., Beaty, R. E., & Silvia, P. J. (2018).
#' Remotely close associations: Openness to experience and semantic memory structure.
#' \emph{European Journal of Personality}, \emph{32}, 480-492.
#' doi:\href{https://doi.org/10.1002/per.2157}{10.1002/per.2157}
#'
#' @examples
#' data("open.clean")
NULL
#----
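A quick check of the documented format (a 516 x 35 response matrix):

library(SemNetCleaner)
data("open.clean")
dim(open.clean)   # 516 rows (participants) by 35 response columns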
74 changes: 57 additions & 17 deletions R/textcleaner.R
@@ -26,18 +26,18 @@
#' Use \code{dictionaries()} or \code{find.dictionaries()} for more options
#' (See \code{\link{SemNetDictionaries}} for more details)
#'
#' @param add.path Character.
#' Path to additional dictionaries to be found.
#' DOES NOT search recursively (through all folders in path)
#' to avoid a time-intensive search.
#' Set to \code{"choose"} to open an interactive directory explorer
#'
#' @param continue List.
#' An unfinished result from a previous run that still needs to be completed.
#' Allows you to continue manually spell-checking your data
#' after you've closed R or encountered an error.
#' Defaults to \code{NULL}
#'
#' @param walkthrough Boolean.
#' Whether a walkthrough should be provided (recommended for first-time users).
#' Defaults to \code{NULL}, which will ask whether you would like a walkthrough.
#' Set to \code{TRUE} to do the walkthrough.
#' Set to \code{FALSE} to skip the walkthrough
#'
#' @return This function returns a list containing the following objects:
#'
#' \item{binary}{A matrix of responses where each row represents a participant
@@ -48,11 +48,11 @@
#'
#' \itemize{
#'
#' \item{clean}
#' \item{\code{clean}}
#' {A response matrix that has been spell-checked and de-pluralized with duplicates removed.
#' This can be used as a final dataset for analyses (e.g., fluency of responses)}
#'
#' \item{original}
#' \item{\code{original}}
#' {The original response matrix with white spaces removed before and
#' after each response. Also converts all upper-case letters to lower case}
#'
@@ -116,13 +116,13 @@
#'
#' @export
# Text Cleaner----
# Updated 21.08.2020
# Updated 08.09.2020
# Major update: 19.04.2020
textcleaner <- function(data = NULL, miss = 99,
partBY = c("row","col"),
dictionary = NULL,
continue = NULL,
walkthrough = NULL)
dictionary = NULL, add.path = NULL,
continue = NULL#, walkthrough = NULL
)
{
# Check if user is continuing from a previous point
if(is.null(continue))
@@ -182,7 +182,9 @@ textcleaner <- function(data = NULL, miss = 99,
spell.check <- try(
spellcheck.dictionary(uniq.resp = uniq.resp,
dictionary = dictionary,
data = data, walkthrough = walkthrough),
add.path = add.path,
data = data#, walkthrough = walkthrough
),
silent = TRUE
)

@@ -251,7 +253,6 @@

## Make sure to replace faux "NA" with real NA
corrected$corrected[which(corrected$corrected == "NA")] <- NA
res$responses$checked <- as.data.frame(corrected$corrected, stringsAsFactors = FALSE)

## Cleaned responses (no intrusions or perseverations)
cleaned.list <- apply(corrected$corrected, 1, function(x){unique(na.omit(x))})
@@ -270,7 +271,6 @@

res$responses$clean <- cleaned.matrix


# Convert to binary response matrix (error catch)
res$responses$binary <- try(
resp2bin(corrected$corrected),
@@ -292,12 +292,52 @@
class(res) <- "textcleaner"

# Correct auto-corrections
res <- correct.changes(res)
res <- try(correct.changes(res), silent = TRUE)

if(any(class(res) == "try-error"))
{
error.fun(res, "correct.changes", "textcleaner")

return(res)
}

# Let user know spell-check is complete
Sys.sleep(1)
message("\nPreprocessing complete.\n")
Sys.sleep(1)
Sys.sleep(2)

# Let user know where to send their dictionaries and monikers
dictionary.output <- paste(
textsymbol("bullet"),
"Dictionary output: `OBJECT_NAME$dictionary`",
sep = " "
)

moniker.output <- paste(
textsymbol("bullet"),
"Moniker output: `OBJECT_NAME$spellcheck$manual`",
sep = " "
)

cat(

colortext(

paste(
"Consider submitting your dictionary and spelling corrections (i.e., monikers) to:\n\n",
"https://github.com/AlexChristensen/SemNetDictionaries/issues/new/choose\n\n",
dictionary.output, "\n\n",
moniker.output, "\n\n"
),

defaults = "message"

)

)

Sys.sleep(2)


return(res)
}
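Putting the 1.4.0 changes together, a hedged end-to-end sketch; `raw.data`, the "animals" dictionary choice, and the object name `clean` are placeholders rather than part of this commit:

library(SemNetCleaner)

# Single call now covers spell-checking, manual verification (correct.changes),
# and change tracking
clean <- textcleaner(data = raw.data, miss = 99,
                     partBY = "row", dictionary = "animals")

# Cleaned and binary response matrices for semantic network analysis
clean$responses$clean
clean$responses$binary

# Outputs referenced in the closing message, for submission to SemNetDictionaries
clean$dictionary          # dictionary output
clean$spellcheck$manual   # moniker output

# Export the cleaned responses for SNAFU (interactive: prompts for a directory
# and file name)
if(interactive()){
  convert2snafu(clean$responses$clean, category = "animals")
}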
