Merge branch 'master' into topicCoherence

Docma-TU · May 21, 2019 · b7ac785 · b7ac785
2 parents 7090f8d + f171f5a
commit b7ac785
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 18 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,7 +4,7 @@ Title: Tools for Statistical Content Analysis
 Version: 0.1-4
 Date: 2019-05-17
 Authors@R: c(person("Lars", "Koppers", email="koppers@statistik.tu-dortmund.de", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-1642-9616")),
-	     person("Jonas", "Rieger", email="jonas.rieger@tu-dortmund.de", role=c("aut")),
+	     person("Jonas", "Rieger", email="jonas.rieger@tu-dortmund.de", role=c("aut"), comment = c(ORCID = "0000-0002-0007-4478")),
 	     person("Karin", "Boczek", email="karin.boczek@tu-dortmund.de", role=c("ctb"), comment = c(ORCID = "0000-0003-1516-4094")),
 	     person("Gerret", "von Nordheim", email="gerret.vonnordheim@tu-dortmund.de", role=c("ctb"), comment = c(ORCID = "0000-0001-7553-3838")))
 Description: A framework for statistical analysis in content analysis. In addition to a pipeline for preprocessing text corpora and linking to the latent Dirichlet allocation from the 'lda' package, plots are offered for the descriptive analysis of text corpora and topic models. In addition, an implementation of Chang's intruder words and intruder topics is provided.

diff --git a/R/LDAgen.R b/R/LDAgen.R
@@ -68,16 +68,14 @@ LDAgen <- function(documents, K = 100L, vocab, num.iterations = 200L,
                                               alpha = alpha, eta = eta,
                                               compute.log.likelihood = TRUE)
         ldaID <- names(documents)
-        save(list = c("result", "ldaID"), file = paste(folder, "-k", K,
-                                              "i", num.iterations,
-                                              "b", burnin, "s", seed,
-                                              "alpha", round(alpha,2),
-                                              "eta", round(eta,2),
-                                              ".RData", sep = ""))
+        save(list = c("result", "ldaID"), file = paste0(folder, "-k", K, "alpha", round(alpha,2), 
+                                                        "eta", round(eta,2), "i", num.iterations, 
+                                                        "b", burnin, "s", seed, ".RData"))
     }
     else{
-        load(paste(folder, "-k", K, "i", num.iterations, "b", burnin, "s", seed, "alpha",
-                   round(alpha,2), "eta", round(eta,2), ".RData", sep = ""))
+        load(paste0(folder, "-k", K, "alpha", round(alpha,2), 
+                    "eta", round(eta,2), "i", num.iterations, 
+                    "b", burnin, "s", seed, ".RData"))
     }
     ttw <- lda::top.topic.words(result$topics, num.words = num.words, by.score = TRUE)
     if(count){
@@ -100,8 +98,10 @@ LDAgen <- function(documents, K = 100L, vocab, num.iterations = 200L,
       ttw <- rbind(round(t(result$topic_sums / sum(result$topic_sums))*100,2), ttw)
     }
     rownames(ttw) <- c("Topic", 1:num.words)
-    write.csv(ttw, file = paste(folder, "-k", K, "alpha", round(alpha,2), "eta", round(eta,2), "i", num.iterations, "b", burnin, "s",
-                  seed, ".csv", sep = ""), fileEncoding="UTF-8")
+    write.csv(ttw, file = paste0(folder, "-k", K, "alpha", round(alpha,2), 
+                                 "eta", round(eta,2), "i", num.iterations, 
+                                 "b", burnin, "s", seed, ".csv"), 
+              fileEncoding="UTF-8")
     invisible(result)
 }
 
diff --git a/R/LDAprep.R b/R/LDAprep.R
@@ -58,7 +58,7 @@ LDAprep <- function(text, vocab,
                                         #
     if(reduce){                         # delete entries where dimension is not computable
         tmp <- lengths(lapply(text, dim)) == 0
-        if (length(tmp) > 0) text <- text[!tmp]
+        if (any(tmp)) text <- text[!tmp]
         Dim <- sapply(text, dim)
         text <- text[Dim[2,] != 0]
         text <- text[Dim[1,] != 1]

diff --git a/R/makeWordlist.R b/R/makeWordlist.R
@@ -7,7 +7,8 @@
 #' @param text List of texts.
 #' @param k Integer: How many texts should be processed at once (RAM
 #' usage)?
-#' @param ... further arguments for the sort function.
+#' @param ... further arguments for the sort function. Often you
+#' want to set \code{method = "radix"}.
 #' @return \item{words}{An alphabetical list of the words in the corpus}
 #' \item{wordtable}{A frequency table of the words in the corpus}
 #' @keywords manip
@@ -35,7 +36,7 @@ makeWordlist <- function(text, k = 100000L, ...){
     words <- c(words, unique(unlist(text[(i*k+1):(min(n, i*k+k))])))
   }
   message("  ", n, " next step")
-  words <- sort(unique(words),...)
+  words <- sort(unique(words), ...)
   message("calculate counts...\n done:")
   wordtable <- rep(0, length(words))
   names(wordtable) <- words

diff --git a/README.md b/README.md
@@ -1,7 +1,24 @@
-# tmT
-Text mining Tools 
+# tosca
+Tools for Statistical Content Analysis
+
+# About
+tosca is an R package for statistical analysis in content analysis, created at TU Dortmund University. Project page: http://docma.tu-dortmund.de/cms/de/home/R-Paket-_tosca_/index.html
+
+# How to use
+
+For examples of how to use tosca, see the vignette: https://cran.r-project.org/web/packages/tosca/vignettes/Vignette.pdf
+
+# How to cite
+
+For a BibTeX entry, please use the output of `citation(package = "tosca")`.
+
+# Feedback
+
+For wishes, issues, and bugs please use: https://github.com/Docma-TU/tosca/issues
+
 
 [![Build Status](https://travis-ci.org/Docma-TU/tosca.svg?branch=master)](https://travis-ci.org/Docma-TU/tosca) 
 [![Coverage Status](https://coveralls.io/repos/github/Docma-TU/tosca/badge.svg?branch=master)](https://coveralls.io/github/Docma-TU/tosca?branch=master)
 [![CRAN Status Badge](http://www.r-pkg.org/badges/version/tosca)](https://CRAN.R-project.org/package=tosca)
-[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/tosca)](https://cran.rstudio.com/web/packages/tosca/index.html)
+[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/tosca)](https://CRAN.R-project.org/package=tosca)
+[![Total Downloads](https://cranlogs.r-pkg.org/badges/grand-total/tosca?color=orange)](https://CRAN.R-project.org/package=tosca)
diff --git a/man/makeWordlist.Rd b/man/makeWordlist.Rd