Skip to content

Commit

Permalink
paste0(tempdir() -> file.path(tempdir()
Browse files Browse the repository at this point in the history
  • Loading branch information
lkoppers committed Sep 3, 2018
1 parent 846f442 commit a282d31
Show file tree
Hide file tree
Showing 11 changed files with 17 additions and 17 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: tosca
Type: Package
Title: Tools for Statistical Content Analysis
Version: 0.1-0
Date: 2018-08-31
Version: 0.1-1
Date: 2018-09-03
Authors@R: c(person("Lars", "Koppers", email="koppers@statistik.tu-dortmund.de", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-1642-9616")),
person("Jonas", "Rieger", email="riegerjonas@gmx.de", role=c("aut")),
person("Karin", "Boczek", email="karin.boczek@tu-dortmund.de", role=c("ctb"), comment = c(ORCID = "0000-0003-1516-4094")),
Expand Down
2 changes: 1 addition & 1 deletion R/LDAgen.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#' @export LDAgen
LDAgen <- function(documents, K = 100L, vocab, num.iterations = 200L,
burnin = 70L, alpha = NULL, eta = NULL, seed = NULL,
folder = paste0(tempdir(),"/lda-result"), num.words = 50L, LDA = TRUE, count = FALSE){
folder = file.path(tempdir(),"lda-result"), num.words = 50L, LDA = TRUE, count = FALSE){
if(is.null(alpha)) alpha <- 1/K
if(is.null(eta)) eta <- 1/K
if(is.null(seed)) seed <- sample(1:10^8,1)
Expand Down
2 changes: 1 addition & 1 deletion man/LDAgen.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_LDAgen.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ load("data/LDAdoc_compare.RData")

csvTest <- read.csv("data/test-k3i20b70s24602alpha0.33eta0.33_orig.csv")
counttest <- LDAgen(documents=LDAdoc, K = 3L, vocab=wordlist$words, num.iterations = 20L, burnin = 70L, seed=24602, num.words = 10L, LDA = TRUE, count=TRUE)
csvTest2 <- read.csv(paste0(tempdir(),"/lda-result-k3i20b70s24602alpha0.33eta0.33.csv"))
csvTest2 <- read.csv(file.path(tempdir(),"lda-result-k3i20b70s24602alpha0.33eta0.33.csv"))
expect_equal(csvTest, csvTest2)

expect_equal(lda1, LDAgen(documents=LDAdoc, K = 3L, vocab=wordlist$words, num.iterations = 20L, burnin = 70L, seed=24601, folder=tempdir(), num.words = 10L, LDA = TRUE))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_clusterTopics.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ load("data/test-k3i20b70s24601alpha0.33eta0.33.RData")

load("data/clusterTopics.RData")

expect_equal(cT, clusterTopics(ldaresult=result, file=paste0(tempdir(),"/abc.pdf"), method = "average", width=30, height=15))
expect_equal(cT2, clusterTopics(ldaresult=result, file=paste0(tempdir(),"/abc.pdf"), method = "single", width=30, height=15))
expect_equal(cT, clusterTopics(ldaresult=result, file=file.path(tempdir(),"abc.pdf"), method = "average", width=30, height=15))
expect_equal(cT2, clusterTopics(ldaresult=result, file=file.path(tempdir(),"abc.pdf"), method = "single", width=30, height=15))
})
8 changes: 4 additions & 4 deletions tests/testthat/test_plotHeat.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ test_that("plotHeat", {

obj <- textmeta(text = text, meta = meta)

res1 <- plotHeat(object = obj, ldaresult = lda, ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"))
res1 <- plotHeat(object = obj, ldaresult = lda, ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"))
expect_equal(dim(res1), c(3, 4))
res2 <- plotHeat(object = obj, ldaresult = lda, ldaID = ldaID, unit = "month",
file = paste0(tempdir(),"/abc.pdf"))
file = file.path(tempdir(),"abc.pdf"))
expect_true(all(res2$date == seq(min(res2$date), max(res2$date), "month")))
res3 <- plotHeat(object = obj, ldaresult = lda, ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"),
res3 <- plotHeat(object = obj, ldaresult = lda, ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"),
norm = TRUE)
expect_equal(dim(res3), c(3, 4))
res4 <- plotHeat(object = textmeta(meta = obj$meta), ldaresult = lda,
ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"))
ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"))
expect_equal(dim(res4), c(3, 4))
expect_true(all(res4$date == seq(min(res4$date), max(res4$date), "year")))
expect_true(all(res3$date == seq(min(res3$date), max(res3$date), "year")))
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_plotTopic.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("plotTopic", {
res5 <- plotTopic(object = obj, ldaresult = lda, ldaID = ldaID, rel = TRUE)
expect_true(all(res5$date == res1$date), all(colnames(res1) == colnames(res5)),
all(res5[, -1] <= 1))
res6 <- plotTopic(object = obj, ldaresult = lda, ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"))
res6 <- plotTopic(object = obj, ldaresult = lda, ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"))
expect_equal(res1, res6)
res7 <- plotTopic(object = obj, ldaresult = lda, ldaID = ldaID, curves = "smooth")
expect_equal(res1, res7)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_plotTopicWord.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ test_that("plotTopicWord", {
expect_true(all(res5$date == res1$date), all(colnames(res1) == colnames(res5)),
all(res5[, -1] <= 1))
res6 <- plotTopicWord(object = obj, docs = LDAdoc, ldaresult = lda,
ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"))
ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"))
expect_equal(res1, res6)
res7 <- plotTopicWord(object = obj, docs = LDAdoc, ldaresult = lda,
ldaID = ldaID, curves = "smooth")
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_plotWordSub.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ test_that("plotWordSub", {
expect_true(all(res5$date == res1$date), all(colnames(res1) == colnames(res5)),
all(res5[, -1] <= 1))
res6 <- plotWordSub(object = obj, ldaresult = lda, ldaID = ldaID,
file = paste0(tempdir(),"/abc.pdf"), search = search)
file = file.path(tempdir(),"abc.pdf"), search = search)
expect_equal(res1, res6)
res7 <- plotWordSub(object = obj, ldaresult = lda, ldaID = ldaID,
curves = "smooth", search = search)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_plotWordpt.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ test_that("plotWordpt", {
expect_true(all(res5$date == res1$date), all(colnames(res1) == colnames(res5)),
all(res5[, -1] <= 1))
res6 <- plotWordpt(object = obj, docs = LDAdoc, ldaresult = lda,
ldaID = ldaID, file = paste0(tempdir(),"/abc.pdf"))
ldaID = ldaID, file = file.path(tempdir(),"abc.pdf"))
expect_equal(res1, res6)
res7 <- plotWordpt(object = obj, docs = LDAdoc, ldaresult = lda,
ldaID = ldaID, curves = "smooth")
Expand Down
4 changes: 2 additions & 2 deletions vignettes/Vignette.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ pagesLDA <- LDAprep(text = corpusFiltered$text, vocab = words5)
After obtaining the words that appear at least six times in the whole filtered corpus, the function \texttt{LDAprep} is applied to the example corpus with \texttt{vocab = words5}. The resulting object \texttt{pagesLDA} is then passed to the function that performs a latent Dirichlet allocation.

## Performing LDA - \texttt{LDAgen}
The function that has to be applied first to the corpus prepared by \texttt{LDAprep} is \texttt{LDAgen}. The function offers the options \texttt{K} (\texttt{integer}, default: \texttt{K = 100L}) to set the number of topics, \texttt{vocab} (\texttt{character} vector) for specifying the words which are considered in the preparation of the corpus and several more e.g. number of iterations for the burnin (default: \texttt{burnin = 70}) and the number of iterations for the Gibbs sampler (default: \texttt{num.iterations = 200}). The result is saved in a \texttt{R} workspace, the first part of the results name can be specified by setting the option \texttt{folder} (default: \texttt{folder = paste0(tempdir(),"/lda-result")}). If you want to save your data permanent, you have to change the path in an non temporary one.
The function that has to be applied first to the corpus prepared by \texttt{LDAprep} is \texttt{LDAgen}. The function offers the options \texttt{K} (\texttt{integer}, default: \texttt{K = 100L}) to set the number of topics, \texttt{vocab} (\texttt{character} vector) for specifying the words which are considered in the preparation of the corpus, and several more, e.g. the number of iterations for the burn-in (default: \texttt{burnin = 70}) and the number of iterations for the Gibbs sampler (default: \texttt{num.iterations = 200}). The result is saved in an \texttt{R} workspace; the first part of the result's file name can be specified by setting the option \texttt{folder} (default: \texttt{folder = file.path(tempdir(),"lda-result")}). If you want to save your data permanently, you have to change the path to a non-temporary one.

In the concrete example, the prepared corpus \texttt{pagesLDA} is used for \texttt{documents}, the number of topics is set to \texttt{K = 10}, and for reproducibility the seed is set to \texttt{seed = 123}. The filename consists of the \texttt{folder} argument followed by the values of \texttt{K}, \texttt{num.iterations}, \texttt{burnin}, \texttt{seed}, \texttt{alpha} and \texttt{eta} of the LDA. The hyperparameters \texttt{alpha} and \texttt{eta} are set to $1/K$ by default.
```{r, eval = TRUE, include = FALSE}
Expand All @@ -366,7 +366,7 @@ load("lda-result-k10i200b70s123alpha0.1eta0.1.RData")

```{r, eval = FALSE}
result <- LDAgen(documents = pagesLDA, K = 10L, vocab = words5, seed = 123)
load(paste0(tempdir(),"/lda-result-k10i200b70s123alpha0.1eta0.1.RData"))
load(file.path(tempdir(),"lda-result-k10i200b70s123alpha0.1eta0.1.RData"))
```

For validation of the LDA result and for further analysis, the result is loaded back into the workspace.
Expand Down

0 comments on commit a282d31

Please sign in to comment.