Displayr · mwmclean · Nov 14, 2019 · Nov 7, 2019
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cleanNLP
 Type: Package
 Title: A Tidy Data Model for Natural Language Processing
-Version: 2.4.2
+Version: 2.4.3
 Authors@R: c(person(given = "Justin", family = "Wishart", email = "justin.wishart@displayr.com", role = "ctb"),
     person(given = "Taylor B.", family = "Arnold", email = "taylor.arnold@acm.org", role = c("aut", "cre")))
 Description: Provides a set of fast tools for converting a textual corpus into a set of normalized

diff --git a/R/ner_annotate.R b/R/ner_annotate.R
@@ -9,8 +9,6 @@
 #' in the document where each entity occurs and the entity type. If no entities are detected for a
 #' document then an empty data.frame with no rows is returned.
 #'
-#' @param input.file a character string showing the path to the file to be processed. The file should
-#'    have text with Unix style line endings (will throw Nullpointer exception if not)
 #' @param entity.mentions.only Logical to specify if only entity mention output from CoreNLP is used
 #'    in the extraction. If TRUE, this will extract personal pronouns as well as standard entities.
 #'    The benefit of the entity.mention output is it groups words that are from the same entity. E.g.
@@ -42,17 +40,19 @@
 #'             "true")
 #' 
 #' cnlp_init_corenlp_custom(language = "en", mem = "2g", keys = keys, values = values)
-#' simple.output <- NERAnnotate(input.file)
+#' simple.output <- NERAnnotate()
 #' }
 #' @export
-NERAnnotate <- function(input.file, entity.mentions.only = FALSE) {
-  
+NERAnnotate <- function(entity.mentions.only = FALSE)
+{
     if(!volatiles$corenlp$init)
         stop("Java CoreNLP not initialized. Named Entity Recognition cannot be executed.")
-
+    if(is.null(volatiles$corenlp$properties$file))
+        stop("Java CoreNLP properties doesn't have an input file path.",
+             "Please set the input file path via cnlp_init_corenlp_custom")
     .jcall(volatiles$corenlp$corenlp, "V", "run")
 
-    output <- fromJSON(paste0(input.file, ".json"))
+    output <- fromJSON(paste0(volatiles$corenlp$properties$file, ".json"))
     ner.mentions = output$sentences$entitymentions
     response = sapply(ner.mentions, nrow)
     if(all(sapply(response, is.null)))
@@ -77,7 +77,7 @@ NERAnnotate <- function(input.file, entity.mentions.only = FALSE) {
                         return(data.frame())
                     }
                 y
-                }, x = output$sentences$tokens, y = ner.mentions)
+                }, x = output$sentences$tokens, y = ner.mentions, SIMPLIFY = FALSE)
             # Check if filtered ner is not empty
             if(all(sapply(ner.mentions, nrow) == 0))
                 return(data.frame(id = character(), entity = character(), entity.type = character()))

diff --git a/man/NERAnnotate.Rd b/man/NERAnnotate.Rd
diff --git a/tests/testthat/test-entity.R b/tests/testthat/test-entity.R
@@ -26,8 +26,15 @@ simple.with.pronouns.expected <- structure(list(id = c(1L, 1L, 3L, 4L, 4L, 4L),
 
 pronouns <- c("he's", "hes", "he is", "He is", "He Is", "she's", "She is")
 
+all.single.entity <- as.character(1:3)
+
 none.expected <- data.frame(id = character(), entity = character(), entity.type = character())
 
+all.single.output <- structure(list(id = 1:3,
+                                    entity = c("1", "2", "3"),
+                                    entity.type = c("NUMBER", "NUMBER", "NUMBER")),
+                               class = "data.frame", row.names = c(NA, -3L))
+
 # If this is throwing errors that you need to download Core NLP then the way to get testthat to
 # find CORENLP is to set CORENLP as a system environment variable with the path to CoreNLP.
 # CoreNLP directories in the package installation cannot be located by testthat.
@@ -45,29 +52,36 @@ test_that("NERAnnotate consistency", {
   values <- c("true", "tokenize,ssplit,pos,lemma,ner", "json", tmp.file, dirname(tmp.file))
 
   # Expect error if NERAnnotate is called before corenlp is initialised.
-  expect_error(NERAnnotate(tmp.file),
+  expect_error(NERAnnotate(),
                "^Java CoreNLP not initialized. Named Entity Recognition cannot be executed.$")
 
   cnlp_init_corenlp_custom(language = "en", mem = "2g", keys = keys, values = values, 
                            corenlp.only = TRUE)
 
-  expect_error(simple.output <- NERAnnotate(tmp.file), NA)
+  expect_error(simple.output <- NERAnnotate(), NA)
   expect_identical(simple.output, simple.expected)
 
-  expect_error(simple.output.with.pronouns <- NERAnnotate(tmp.file, entity.mentions.only = TRUE), NA)
+  expect_error(simple.output.with.pronouns <- NERAnnotate(entity.mentions.only = TRUE), NA)
   expect_identical(simple.output.with.pronouns, simple.with.pronouns.expected)
 
   file <- file(tmp.file, "wb")
   writeLines(none.input, con = file)
   close(file)
 
-  none.output <- NERAnnotate(tmp.file)
+  none.output <- NERAnnotate()
   expect_identical(none.output, none.expected)
 
   file <- file(tmp.file, "wb")
   writeLines(pronouns, con = file)
   close(file)
 
-  expect_error(pronoun.output.after.validation <- NERAnnotate(tmp.file, entity.mentions.only = FALSE), NA)
+  expect_error(pronoun.output.after.validation <- NERAnnotate(entity.mentions.only = FALSE), NA)
   expect_identical(pronoun.output.after.validation, none.expected)
+
+  file <- file(tmp.file, "wb")
+  writeLines(all.single.entity, con = file)
+  close(file)
+
+  expect_error(all.single.entity.output <- NERAnnotate(entity.mentions.only = FALSE), NA)
+  expect_identical(all.single.entity.output, all.single.output)
 })