From 8df173ea0c916a4cfea7e606ac92f430fedf4bd3 Mon Sep 17 00:00:00 2001 From: Jonas Rieger Date: Wed, 28 Aug 2019 15:40:21 +0200 Subject: [PATCH] new tests for WA --- R/readWhatsApp.R | 16 +- tests/testthat/data/WhatsApp/WhatsApp1.html | 6 +- tests/testthat/data/WhatsApp/WhatsApp3.html | 172 ++++++++++++++++++++ tests/testthat/test_readWhatsApp.R | 23 ++- 4 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/data/WhatsApp/WhatsApp3.html diff --git a/R/readWhatsApp.R b/R/readWhatsApp.R index 3ebc362..bb91ca8 100644 --- a/R/readWhatsApp.R +++ b/R/readWhatsApp.R @@ -219,9 +219,19 @@ authormining.WA = function(object){ stopifnot(is.textmeta(object), all(c("author", "userMessage") %in% colnames(object$meta))) - warning("\nadding author for user observations (replacing NAs by setting author to ID-prefix, which is usually generated by recycling the filename): make sure that all user messages without author tag are sent from the main user in each chat!") + warning("\nadding author for user observations (replacing NAs by setting author to the most often author per ID-prefix - or to the ID-prefix itself, if there is no known author at all): make sure that all user messages without author tag are sent from the main user in each chat!") noAuthor = which(object$meta$userMessage & is.na(object$meta$author)) - object$meta$author[noAuthor] = gsub(pattern = "[0-9]", replacement = "", - x = object$meta$id[noAuthor]) + + ids = gsub(pattern = "\\.[0-9]*", x = object$meta$id, replacement = "") + + object$meta$author[noAuthor] = + vapply(ids[noAuthor], + function(x) ifelse(!is.null(names(which.max(table(object$meta$author[ids == x])))), + names(which.max(table(object$meta$author[ids == x]))), NA_character_), FUN.VALUE = character(1)) # vapply, not sapply: for an empty noAuthor sapply returns list(), which would coerce the author column to a list + + noAuthor = which(object$meta$userMessage & is.na(object$meta$author)) + + object$meta$author[noAuthor] = gsub(pattern = "\\.[0-9]*", + x = object$meta$id[noAuthor], replacement = "") 
return(object) } diff --git a/tests/testthat/data/WhatsApp/WhatsApp1.html b/tests/testthat/data/WhatsApp/WhatsApp1.html index c84eabf..0a991b2 100644 --- a/tests/testthat/data/WhatsApp/WhatsApp1.html +++ b/tests/testthat/data/WhatsApp/WhatsApp1.html @@ -154,7 +154,11 @@ and You Feed Him for a Lifetime: Link -
18:56
+
18:56 + +
19:12
Blind Text
18:57 + +
19:12
Blind Text2
18:59
diff --git a/tests/testthat/data/WhatsApp/WhatsApp3.html b/tests/testthat/data/WhatsApp/WhatsApp3.html new file mode 100644 index 0000000..98e7a23 --- /dev/null +++ b/tests/testthat/data/WhatsApp/WhatsApp3.html @@ -0,0 +1,172 @@ + + + + + +(85) WhatsApp + + + + + + + + + + + + + + + + + + + +
Gruppe
Heute
text B
82
gruppe a
Mittwoch
❄ FAHRT BITTE VORSICHTIG ❄ +Die aktuellen Verkehrsmeldungen hört ihr wie immer in unserem Programm
42
gn
Gestern
19:12
GESTERN
Give a Man a Fish, and You Feed Him for a Day. +Teach a Man To Fish, + +and You Feed Him for a Lifetime: Link + +
18:56 + +
19:12
Blind Text
18:57 + +
19:12
Blind Text2
18:59
Schreib eine Nachricht
+ + + + + +
+ + + + + \ No newline at end of file diff --git a/tests/testthat/test_readWhatsApp.R b/tests/testthat/test_readWhatsApp.R index 7a7f239..3170058 100644 --- a/tests/testthat/test_readWhatsApp.R +++ b/tests/testthat/test_readWhatsApp.R @@ -4,8 +4,9 @@ test_that("readWhatsApp", { corp = readWhatsApp(path = file.path("data", "WhatsApp")) corp1 = readWhatsApp(path = file.path("data", "WhatsApp"), file = "WhatsApp1.html") corp2 = readWhatsApp(path = file.path("data", "WhatsApp"), file = "WhatsApp2.html") + corp3 = readWhatsApp(path = file.path("data", "WhatsApp"), file = "WhatsApp3.html") - expect_error(readWhatsApp(path = file.path("data", "WhatsApp"), file = "WhatsApp3.html")) + expect_error(readWhatsApp(path = file.path("data", "WhatsApp"), file = "WhatsApp4.html")) expect_equal( readWhatsApp(path = file.path("data", "WhatsApp", "WhatsApp1.html")), readWhatsApp(file = file.path("data", "WhatsApp", "WhatsApp1.html"))) @@ -14,12 +15,26 @@ test_that("readWhatsApp", { expect_true(is.textmeta(corp)) expect_true(is.textmeta(corp1)) expect_true(is.textmeta(corp2)) - expect_equal(mergeTextmeta(list(corp1, corp2)), corp) + expect_equal(mergeTextmeta(list(corp1, corp2, corp3)), corp) expect_equal(length(corp$text), nrow(corp$meta)) - expect_equal(length(corp$text), 6) + expect_equal(length(corp$text), 12) expect_equal(length(corp1$text), nrow(corp1$meta)) - expect_equal(length(corp1$text), 2) + expect_equal(length(corp1$text), 4) expect_equal(length(corp2$text), nrow(corp2$meta)) expect_equal(length(corp2$text), 4) + expect_equal(length(corp3$text), nrow(corp3$meta)) + expect_equal(length(corp3$text), 4) + + ## WAmining + corpdate = datemining.WA(corp) + expect_true(corpdate$meta$date[3] == "2018-11-29") + corpdate$meta$date[3] = NA_character_ + expect_equal(corp, corpdate) + + expect_warning(corpauthor <- authormining.WA(corp)) + expect_true(corpauthor$meta$author[3] == "Ressource") + expect_true(all(corpauthor$meta$author[10:12] == "WhatsApp3")) + corpauthor$meta$author[c(3, 
10:12)] = NA_character_ + expect_equal(corp, corpauthor) })