Skip to content

Commit

Permalink
WA: datemining, authormining
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasRieger committed Aug 23, 2019
1 parent 9867b0a commit 589151e
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 3 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ export(LDAprep)
export(as.corpus.textmeta)
export(as.meta)
export(as.textmeta.corpus)
export(authormining.WA)
export(cleanTexts)
export(clusterTopics)
export(datemining.WA)
export(deleteAndRenameDuplicates)
export(duplist)
export(filterCount)
Expand Down
65 changes: 63 additions & 2 deletions R/readWhatsApp.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#' Read WhatsApp files
#'
#' Reads HTML-files from WhatsApp and separates the text and meta data.
#' Reads HTML-files from WhatsApp and separates the text and meta data. The
#' functions \code{datemining.WA} and \code{authormining.WA} can be used to
#' deduce some missing values (concerning the author or date tag) from the data
#' itself.
#'
#' @param path Character: string with path where the data files are.
#' If only \code{path} is given, \code{file} will be determined by searching
Expand All @@ -24,7 +26,7 @@ readWhatsApp = function(path, file){
if(missing(path)){
return(readWhatsApp.file(file = file))
}
return(readWhatsApp(file = file.path(path, file)))
return(readWhatsApp.file(file = file.path(path, file)))
}

readWhatsApp.file = function(file){
Expand Down Expand Up @@ -156,3 +158,62 @@ readWhatsApp.file = function(file){
paste(textandemojis[indsplit[1,x]:indsplit[2,x]], collapse = " "))
return(textandemojis)
}

#' @rdname readWhatsApp
#' @param object \code{\link{textmeta}} object, result from readWhatsApp.
#' @export datemining.WA
datemining.WA = function(object){
  # Fills in missing dates of user messages in object$meta$date using the
  # neighbouring rows of object$meta:
  #   pass 1 looks at the nearest system date rows (their text is parsed with
  #          the format "%d.%m.%Y") before and after the message,
  #   pass 2 looks at the nearest rows that already carry a date.
  # Returns the object with the (partially) completed date column.

  nobs = nrow(object$meta)
  noDate = which(object$meta$userMessage & is.na(object$meta$date))

  # Pass 1: if the enclosing system dates are exactly one day apart, the
  # message belongs to the earlier of the two days.
  for (i in seq_along(noDate)){
    pos = noDate[i]
    sysAfter = object$meta$systemDate[seq(pos, nobs)]
    sysBefore = object$meta$systemDate[seq(pos, 1)]

    # which.max() returns 1 for a logical vector without any TRUE, which
    # would point back at the message itself; skip such messages instead of
    # parsing their own text as a date.
    if (!any(sysAfter, na.rm = TRUE) || !any(sysBefore, na.rm = TRUE)){
      next
    }

    nextInd = pos + which.max(sysAfter) - 1  # first system date at/after pos
    prevInd = pos - which.max(sysBefore) + 1 # first system date at/before pos

    nextDate = as.Date(object$text[[object$meta$id[nextInd]]],
      format = "%d.%m.%Y")
    prevDate = as.Date(object$text[[object$meta$id[prevInd]]],
      format = "%d.%m.%Y")

    # units must be named: the third positional argument of difftime() is tz
    daydiff = as.numeric(difftime(nextDate, prevDate, units = "days"))
    if(!is.na(daydiff) && daydiff == 1){
      object$meta$date[pos] = prevDate
    }
  }

  # Pass 2: messages that are still undated are compared with the nearest
  # dated rows. Dates assigned here are visible to later iterations.
  newnoDate = noDate[is.na(object$meta$date[noDate])]

  for (i in seq_along(newnoDate)){
    pos = newnoDate[i]

    # If no dated row exists in a direction, which.max() yields 1, the index
    # falls back to pos itself and the corresponding date is NA.
    ind1 = pos + which.max(!is.na(object$meta$date[seq(pos, nobs)])) - 1
    nextDate = object$meta$date[ind1]

    ind2 = pos - which.max(!is.na(object$meta$date[seq(pos, 1)])) + 1
    prevDate = object$meta$date[ind2]

    daydiff = as.numeric(difftime(nextDate, prevDate, units = "days"))
    if(!is.na(daydiff) && daydiff == 0){
      object$meta$date[pos] = prevDate
    }
    # No system date between the previous dated row and this message:
    # both belong to the same day.
    if(!is.na(prevDate) && !any(object$meta$systemDate[ind2:pos])){
      object$meta$date[pos] = prevDate
    }
    # Same reasoning for the following dated row. The NA check prevents a
    # date inferred above from being overwritten with NA when no dated row
    # follows.
    if(!is.na(nextDate) && !any(object$meta$systemDate[ind1:pos])){
      object$meta$date[pos] = nextDate
    }
  }

  return(object)
}

#' @rdname readWhatsApp
#' @export authormining.WA
authormining.WA = function(object){
  # Always remind the caller of the assumption behind this imputation.
  warning("\nadding author for user observations (replacing NAs by setting author to ID-prefix, which is usually generated by recycling the filename): make sure that all user messages without author tag are sent from the main user in each chat!")

  # Rows that are user messages but carry no author tag.
  meta = object$meta
  isAnonymous = meta$userMessage & is.na(meta$author)
  rows = which(isAnonymous)

  # The author is taken to be the id with every digit stripped.
  idPrefix = gsub("[0-9]", "", meta$id[rows])
  object$meta$author[rows] = idPrefix

  return(object)
}
12 changes: 11 additions & 1 deletion man/readWhatsApp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 589151e

Please sign in to comment.