/
emoji-extraction.R
77 lines (63 loc) · 2.61 KB
/
emoji-extraction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' Emoji extraction unnested summary
#'
#' If users would like to know how many Emojis and what kinds of Emojis each
#' Tweet has, \code{emoji_extract_unnest} is a useful function to output a
#' global summary with the row number of each Tweet containing Emoji and the
#' Unicodes associated with each Tweet.
#'
#' Tweets that contain no Emoji do not appear in the output. Any grouping on
#' \code{tweet_tbl} is dropped before the rows are numbered, so
#' \code{row_number} always refers to the position of the Tweet in the input.
#'
#' @inheritParams emoji_summary
#'
#' @import dplyr
#' @import stringr
#' @import tidyr
#' @return A summary tibble with one row per Tweet/Emoji combination and three
#'   columns: \code{row_number} (the original row number of the Tweet),
#'   \code{.emoji_unicode} (the Emoji) and \code{.emoji_count} (how many times
#'   that Emoji occurs in that Tweet).
#' @export
#' @examples
#' library(dplyr)
#' data.frame(tweets = c("I love tidyverse \U0001f600\U0001f603\U0001f603",
#'                       "R is my language! \U0001f601\U0001f606\U0001f605",
#'                       "This Tweet does not have Emoji!",
#'                       "Wearing a mask\U0001f637\U0001f637\U0001f637.",
#'                       "Emoji does not appear in all Tweets",
#'                       "A flag \U0001f600\U0001f3c1")) %>%
#'   emoji_extract_unnest(tweets)
#'
emoji_extract_unnest <- function(tweet_tbl, tweet_text) {
  tweet_tbl %>%
    emoji_extract_nest({{ tweet_text }}) %>%
    # Drop any incoming grouping so rows are numbered globally, not per group.
    dplyr::ungroup() %>%
    dplyr::select({{ tweet_text }}, .emoji_unicode) %>%
    dplyr::mutate(row_number = dplyr::row_number()) %>%
    # unnest() drops rows whose list element is empty, i.e. Emoji-free Tweets.
    tidyr::unnest(.emoji_unicode) %>%
    dplyr::group_by(row_number, .emoji_unicode) %>%
    # `.groups = "drop"` returns an ungrouped tibble without the
    # "grouped output by" message that summarize() otherwise prints.
    dplyr::summarize(.emoji_count = dplyr::n(), .groups = "drop")
}
#' Emoji extraction nested summary
#'
#' This function adds an extra list column called \code{.emoji_unicode} to the
#' original data, with all Emojis included.
#'
#' Each element of \code{.emoji_unicode} is a character vector holding the
#' Emojis found in the corresponding Tweet, in order of appearance; Tweets
#' without Emoji get an empty character vector. Emojis made of several code
#' points (for example skin-tone variants) are matched as a whole rather than
#' as their individual parts.
#'
#' @inheritParams emoji_summary
#'
#' @import dplyr
#' @import stringr
#' @import emoji
#' @return The original dataframe/tibble with an extra list column called
#'   \code{.emoji_unicode}.
#' @export
#' @examples
#' library(dplyr)
#' data.frame(tweets = c("I love tidyverse \U0001f600\U0001f603\U0001f603",
#'                       "R is my language! \U0001f601\U0001f606\U0001f605",
#'                       "This Tweet does not have Emoji!",
#'                       "Wearing a mask\U0001f637\U0001f637\U0001f637.",
#'                       "Emoji does not appear in all Tweets",
#'                       "A flag \U0001f600\U0001f3c1")) %>%
#'   emoji_extract_nest(tweets)
emoji_extract_nest <- function(tweet_tbl, tweet_text) {
  # Build the pattern once, outside mutate(), so it is not rebuilt for every
  # group of a grouped input.
  # NOTE(review): the "keycap: *" Emoji is excluded, presumably because its
  # leading "*" would be read as a regex quantifier -- confirm.
  emoji_tbl <- dplyr::filter(
    emoji::emojis,
    !stringr::str_detect(name, "keycap: \\*")
  )
  emoji_chars <- dplyr::pull(emoji_tbl, emoji)

  # Regex alternation takes the first alternative that matches, so longer
  # sequences must be listed before the shorter Emojis they start with;
  # otherwise a multi-code-point Emoji is split into its components.
  longest_first <- order(stringr::str_length(emoji_chars), decreasing = TRUE)
  emoji_pattern <- paste(emoji_chars[longest_first], collapse = "|")

  dplyr::mutate(
    tweet_tbl,
    .emoji_unicode = stringr::str_extract_all({{ tweet_text }}, emoji_pattern)
  )
}