diff --git a/docs/articles/get_started.html b/docs/articles/get_started.html index ed4fce3..c564ba6 100644 --- a/docs/articles/get_started.html +++ b/docs/articles/get_started.html @@ -37,7 +37,7 @@
@@ -104,7 +104,7 @@get_started.Rmd
graphTweets
4.0 has been redesigned to work hand-in-hand with rtweet
. Let’s start by getting some tweets. If you’re unsure how to get started, head over to the rtweet website; everything is very well explained. We’ll get 1,000 tweets on #rstats, excluding retweets.
library(rtweet)
-
-# 1'000 tweets on #rstats, excluding retweets
-tweets <- search_tweets("#rstats", n = 500, include_rts = FALSE)
library(rtweet)
+
+# 1'000 tweets on #rstats, excluding retweets
+tweets <- search_tweets("#rstats", n = 500, include_rts = FALSE)
Now we can start using graphTweets
.
gt_edges
. If you do not want to return an igraph
object, use gt_collect
, it will return a list of two data.frames; edges and nodes.
tweets %>%
- gt_edges(screen_name, mentions_screen_name) %>%
- gt_collect() -> edges
-
-names(edges)
-#> [1] "edges" "nodes"
tweets %>%
+ gt_edges(screen_name, mentions_screen_name) %>%
+ gt_collect() -> edges
+
+names(edges)
+#> [1] "edges" "nodes"
(It also returns nodes
but it’s empty since we only ran gt_edges
).
So far we only used gt_edges
to extract the edges, we can also extract the nodes.
tweets %>%
- gt_edges(screen_name, mentions_screen_name) %>%
- gt_nodes() %>%
- gt_collect() -> graph
-
-lapply(graph, nrow) # number of edges and nodes
-#> $edges
-#> [1] 299
-#>
-#> $nodes
-#> [1] 354
-lapply(graph, names) # names of data.frames returned
-#> $edges
-#> [1] "source" "target" "n"
-#>
-#> $nodes
-#> [1] "nodes" "type" "n"
tweets %>%
+ gt_edges(screen_name, mentions_screen_name) %>%
+ gt_nodes() %>%
+ gt_collect() -> graph
+
+lapply(graph, nrow) # number of edges and nodes
+#> $edges
+#> [1] 271
+#>
+#> $nodes
+#> [1] 350
+lapply(graph, names) # names of data.frames returned
+#> $edges
+#> [1] "source" "target" "n"
+#>
+#> $nodes
+#> [1] "nodes" "type" "n"
On graphTweets
version 0.4.1
gt_nodes
returns the number of edges the node is present in: n_edges
. Here I used gt_collect
, you can, again, use gt_graph
if you want to return an igraph
object.
Adding nodes has not brought much to the table; however, gt_nodes
takes another argument, meta
, which if set to TRUE
will return meta data on each node, where available*. More information on passing meta data to nodes can be found further down the document.
tweets %>%
- gt_edges(screen_name, mentions_screen_name) %>%
- gt_nodes(meta = TRUE) %>%
- gt_collect() -> graph
-
-# lapply(graph, names) # names of data.frames returned
tweets %>%
+ gt_edges(screen_name, mentions_screen_name) %>%
+ gt_nodes(meta = TRUE) %>%
+ gt_collect() -> graph
+
+# lapply(graph, names) # names of data.frames returned
Note that you can also pass meta-data to edges if needed.
-tweets %>%
- gt_edges(screen_name, mentions_screen_name, created_at) %>%
- gt_nodes(meta = TRUE) %>%
- gt_collect() -> graph
tweets %>%
+ gt_edges(screen_name, mentions_screen_name, created_at) %>%
+ gt_nodes(meta = TRUE) %>%
+ gt_collect() -> graph
Before we plot our graph, we’re going to modify some of the meta-data, as a lot of NA
are returned (where the meta-data was not available *).
Here I use sigmajs to plot the graph.
-library(dplyr)
-library(sigmajs) # for plots
-#> Welcome to sigmajs
-#>
-#> Docs: sigmajs.john-coene.com
-
-tweets %>%
- gt_edges(screen_name, mentions_screen_name) %>%
- gt_nodes() %>%
- gt_collect() -> gt
-
-nodes <- gt$nodes %>%
- mutate(
- id = nodes,
- label = nodes,
- size = n,
- color = "#1967be"
- )
-
-edges <- gt$edges %>%
- mutate(
- id = 1:n()
- )
-
-sigmajs() %>%
- sg_force_start() %>%
- sg_nodes(nodes, id, label, size, color) %>%
- sg_edges(edges, id, source, target) %>%
- sg_force_stop(10000)
Let’s look at communities, we’ll return an igraph
object with gt_graph
so we can easily run a community finding algorithm from the igraph
package.
tweets %>%
- gt_edges(screen_name, mentions_screen_name) %>%
- gt_graph() -> g
-
-class(g)
-#> [1] "igraph"
library(dplyr)
+library(sigmajs) # for plots
+#> Welcome to sigmajs
+#>
+#> Docs: sigmajs.john-coene.com
+
+tweets %>%
+ gt_edges(screen_name, mentions_screen_name) %>%
+ gt_nodes() %>%
+ gt_collect() -> gt
+
+nodes <- gt$nodes %>%
+ mutate(
+ id = nodes,
+ label = nodes,
+ size = n,
+ color = "#1967be"
+ )
+
+edges <- gt$edges %>%
+ mutate(
+ id = 1:n()
+ )
+
+sigmajs() %>%
+ sg_force_start() %>%
+ sg_nodes(nodes, id, label, size, color) %>%
+ sg_edges(edges, id, source, target) %>%
+ sg_force_stop(10000)
Let’s look at communities, we’ll return an igraph
object with gt_graph
so we can easily run a community finding algorithm from the igraph
package.
You can also build networks of retweets.
-tweets <- search_tweets("#rstats filter:retweets", n = 500, include_rts = TRUE, token = token, lang = "en")
-#> Searching for tweets...
-#> Finished collecting tweets!
net <- tweets %>%
- gt_edges(screen_name, retweet_screen_name) %>%
- gt_nodes() %>%
- gt_collect()
-
-c(edges, nodes) %<-% net
-
-edges$id <- 1:nrow(edges)
-edges$size <- edges$n
-
-nodes$id <- nodes$nodes
-nodes$label <- nodes$nodes
-nodes$size <- nodes$n
-
-sigmajs() %>%
- sg_nodes(nodes, id, size, label) %>%
- sg_edges(edges, id, source, target) %>%
- sg_layout() %>%
- sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
- sg_settings(
- edgeColor = "default",
- defaultEdgeColor = "#d3d3d3"
- ) %>%
- sg_neighbours()
tweets <- search_tweets("#rstats filter:retweets", n = 500, include_rts = TRUE, token = token, lang = "en")
+#> Searching for tweets...
+#> Finished collecting tweets!
net <- tweets %>%
+ gt_edges(screen_name, retweet_screen_name) %>%
+ gt_nodes() %>%
+ gt_collect()
+
+c(edges, nodes) %<-% net
+
+edges$id <- 1:nrow(edges)
+edges$size <- edges$n
+
+nodes$id <- nodes$nodes
+nodes$label <- nodes$nodes
+nodes$size <- nodes$n
+
+sigmajs() %>%
+ sg_nodes(nodes, id, size, label) %>%
+ sg_edges(edges, id, source, target) %>%
+ sg_layout() %>%
+ sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
+ sg_settings(
+ edgeColor = "default",
+ defaultEdgeColor = "#d3d3d3"
+ ) %>%
+ sg_neighbours()
We can bind quoted tweets (surely they should be considered as retweets) using gt_edges_bind
.
net <- tweets %>%
- gt_edges(screen_name, retweet_screen_name) %>%
- gt_edges_bind(screen_name, quoted_screen_name) %>%
- gt_nodes() %>%
- gt_collect()
-
-c(edges, nodes) %<-% net
-
-edges$id <- 1:nrow(edges)
-edges$size <- edges$n
-
-nodes$id <- nodes$nodes
-nodes$label <- nodes$nodes
-nodes$size <- nodes$n
-
-sigmajs() %>%
- sg_nodes(nodes, id, size, label) %>%
- sg_edges(edges, id, source, target) %>%
- sg_layout() %>%
- sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
- sg_settings(
- edgeColor = "default",
- defaultEdgeColor = "#d3d3d3"
- ) %>%
- sg_neighbours()
## Meta data
+net <- tweets %>%
+ gt_edges(screen_name, retweet_screen_name) %>%
+ gt_edges_bind(screen_name, quoted_screen_name) %>%
+ gt_nodes() %>%
+ gt_collect()
+
+c(edges, nodes) %<-% net
+
+edges$id <- 1:nrow(edges)
+edges$size <- edges$n
+
+nodes$id <- nodes$nodes
+nodes$label <- nodes$nodes
+nodes$size <- nodes$n
+
+sigmajs() %>%
+ sg_nodes(nodes, id, size, label) %>%
+ sg_edges(edges, id, source, target) %>%
+ sg_layout() %>%
+ sg_cluster(colors = c("#0C46A0FF", "#41A5F4FF")) %>%
+ sg_settings(
+ edgeColor = "default",
+ defaultEdgeColor = "#d3d3d3"
+ ) %>%
+ sg_neighbours()
You can pass meta data to the edges and subsequently to the nodes using gt_add_meta
.
gt <- tweets %>%
- gt_edges(screen_name, retweet_screen_name, followers_count, retweet_followers_count) %>%
- gt_nodes() %>%
- gt_add_meta(name = size, source = followers_count, target = retweet_followers_count)
-
-# size is now number of followers
-head(gt$nodes)
-#> # A tibble: 6 x 4
-#> nodes type n size
-#> <chr> <chr> <int> <int>
-#> 1 _reactdev user 3 2482
-#> 2 _serverlessbot_ user 1 195
-#> 3 0cool1 user 1 308
-#> 4 2bftawfik user 2 70
-#> 5 aad34210 user 1 1013
-#> 6 aambrus1 user 1 241
-
-gt$edges$id <- 1:nrow(gt$edges)
-gt$nodes$id <- gt$nodes$nodes
-gt$nodes$label <- gt$nodes$nodes
-gt$nodes$color <- scales::col_numeric(c("#41A5F4FF", "#0C46A0FF"), NULL)(gt$nodes$size)
-
-sigmajs() %>%
- sg_nodes(gt$nodes, id, size, label, color) %>%
- sg_edges(gt$edges, id, source, target) %>%
- sg_layout() %>%
- sg_settings(
- edgeColor = "default",
- defaultEdgeColor = "#d3d3d3"
- ) %>%
- sg_neighbours()
* Some nodes are mentioned in tweets only and therefore have no meta-data associated.
+gt <- tweets %>%
+ gt_edges(screen_name, retweet_screen_name, followers_count, retweet_followers_count) %>%
+ gt_nodes() %>%
+ gt_add_meta(name = size, source = followers_count, target = retweet_followers_count)
+
+# size is now number of followers
+head(gt$nodes)
+#> # A tibble: 6 x 4
+#> nodes type n size
+#> <chr> <chr> <int> <int>
+#> 1 _ddjlab user 1 125
+#> 2 _hyperseven_ user 1 6194
+#> 3 _lazappi_ user 1 919
+#> 4 _lionelhenry user 1 834
+#> 5 _reactdev user 5 2537
+#> 6 _sevillar user 1 373
+
+gt$edges$id <- 1:nrow(gt$edges)
+gt$nodes$id <- gt$nodes$nodes
+gt$nodes$label <- gt$nodes$nodes
+gt$nodes$color <- scales::col_numeric(c("#41A5F4FF", "#0C46A0FF"), NULL)(gt$nodes$size)
+
+sigmajs() %>%
+ sg_nodes(gt$nodes, id, size, label, color) %>%
+ sg_edges(gt$edges, id, source, target) %>%
+ sg_layout() %>%
+ sg_settings(
+ edgeColor = "default",
+ defaultEdgeColor = "#d3d3d3"
+ ) %>%
+ sg_neighbours()
* Some nodes are mentioned in tweets only and therefore have no meta-data associated.