Skip to content

Commit

Permalink
Adding package to github
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt committed Aug 27, 2017
0 parents commit 7d08e9b
Show file tree
Hide file tree
Showing 16 changed files with 612 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
20 changes: 20 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Package: ggpage
Type: Package
Title: Creates Page Layout Visualizations in R
Version: 0.1.0
Authors@R: person("Emil", "Hvitfeldt", email = "emilhhvitfeldt@gmail.com", role = c("aut", "cre"))
Description: Facilitates the creation of page layout visualizations in which
    words are represented as rectangles with sizes relating to the length of
    the words. The rectangles are then divided into lines and pages, giving
    an easy overview of even quite large texts.
Depends:
R (>= 3.0.0)
Imports:
dplyr (>= 0.7.0),
ggplot2 (>= 2.0.0),
stringr (>= 1.2.0),
tidytext (>= 0.1.0)
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exportPattern("^[[:alpha:]]+")
14 changes: 14 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#' The tinder-box by H.C. Andersen
#'
#' A tidy data.frame containing the entire story of The tinder-box by H.C.
#' Andersen with two columns: \code{text} which contains the text of the
#' fairy tale divided into elements of up to about 80 characters each and
#' \code{book} giving the name of the fairy tale in question.
#'
#' @format A data frame with 211 rows and 2 variables:
#' \describe{
#'   \item{text}{character string up to 80 characters each}
#'   \item{book}{name of the fairy tale}
#' }
"tinderbox"
142 changes: 142 additions & 0 deletions R/ggpage_build.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#' Creates a data frame for further analysis and plotting
#'
#' This function can be used in combination with \code{ggpage_plot} to get the
#' same result as \code{ggpage_quick}. However by splitting the data.frame
#' construction and plotting we are able to do intermediate analysis which
#' can be included in the visualization.
#'
#' @param book Character or data.frame. Can either have each element be a
#'   separate line or have each element be a separate word. A data.frame
#'   must contain a column named \code{text}.
#' @param lpp Numeric. Lines Per Page. Number of lines allocated for each
#'   page.
#' @param character_height Numeric. Relative size of the height of each letter
#'   compared to its width.
#' @param vertical_space Numeric. Vertical distance between lines.
#' @param x_space_pages Numeric. Distance between pages along the x-axis.
#' @param y_space_pages Numeric. Distance between pages along the y-axis.
#' @param nrow Numeric. Number of rows of pages, if omitted defaults to square
#'   layout. Takes precedence over \code{ncol} when both are supplied.
#' @param ncol Numeric. Number of columns of pages, if omitted defaults to
#'   square layout.
#' @param bycol Logical. If TRUE (the default) the matrix is filled by
#'   columns, otherwise the matrix is filled by rows.
#' @return A \code{tibble} with one row per word. Columns of \code{book}
#'   other than \code{text} are carried along, together with the following
#'   columns:
#'   \itemize{
#'     \item \code{word}: Character. The words of the text.
#'     \item \code{page}: Integer. Page number.
#'     \item \code{line}: Integer. Line number within the page.
#'     \item \code{xmin}: Numeric. Left border of rectangle, used by
#'       \code{ggpage_plot} do not alter.
#'     \item \code{xmax}: Numeric. Right border of rectangle, used by
#'       \code{ggpage_plot} do not alter.
#'     \item \code{ymin}: Numeric. Lower border of rectangle, used by
#'       \code{ggpage_plot} do not alter.
#'     \item \code{ymax}: Numeric. Upper border of rectangle, used by
#'       \code{ggpage_plot} do not alter.
#'   }
#' @examples
#' library(dplyr)
#' library(stringr)
#' library(tidytext)
#' library(ggplot2)
#'
#' # build and plot
#' ## tibble with full lines
#' ggpage_build(tinderbox) %>%
#'   ggpage_plot()
#' ## vector with full lines
#' ggpage_build(book = tinderbox %>%
#'                pull(text)) %>%
#'   ggpage_plot()
#' ## tibble with single words
#' ggpage_build(tinderbox %>%
#'                unnest_tokens(text, text)) %>%
#'   ggpage_plot()
#' ## vector with single words
#' ggpage_build(tinderbox %>%
#'                unnest_tokens(text, text) %>%
#'                pull(text)) %>%
#'   ggpage_plot()
#'
#' # nrow and ncol
#' ggpage_build(tinderbox, nrow = 2) %>%
#'   ggpage_plot()
#' ggpage_build(tinderbox, ncol = 2) %>%
#'   ggpage_plot()
#'
#' # Include analysis within
#' ggpage_build(tinderbox) %>%
#'   mutate(word_length = str_length(word)) %>%
#'   ggpage_plot(aes(fill = word_length))
#' @export
ggpage_build <- function(book, lpp = 25, character_height = 3,
                         vertical_space = 1, x_space_pages = 10,
                         y_space_pages = 10, nrow = NULL, ncol = NULL,
                         bycol = TRUE) {

  if (!inherits(book, c("character", "data.frame"))) {
    stop("Please supply character string or data.frame.")
  }

  # Makes strings to tibbles
  if (inherits(book, "character")) {
    book <- dplyr::tibble(text = book)
  }

  if (!"text" %in% names(book)) {
    stop("`book` must contain a column named `text`.")
  }
  # NROW() rather than nrow(): `nrow` is also the name of an argument here.
  if (NROW(book) == 0) {
    stop("`book` must contain at least one element of text.")
  }

  # The package namespace imports nothing, so bind the pipe locally and
  # qualify every external call instead of relying on attached packages.
  `%>%` <- dplyr::`%>%`

  # Makes single words to lines: if fewer than 90% of the first (up to) 25
  # elements contain a space, the input is treated as one word per element.
  probe <- book[["text"]][seq_len(min(25L, NROW(book)))]
  if (mean(stringr::str_detect(probe, " ")) < 0.9) {
    book <- dplyr::tibble(text = word_to_line(book))
  }

  n_lines <- NROW(book)

  # One row per line, labelled with its page and its position on that page.
  lines <- book %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      index_line = seq_len(n_lines),
      page = rep(seq_len(ceiling(n_lines / lpp)),
                 each = lpp, length.out = n_lines)
    ) %>%
    dplyr::group_by(page) %>%
    dplyr::mutate(line = seq_along(page)) %>%
    dplyr::ungroup()

  # One row per word with its horizontal extent inside the line; words are
  # separated by a gap of one character width.
  data <- lines %>%
    tidytext::unnest_tokens(output = word, input = text) %>%
    dplyr::mutate(word_length = stringr::str_length(word)) %>%
    dplyr::group_by(index_line) %>%
    dplyr::mutate(
      x_space_right = cumsum(word_length + 1) - 1,
      x_space_left = x_space_right - word_length
    ) %>%
    dplyr::ungroup()

  # Longest line
  max_line_length <- max(stringr::str_length(book[["text"]]))

  # Count pages from the lines, not from the tokenised words: a page whose
  # lines yield no tokens must still occupy its slot in the grid.
  num_pages <- max(lines[["page"]])

  # Grid dimensions. `n_cols` is the number of horizontal positions and
  # `n_rows` the number of vertical positions; `nrow` wins if both are given.
  if (!is.null(nrow)) {
    n_rows <- nrow
    n_cols <- ceiling(num_pages / n_rows)
  } else if (!is.null(ncol)) {
    n_cols <- ncol
    n_rows <- ceiling(num_pages / n_cols)
  } else {
    n_cols <- n_rows <- ceiling(sqrt(num_pages))
  }

  # Grid position of every page, filled down the columns or along the rows.
  if (bycol) {
    x_page <- rep(seq_len(n_cols), each = n_rows, length.out = num_pages)
    y_page <- rep(seq_len(n_rows), length.out = num_pages)
  } else {
    x_page <- rep(seq_len(n_cols), length.out = num_pages)
    y_page <- rep(seq_len(n_rows), each = n_cols, length.out = num_pages)
  }
  page_spacing <- dplyr::tibble(
    page = seq_len(num_pages),
    x_page = x_page,
    y_page = y_page
  )

  line_height <- character_height + vertical_space
  page_width <- max_line_length + x_space_pages
  page_height <- lpp * line_height + y_space_pages

  # Pages grow to the right and downwards, hence the negative y coordinates.
  data %>%
    dplyr::left_join(page_spacing, by = "page") %>%
    dplyr::mutate(
      xmin = x_space_left + x_page * page_width,
      xmax = x_space_right + x_page * page_width,
      ymin = -line * line_height - character_height - y_page * page_height,
      ymax = ymin + character_height
    ) %>%
    dplyr::select(-index_line, -word_length, -x_space_right,
                  -x_space_left, -x_page, -y_page) %>%
    dplyr::select(word, dplyr::everything())
}
43 changes: 43 additions & 0 deletions R/ggpage_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Creates a visualization from the ggpage_build output
#'
#' Draws one rectangle per row of \code{data}, using the \code{xmin},
#' \code{xmax}, \code{ymin} and \code{ymax} columns as its borders, on a
#' fixed 1:1 coordinate system with an empty theme.
#'
#' @param data data.frame. Expects output from \code{ggpage_build} with
#'   optional intermediate analysis. Must contain the columns \code{xmin},
#'   \code{xmax}, \code{ymin} and \code{ymax}.
#' @param mapping Default list of aesthetic mappings to use for plot to be
#'   handed to internal \code{ggplot} call.
#' @return A ggplot object with the given visualization.
#' @examples
#' library(dplyr)
#' library(stringr)
#' library(tidytext)
#' library(ggplot2)
#'
#' # build and plot
#' ## tibble with full lines
#' ggpage_build(tinderbox) %>%
#'   ggpage_plot()
#' ## vector with full lines
#' ggpage_build(book = tinderbox %>%
#'                pull(text)) %>%
#'   ggpage_plot()
#' ## tibble with single words
#' ggpage_build(tinderbox %>%
#'                unnest_tokens(text, text)) %>%
#'   ggpage_plot()
#' ## vector with single words
#' ggpage_build(tinderbox %>%
#'                unnest_tokens(text, text) %>%
#'                pull(text)) %>%
#'   ggpage_plot()
#'
#' # nrow and ncol
#' ggpage_build(tinderbox, nrow = 2) %>%
#'   ggpage_plot()
#' ggpage_build(tinderbox, ncol = 2) %>%
#'   ggpage_plot()
#'
#' # Include analysis within
#' ggpage_build(tinderbox) %>%
#'   mutate(word_length = str_length(word)) %>%
#'   ggpage_plot(aes(fill = word_length))
#' @export
ggpage_plot <- function(data, mapping = ggplot2::aes()) {
  # Fail early with a readable message instead of an "object not found"
  # error raised later, when the plot is rendered.
  missing_cols <- setdiff(c("xmin", "xmax", "ymin", "ymax"), names(data))
  if (length(missing_cols) > 0) {
    stop("`data` is missing required column(s): ",
         paste(missing_cols, collapse = ", "),
         ". Did you create it with ggpage_build()?")
  }

  # Calls are namespace-qualified because the package imports nothing, so
  # ggplot2 does not have to be attached by the caller.
  ggplot2::ggplot(data, mapping = mapping) +
    ggplot2::geom_rect(
      ggplot2::aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)
    ) +
    ggplot2::coord_fixed(ratio = 1) +
    ggplot2::theme_void()
}
117 changes: 117 additions & 0 deletions R/ggpage_quick.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#' Creates a quick visualization of the page layout
#'
#' Shorthand for calling \code{ggpage_build} followed by \code{ggpage_plot}
#' with the default aesthetic mapping.
#'
#' @param book Character or data.frame. Can either have each element be a
#'   separate line or have each element be a separate word.
#' @param lpp Numeric. Lines Per Page. Number of lines allocated for each
#'   page.
#' @param character_height Numeric. Relative size of the height of each letter
#'   compared to its width.
#' @param vertical_space Numeric. Vertical distance between lines.
#' @param x_space_pages Numeric. Distance between pages along the x-axis.
#' @param y_space_pages Numeric. Distance between pages along the y-axis.
#' @param nrow Numeric. Number of rows of pages, if omitted defaults to square
#'   layout.
#' @param ncol Numeric. Number of columns of pages, if omitted defaults to
#'   square layout.
#' @param bycol Logical. If TRUE (the default) the matrix is filled by
#'   columns, otherwise the matrix is filled by rows.
#' @return A ggplot object with the given visualization.
#' @examples
#' library(dplyr)
#' library(tidytext)
#'
#' # quick
#' ## tibble with full lines
#' ggpage_quick(tinderbox)
#' ## vector with full lines
#' ggpage_quick(tinderbox %>% pull(text))
#' ## tibble with single words
#' ggpage_quick(tinderbox %>% unnest_tokens(text, text))
#' ## vector with single words
#' ggpage_quick(tinderbox %>% unnest_tokens(text, text) %>% pull(text))
#'
#' # nrow and ncol
#' ggpage_quick(tinderbox, nrow = 2)
#' ggpage_quick(tinderbox, ncol = 2)
#' @export
ggpage_quick <- function(book, lpp = 25, character_height = 3,
                         vertical_space = 1, x_space_pages = 10,
                         y_space_pages = 10, nrow = NULL, ncol = NULL,
                         bycol = TRUE) {
  # Delegate instead of duplicating the layout and plotting code, so the
  # three entry points cannot drift apart.
  layout <- ggpage_build(
    book = book,
    lpp = lpp,
    character_height = character_height,
    vertical_space = vertical_space,
    x_space_pages = x_space_pages,
    y_space_pages = y_space_pages,
    nrow = nrow,
    ncol = ncol,
    bycol = bycol
  )

  ggpage_plot(layout)
}
16 changes: 16 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' Internal function for converting words to lines
#'
#' The words are processed in consecutive chunks of \code{wot_number} words.
#' Each chunk is pasted into one string, wrapped with
#' \code{stringr::str_wrap} and split at the line breaks. Every chunk is
#' wrapped on its own, so the last line of a chunk may be shorter than the
#' others.
#'
#' @param words data.frame with a column \code{text} holding one word per
#'   row, or a character vector with one word per element.
#' @param wot_number Numeric. Number of words in each chunk that is wrapped
#'   as a unit.
#' @return Character. Each element is a separate line, in reading order.
#' @keywords internal
word_to_line <- function(words, wot_number = 1000) {
  text <- if (is.data.frame(words)) words[["text"]] else words
  n_words <- length(text)
  if (n_words == 0) {
    return(character(0))
  }

  # `each` (not the non-existent `by`, which rep() silently ignores) keeps
  # consecutive words in the same chunk, so the word order is preserved.
  chunk_id <- rep(seq_len(ceiling(n_words / wot_number)),
                  each = wot_number, length.out = n_words)

  wrapped <- lapply(split(text, chunk_id), function(chunk) {
    one_string <- stringr::str_c(chunk, collapse = " ")
    stringr::str_split(stringr::str_wrap(one_string), "\n *")
  })

  unlist(wrapped, use.names = FALSE)
}
Binary file added data/tinderbox.rda
Binary file not shown.
20 changes: 20 additions & 0 deletions ggpage.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: knitr
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
Loading

0 comments on commit 7d08e9b

Please sign in to comment.