<a href="https://colab.research.google.com/github/Noelle-Pastor/Top-American-Authors-in-19th---21st-Century-Literary-Anthologies/blob/main/3_Creating_Plots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Creating Plots**
## Uses previously loaded data tibbles such as `author_stats`, `author_stats_all_centuries`, `native_lit_stats`, and `combined_native_and_other_stats`.

In [None]:
library(tidyverse)

In [None]:
# Shared fill palettes (hex colour codes) for the charts in this notebook.

# Four-colour palette; passed to scale_fill_manual() in the per-century
# frequency chart.
my_colors <- c(
  "#2B3A67",
  "#4A618C",
  "#89A7A7",
  "#F2B90C"
)

# Two-colour palette.
my_colors2 <- c(
  "#F2B90C",
  "#3F5C8A"
)

# Three-colour palette; passed to scale_fill_manual() in the Native American
# literature charts.
my_colors3 <- c(
  "#F2B90C",
  "#89A7A7",
  "#2B3A67"
)

### **Column Chart** by Century - Author vs Page Count

In [None]:
# Column chart: top 30 authors for one century, ranked by total pages.
# Bar length is `total_num_pages`; bar colour is the continuous `proportion`
# value, shown in the legend as a percentage of pages.
# NOTE(review): the title and legend text are hard-coded to the 20th century
# and the last colour-bar break (4.7) is a literal -- confirm both match the
# data currently loaded in `author_stats`.
ggplot(
  data = author_stats,
  mapping = aes(
    x = total_num_pages,
    y = reorder(author, total_num_pages),
    fill = proportion
  )
) +
  geom_col() +
  # Axis runs 50 pages past the longest bar, with a tick every 200 pages.
  scale_x_continuous(
    limits = c(0, max(author_stats[["total_num_pages"]]) + 50),
    breaks = seq(
      from = 0,
      to = max(author_stats[["total_num_pages"]]) + 200,
      by = 200
    )
  ) +
  # Dark-to-light blue gradient spanning 0 up to the largest proportion.
  scale_fill_gradient(
    name = "% of Pages in\n20th Century\nAnthologies\n",
    low = "#2A3950",
    high = "#6bc9ff",
    limits = c(0, max(author_stats[["proportion"]])),
    breaks = c(0:4, 4.7)
  ) +
  guides(fill = guide_colorbar(reverse = FALSE)) +
  ggtitle(
    label = "30 Most Prominent Authors in 20th Century Anthologies",
    subtitle = "by Total Pages\n"
  ) +
  xlab("Total Pages") +
  ylab("Author") +
  # Centre the title and subtitle over the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

In [None]:
# Column chart: top 30 authors for one century, ranked by total pages.
# Bar colour is `frequency` treated as a discrete factor and drawn with the
# `my_colors` palette.
# NOTE(review): the title and legend text are hard-coded to the 21st century --
# confirm they match the data currently loaded in `author_stats`.
ggplot(
  data = author_stats,
  mapping = aes(
    x = total_num_pages,
    y = reorder(author, total_num_pages),
    fill = factor(frequency)
  )
) +
  geom_col() +
  # Axis stops at the longest bar; candidate ticks every 200 pages.
  scale_x_continuous(
    limits = c(0, max(author_stats[["total_num_pages"]])),
    breaks = seq(
      from = 0,
      to = max(author_stats[["total_num_pages"]]) + 200,
      by = 200
    )
  ) +
  scale_fill_manual(values = my_colors) +
  # Legend keys are listed in reverse order.
  guides(fill = guide_legend(reverse = TRUE)) +
  ggtitle(
    label = "30 Most Prominent Authors in 21st Century Anthologies",
    subtitle = "by Total Pages\n"
  ) +
  xlab("\nTotal Pages") +
  ylab("Author\n") +
  labs(fill = "# of 21st Century\nAnthologies\nAuthor Occurs In\n") +
  # Centre the title and subtitle over the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

### **Bar Chart**: TOP 30 AUTHORS, over ALL CENTURIES, by TOTAL PAGES

In [None]:
# Column chart: top 30 authors across all centuries, ranked by total pages.
# Bar colour is the continuous `proportion` value, shown in the legend as a
# percentage of pages in all anthologies.
# NOTE(review): the last colour-bar break (2.9) is a literal -- confirm it
# matches the largest proportion in `author_stats_all_centuries`.
ggplot(
  data = author_stats_all_centuries,
  mapping = aes(
    x = total_num_pages,
    y = reorder(author, total_num_pages),
    fill = proportion
  )
) +
  geom_col() +
  # Tick every 400 pages; axis limits are left to ggplot's defaults.
  scale_x_continuous(
    breaks = seq(
      from = 0,
      to = max(author_stats_all_centuries[["total_num_pages"]]) + 200,
      by = 400
    )
  ) +
  # Dark-to-light blue gradient spanning 0 up to the largest proportion.
  scale_fill_gradient(
    name = "% of Pages in\nAll Anthologies\n",
    low = "#2A3950",
    high = "#6bc9ff",
    limits = c(0, max(author_stats_all_centuries[["proportion"]])),
    breaks = c(0:2, 2.9)
  ) +
  guides(fill = guide_colorbar(reverse = FALSE)) +
  ggtitle(
    label = "Top 30 Authors By Total Anthology Pages",
    subtitle = "19th, 20th, and 21st Century\n"
  ) +
  xlab("\nTotal Pages") +
  ylab("Author") +
  # Centre the title and subtitle over the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

In [None]:
# Column chart: top authors across all centuries, ranked by total pages.
# Bar colour is the continuous `frequency` value on a three-colour diverging
# gradient (gold -> grey-green -> navy).
# NOTE(review): the gradient midpoint (13) and the legend breaks (5, 14, 22)
# are literals while the limits come from the data -- confirm they still fit
# `author_stats_all_centuries$frequency`.
ggplot(
  data = author_stats_all_centuries,
  mapping = aes(
    x = total_num_pages,
    y = reorder(author, total_num_pages),
    fill = frequency
  )
) +
  geom_col() +
  # Axis stops at the longest bar; candidate ticks every 300 pages.
  scale_x_continuous(
    limits = c(0, max(author_stats_all_centuries[["total_num_pages"]])),
    breaks = seq(
      from = 0,
      to = max(author_stats_all_centuries[["total_num_pages"]]) + 200,
      by = 300
    )
  ) +
  # Colour limits span the observed minimum and maximum frequency.
  scale_fill_gradient2(
    name = "# of Anthologies\nAuthor Occurs In\n",
    low = "#F2B90C",
    mid = "#89A7A7",
    high = "#2B3A67",
    midpoint = 13,
    limits = range(author_stats_all_centuries[["frequency"]]),
    breaks = c(5, 14, 22)
  ) +
  guides(fill = guide_colorbar(reverse = FALSE)) +
  labs(
    title = "Most Prominent Authors Across 19th, 20th, and 21st Century Anthologies",
    subtitle = "by Total Pages\n",
    x = "\nTotal Pages",
    y = "Author\n",
    # Same text as the fill scale's `name` above.
    fill = "# of Anthologies\nAuthor Occurs In\n"
  ) +
  # Centre the title and subtitle over the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

### **Bar Chart**: Native American Literature Page Counts by Century

In [None]:
# Column chart: total pages of Native American literature per century.
# Bar colour is `frequency` treated as a discrete factor (`my_colors3`), and
# each bar is labelled with its `total_num_pages` value.
ggplot(
  data = native_lit_stats,
  mapping = aes(
    x = century,
    y = total_num_pages,
    fill = factor(frequency)
  )
) +
  geom_col(width = 0.6) +
  # Print the page count just above each bar.
  geom_text(aes(label = total_num_pages), vjust = -0.5, size = 3.5) +
  # Extend the axis 20 pages past the tallest bar; tick every 50 pages.
  scale_y_continuous(
    limits = c(0, max(native_lit_stats[["total_num_pages"]]) + 20),
    breaks = seq(
      from = 0,
      to = max(native_lit_stats[["total_num_pages"]]) + 20,
      by = 50
    )
  ) +
  scale_fill_manual(values = my_colors3) +
  # Legend keys are listed in reverse order.
  guides(fill = guide_legend(reverse = TRUE)) +
  ggtitle(
    label = "Prominence of Native American Literature\nin American Literary Anthologies",
    subtitle = "By Total Pages\n"
  ) +
  xlab("\n Century") +
  ylab("Total Pages\n") +
  labs(fill = "Number of\nAnthologies\nFeaturing Native\nLiterature") +
  # Centre the title and subtitle over the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

### **Clustered Bar Chart**: Native American Literature vs. Most Featured Author

In [None]:
# Clustered column chart: pages per century for each `author` entry in
# `combined_native_and_other_stats`, with one dodged bar per author and the
# page count printed above each bar.
ggplot(
  combined_native_and_other_stats,
  aes(x = century, y = total_num_pages, fill = author, group = author)
) +
  # preserve = "single" keeps every bar the same width even when a century
  # has fewer authors than the others.
  geom_col(
    position = position_dodge2(width = 0.9, padding = 0.025, preserve = "single")
  ) +

  labs(
    title = "Number of Pages Received",
    subtitle = "Native American Literature vs. Most Featured Authors\n",
    x = "\nCentury",
    y = "Number of Pages\n",
    fill = "Author"
  ) +

  scale_fill_manual(values = my_colors3) +

  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  ) +

  # Dodge the labels with the same position_dodge2(preserve = "single")
  # settings as the bars. The previous position_dodge(width = 0.9) preserves
  # the total width instead, so labels were not centred over their bars in
  # any century with fewer authors than the maximum.
  geom_text(
    aes(label = total_num_pages),
    vjust = -0.5,
    size = 3,
    position = position_dodge2(width = 0.9, preserve = "single")
  )



In [None]:
# Clustered column chart: number of anthologies (`frequency`) per century for
# each `author` entry in `combined_native_and_other_stats`, with one dodged
# bar per author and the legend placed below the plot.
ggplot(
  data = combined_native_and_other_stats,
  mapping = aes(
    x = century,
    y = frequency,
    fill = author,
    group = author
  )
) +
  # preserve = "single" keeps every bar the same width even when a century
  # has fewer authors than the others.
  geom_col(
    position = position_dodge2(width = 1, padding = 0.05, preserve = "single")
  ) +
  # Extend the axis one unit past the largest frequency; tick every 2.
  scale_y_continuous(
    limits = c(0, max(combined_native_and_other_stats[["frequency"]]) + 1),
    breaks = seq(
      from = 0,
      to = max(combined_native_and_other_stats[["frequency"]]) + 1,
      by = 2
    )
  ) +
  scale_fill_manual(values = my_colors3) +
  ggtitle(
    label = "Number of Anthologies Featured In",
    subtitle = "Native American Literature vs. Most Featured Author\n"
  ) +
  xlab("\nCentury") +
  ylab("Number of Anthologies Featured In\n") +
  labs(fill = "Author") +
  # Centre the title and subtitle; move the legend under the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
