# Graphics 

In [1]:
# Notebook setup: load IPython extensions, configure plotting and warnings,
# and import the Python libraries.

# Enable the %%R cell magic (provided by rpy2) used by the R cells in this notebook.
%load_ext rpy2.ipython
# Reload edited Python modules automatically before executing each cell.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the cell output.
%matplotlib inline  
from matplotlib import rcParams
# Default matplotlib figure size as (width, height) in inches.
# NOTE(review): a 100-inch-tall default is unusual — confirm this is intended.
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
# Blanket filter: silences every Python warning category for the session,
# including deprecation notices. The commented-out line below is the narrower
# alternative that only silences warnings of category RRuntimeWarning.
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [2]:
%%javascript
// Turn off auto-scrolling of cell output: override OutputArea's
// _should_scroll hook so that it returns false for any number of lines.
IPython.OutputArea.prototype._should_scroll = (_lineCount) => false;

<IPython.core.display.Javascript object>

In [3]:
%%R

# My commonly used R imports

# Attach with library() rather than require(): library() stops with an error
# when the package is not installed, whereas require() only warns and returns
# FALSE, which would let later cells fail with confusing
# "could not find function" errors.
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Loading required package: tidyverse


# First graph, 1937

In [61]:
%%R
library(readxl)

# Read the workbook used for the 1937 graph into a tibble.
df_trees37 <- read_excel(path = 'Forest.xlsx')

# Preview the last six rows as a quick sanity check of the import.
tail(df_trees37)

# A tibble: 6 × 5
   Year Species         Rank `Proportion type`   `%`
  <dbl> <chr>          <dbl> <chr>             <dbl>
1  1937 Elms               7 Share of count      1.6
2  1937 Dogwood            8 Share of count      1.4
3  1937 Tulip tree         9 Share of count      1.3
4  1937 Maples            10 Share of count      1.2
5  1937 American beech    11 Share of count      1.1
6  1937 European ash      12 Share of count      1  


In [58]:
%%R -w 450 -h 490

library(ggplot2)
library(ggrepel)  # provides geom_text_repel(); not attached by ggplot2 or the core tidyverse
library(readxl)

# Convert Rank to a factor so it is plotted as a discrete x axis.
df_trees37$Rank <- factor(df_trees37$Rank)

# Rank vs. percentage, one line (and colour) per proportion type.
ggplot(df_trees37, aes(x = Rank, y = `%`, color = `Proportion type`, group = `Proportion type`)) +
  geom_line(linewidth = 1) +
  geom_point() +
  # Label only the four top-ranked species, in blue, with non-overlapping text.
  geom_text_repel(data = df_trees37 %>% filter(Rank %in% c("1", "2", "3", "4")),
                  aes(label = Species), color = "blue") +
  # Lower y limit is taken from the data (NA); upper limit is fixed at 35.
  scale_y_continuous(limits = c(NA, 35)) +
  scale_color_manual(values = c("Area" = "lightblue", "Share of count" = "lightgreen")) +
  theme_minimal() +
  # The axis titles set here are blanked again by the theme() call below;
  # only the plot title stays visible.
  labs(title = "1937", x = "Rank", y = "Percentage") +
  theme(
    axis.text.y = element_text(margin = margin(r = 5, unit = "mm")),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major.y = element_line(color = "grey", linewidth = 0.5),
    panel.grid.minor.y = element_blank(),
    legend.position = "top",
    legend.title = element_blank()
  )

# With no `plot` argument, ggsave() writes the most recently created plot;
# here as a 4 x 4 inch SVG.
ggsave("trees1.svg", width = 4, height = 4, units = "in", device = "svg")

# Second graph, 1985

In [62]:
%%R
library(readxl)

# Read the workbook used for the 1985 graph into a tibble.
df_trees85 <- read_excel(path = 'Forest1985.xlsx')

# Preview the first six rows as a quick sanity check of the import.
head(df_trees85)

# A tibble: 6 × 5
   Year Species           Rank `Proportion type`   `%`
  <dbl> <chr>            <dbl> <chr>             <dbl>
1  1985 Oaks                 1 Area               23  
2  1985 Northern red oak     2 Area               13  
3  1985 Red maple            3 Area               13  
4  1985 Tulip tree           4 Area               12.5
5  1985 American beech       5 Area               10  
6  1985 Hickories            6 Area                9  


In [68]:
%%R -w 450 -h 490

library(ggplot2)
library(ggrepel)  # provides geom_text_repel(); not attached by ggplot2 or the core tidyverse
library(readxl)

# Convert Rank to a factor so it is plotted as a discrete x axis.
df_trees85$Rank <- factor(df_trees85$Rank)

# Rank vs. percentage, one line (and colour) per proportion type.
ggplot(df_trees85, aes(x = Rank, y = `%`, color = `Proportion type`, group = `Proportion type`)) +
  geom_line(linewidth = 1) +
  geom_point() +
  # Label only the four top-ranked species, in blue, with non-overlapping text.
  geom_text_repel(data = df_trees85 %>% filter(Rank %in% c("1", "2", "3", "4")),
                  aes(label = Species), color = "blue") +
  # Lower y limit is taken from the data (NA); upper limit is fixed at 35.
  scale_y_continuous(limits = c(NA, 35)) +
  scale_color_manual(values = c("Area" = "lightblue", "Share of count" = "lightgreen")) +
  theme_minimal() +
  # The axis titles set here are blanked again by the theme() call below.
  labs(x = "Rank", y = "Percentage") +
  theme(
    axis.text.y = element_text(margin = margin(r = 5, unit = "mm")),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major.y = element_line(color = "grey", linewidth = 0.5),
    panel.grid.minor.y = element_blank(),
    legend.position = "top",
    legend.title = element_blank()
  )

# With no `plot` argument, ggsave() writes the most recently created plot;
# here as a 4 x 4 inch SVG.
ggsave("trees1985.svg", width = 4, height = 4, units = "in", device = "svg")

ggrepel: 2 unlabeled data points (too many overlaps). Consider increasing max.overlaps 


# Third graph, 2021

In [70]:
%%R
library(readxl)

# Read the workbook used for the 2021 graph into a tibble.
df_trees21 <- read_excel(path = 'Forest2021.xlsx')

# Preview the first six rows as a quick sanity check of the import.
head(df_trees21)

# A tibble: 6 × 5
   Year Species        Rank `Proportion type`   `%`
  <dbl> <chr>         <dbl> <chr>             <dbl>
1  2021 Oaks              1 Area               23  
2  2021 Red Oak           2 Area               13  
3  2021 Red mable         3 Area               12.9
4  2021 Tulip tree        4 Area               12.5
5  2021 American beec     5 Area               10  
6  2021 Hickory           6 Area                9  


In [72]:
%%R -w 450 -h 490

library(ggplot2)
library(ggrepel)  # provides geom_text_repel(); not attached by ggplot2 or the core tidyverse
library(readxl)

# Convert Rank to a factor so it is plotted as a discrete x axis.
# The ranks must come from the 2021 frame itself: taking them from
# df_trees37 would either fail on a row-count mismatch or silently attach
# the 1937 ranks to the 2021 rows.
df_trees21$Rank <- factor(df_trees21$Rank)

# Rank vs. percentage, one line (and colour) per proportion type.
ggplot(df_trees21, aes(x = Rank, y = `%`, color = `Proportion type`, group = `Proportion type`)) +
  geom_line(linewidth = 1) +
  geom_point() +
  # Label only the four top-ranked species, in blue, with non-overlapping text.
  geom_text_repel(data = df_trees21 %>% filter(Rank %in% c("1", "2", "3", "4")),
                  aes(label = Species), color = "blue") +
  # Lower y limit is taken from the data (NA); upper limit is fixed at 35.
  scale_y_continuous(limits = c(NA, 35)) +
  scale_color_manual(values = c("Area" = "lightblue", "Share of count" = "lightgreen")) +
  theme_minimal() +
  # The axis titles set here are blanked again by the theme() call below.
  labs(x = "Rank", y = "Percentage") +
  theme(
    axis.text.y = element_text(margin = margin(r = 5, unit = "mm")),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major.y = element_line(color = "grey", linewidth = 0.5),
    panel.grid.minor.y = element_blank(),
    legend.position = "top",
    legend.title = element_blank()
  )

# With no `plot` argument, ggsave() writes the most recently created plot;
# here as a 4 x 4 inch SVG.
ggsave("trees2021.svg", width = 4, height = 4, units = "in", device = "svg")