# 2020-06-08.R
library(tidyverse)
# Download the two TidyTuesday (2021-06-08) CSV files used by this script.
fishing <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-08/fishing.csv"
)
stocked <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-08/stocked.csv"
)

# Number of missing values in each column of `fishing`.
fishing %>%
  is.na() %>%
  colSums()
# One-column tibble (`species`) holding the ten species with the most rows in
# `fishing`, ordered from most to fewest rows.
#
# count(sort = TRUE) replaces the group_by()/count()/arrange()/ungroup() chain.
# slice_max() replaces the superseded top_n(), which picked its ranking column
# implicitly (the last column) and printed a "Selecting by n" message; here the
# ranking column is named explicitly. Ties at the cut-off are still kept, as
# they were with top_n(), so more than ten rows are possible.
top_10_fishes <- fishing %>%
  count(species, sort = TRUE) %>%
  slice_max(order_by = n, n = 10) %>%
  select(species)
# Stacked bar chart of the yearly `values` total for each species listed in
# `top_10_fishes`, drawn as one free-scaled panel per lake.
fishing %>%
  select(-comments) %>%
  filter(species %in% top_10_fishes$species) %>%
  group_by(lake, species, year) %>%
  summarize(
    # na.rm = TRUE: without it a single missing `values` entry turns the whole
    # lake/species/year total into NA and that bar is not drawn.
    `total number of fish` = sum(values, na.rm = TRUE),
    # Return an ungrouped table so no grouping leaks into later steps.
    .groups = "drop"
  ) %>%
  arrange(desc(`total number of fish`)) %>%
  ggplot(aes(year, `total number of fish`, fill = species)) +
  # geom_col() is the direct spelling of geom_bar(stat = "identity").
  geom_col(position = "stack") +
  facet_wrap(~lake, scales = "free") +
  theme_bw() +
  theme(
    strip.text = element_text(size = 15, face = "bold"),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.text.x = element_text(size = 13),
    legend.position = "bottom",
    legend.text = element_text(size = 15),
    legend.title = element_blank(),
    plot.title = element_text(size = 16)
  ) +
  ggtitle("10 Most Populated Fishes In The Great Lakes")
# ggsave("10 Most Populated Fishes In The Great Lakes.png", height = 10, width = 20)
# Tidy up `stocked`: lower-case every column name, then store `year` as integer.
stocked <- setNames(stocked, tolower(names(stocked)))
stocked[["year"]] <- as.integer(stocked[["year"]])

# Quick inspection: missing values per column, followed by the table dimensions.
colSums(is.na(stocked))
dim(stocked)
library(scales)
# Scatter plot of stocking records over time: one point per row of `stocked`,
# y axis in thousands of fish, coloured by lake, one free-scaled panel per
# species.
stocked %>%
  select(year, lake, species, no_stocked, weight) %>%
  mutate(
    # Expand the two-letter lake codes to full names; any code that is not
    # listed here becomes NA.
    lake = unname(
      c(
        MI = "Michigan",
        SU = "Superior",
        ON = "Ontario",
        ER = "Erie",
        HU = "Huron",
        SC = "Saint Clair"
      )[as.character(lake)]
    )
  ) %>%
  ggplot(aes(x = year, y = no_stocked / 1000, colour = lake)) +
  geom_point(size = 2.5) +
  facet_wrap(~species, scales = "free") +
  scale_x_continuous(breaks = seq(1950, 2018, 10)) +
  labs(
    x = "year",
    y = "number stocked (thousands)",
    title = "Time-Wise Fishes Stocked In The Great Lakes"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),
    strip.text = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 15),
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 15),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 15)
  )
# ggsave("Time-Wise Fishes Stocked In The Great Lakes.png", height = 10, width = 20)
# NOTE: the ggstream version of this plot did not work, so the attempt below is left commented out.
# library(ggstream)
# stocked %>% select(year, lake, species, no_stocked, weight) %>%
# mutate(lake = case_when(
# lake == "MI" ~ "Michigan",
# lake == "SU" ~ "Superior",
# lake == "ON" ~ "Ontario",
# lake == "ER" ~ "Erie",
# lake == "HU" ~ "Huron",
# lake == "SC" ~ "Saint Clair"
# )) %>%
# drop_na() %>%
# ggplot(aes(year, weight, fill = species)) +
# geom_stream()
# Violin plots of the log-scaled yearly `values` total for each species listed
# in `top_10_fishes`, laid out as a lake-by-species grid with free scales.
fishing %>%
  select(-comments) %>%
  filter(species %in% top_10_fishes$species) %>%
  group_by(lake, species, year) %>%
  summarize(
    # na.rm = TRUE: without it a single missing `values` entry turns the whole
    # lake/species/year total into NA and that observation is lost.
    `total number of fish` = sum(values, na.rm = TRUE),
    # Return an ungrouped table so no grouping leaks into later steps.
    .groups = "drop"
  ) %>%
  arrange(desc(`total number of fish`)) %>%
  # log() of a zero total is -Inf; such rows carry no finite y value to draw.
  # NOTE(review): `year` is a continuous x aesthetic and no `group` is set, so
  # this presumably draws a single violin per panel rather than a series over
  # time -- confirm that is the intended picture.
  ggplot(aes(year, log(`total number of fish`), fill = species)) +
  geom_violin() +
  facet_grid(lake ~ species, scales = "free") +
  theme_bw() +
  theme(
    strip.text = element_text(size = 15, face = "bold"),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.text.x = element_text(size = 15, angle = 30),
    legend.position = "none",
    legend.text = element_text(size = 15),
    legend.title = element_blank(),
    plot.title = element_text(size = 16)
  ) +
  scale_x_continuous(breaks = seq(1900, 2018, 50)) +
  labs(x = "year", y = "log-scale total number of fish") +
  ggtitle("Time Series 10 Most Populated Fishes In The Great Lakes")
# ggsave("Time Series 10 Most Populated Fishes In The Great Lakes.png", height = 10, width = 20)
library(ggridges)
# Ridgeline densities of log(weight + 1) per species, one panel per lake.
# The plot is stored in a variable so that ggsave() writes exactly this plot
# instead of depending on whatever ggplot2 currently regards as the last plot.
stocked_weight_ridges <- stocked %>%
  select(year, lake, species, no_stocked, weight) %>%
  mutate(
    # Expand the two-letter lake codes to full names; any code that is not
    # listed here becomes NA.
    lake = unname(
      c(
        MI = "Michigan",
        SU = "Superior",
        ON = "Ontario",
        ER = "Erie",
        HU = "Huron",
        SC = "Saint Clair"
      )[as.character(lake)]
    )
  ) %>%
  # log1p(w) is log(w + 1), computed accurately for small w.
  ggplot(aes(log1p(weight), species, fill = species)) +
  geom_density_ridges() +
  facet_wrap(~lake) +
  theme_bw() +
  theme(
    strip.text = element_text(size = 15, face = "bold"),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.text.x = element_text(size = 13),
    legend.position = "none",
    plot.title = element_text(size = 16)
  ) +
  labs(x = "Weight (log scale)", y = " ") +
  ggtitle("Weights Of Fishes Stocked In The Great Lakes")

# Display the plot, as the original top-level expression did.
stocked_weight_ridges

# Write the plot to disk in the working directory.
ggsave(
  "Weights Of Fishes Stocked In The Great Lakes.png",
  plot = stocked_weight_ridges,
  height = 10,
  width = 20
)