In [None]:
# Install only the packages that are not available yet, so re-running this
# cell does not reinstall everything from scratch.
required_packages <- c(
  "showtext", "sysfonts", "dplyr", "ggplot2", "corrplot", "tidyverse"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Loading libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(showtext)
library(sysfonts)

In [None]:
# Register the Google font "Noto Sans TC" (Traditional Chinese glyphs)
# under the family name "noto".
sysfonts::font_add_google(name = "Noto Sans TC", family = "noto")

# Switch on showtext rendering for the graphics devices opened from here on.
showtext::showtext_auto(enable = TRUE)

In [None]:
# Load the raw case table from the CSV file into `df`
df <- read.csv(file = "/content/Age_County_Gender_19Cov.csv")

In [None]:
# Data cleaning: drop rows whose content is ambiguous.

# Coerce the confirmed-case count and the onset month to numeric;
# values that cannot be parsed become NA.
df <- df %>%
  mutate(
    確定病例數 = as.numeric(確定病例數),
    發病月份 = as.numeric(發病月份)
  )

# Show how many case counts are missing after the coercion
sum(is.na(df$確定病例數))

# Keep only rows that
#   - have a usable case count,
#   - do not carry the onset-year value 2,
#   - have gender recorded as "M" or "F",
#   - are outside the age groups 0, 1, 2, 3, 4 and 5-9.
df <- df %>%
  filter(
    !is.na(確定病例數),
    發病年份 != 2,
    性別 %in% c("M", "F"),
    !(年齡層 %in% c("0", "1", "2", "3", "4", "5-9"))
  )

Total COVID-19 Cases per Year

In [None]:
# Sum the confirmed cases for each onset year
cases_per_year <- summarise(
  group_by(df, 發病年份),
  total_cases = sum(確定病例數)
)

# Bar chart of the yearly totals, drawn on a log10 y-axis
ggplot(data = cases_per_year, mapping = aes(x = 發病年份, y = total_cases)) +
  geom_bar(stat = "identity", fill = "deepskyblue") +
  scale_y_log10() +
  ggtitle("Total COVID-19 Cases per Year (Log Scale)") +
  xlab("Year") +
  ylab("Total Cases (Log Scale)") +
  theme(
    aspect.ratio = 3 / 4,
    plot.title = element_text(size = 32),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    axis.text.x = element_text(size = 28),
    axis.text.y = element_text(size = 28),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 28)
  )

In [None]:
# Sum the confirmed cases for each onset year, split by imported status.
# `.groups = "drop"` returns an ungrouped table and silences the
# regrouping message summarise() prints for multi-column groupings.
cases_per_year_imported <- df %>%
  group_by(發病年份, 是否為境外移入) %>%
  summarise(total_cases = sum(確定病例數), .groups = "drop")

# Fill colours keyed by imported status ("是" = yes, "否" = no)
custom_colors <- c("是" = "orange", "否" = "green")

# Bars are dodged rather than stacked: the log10 transformation is applied
# before bars are positioned, so stacking would add log values and the
# stacked height would represent a product of the counts, not their sum.
ggplot(
  cases_per_year_imported,
  aes(x = 發病年份, y = total_cases, fill = 是否為境外移入)
) +
  geom_col(position = "dodge") +
  scale_y_log10() +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Total COVID-19 Cases per Year (Log Scale)",
    x = "Year",
    y = "Total Cases (Log Scale)",
    fill = "Imported"
  ) +
  theme(
    aspect.ratio = 3 / 4,
    axis.text.x = element_text(size = 28),
    axis.text.y = element_text(size = 28),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    plot.title = element_text(size = 32),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 28)
  )

Proportion of Imported COVID-19 Cases

In [None]:
# Sum the confirmed cases for each imported status
cases_by_import_status <- summarise(
  group_by(df, 是否為境外移入),
  total_cases = sum(確定病例數)
)

# Notebook figure size and fill colours ("是" = yes, "否" = no)
options(repr.plot.width = 12, repr.plot.height = 8)
custom_colors <- c("是" = "orange", "否" = "green")

# One bar per imported status, drawn on a log10 y-axis
ggplot(
  data = cases_by_import_status,
  mapping = aes(x = 是否為境外移入, y = total_cases, fill = 是否為境外移入)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = custom_colors) +
  scale_y_log10() +
  ggtitle("Proportion of Imported COVID-19 Cases") +
  xlab("Imported Case") +
  ylab("Total Cases (Log Scale)") +
  theme(
    aspect.ratio = 3 / 4,
    plot.title = element_text(size = 32),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    axis.text.x = element_text(size = 28),
    axis.text.y = element_text(size = 28),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 28)
  )

In [None]:
# Confirmed-case totals for each imported status
cases_by_import_status <- summarise(
  group_by(df, 是否為境外移入),
  total_cases = sum(確定病例數)
)

# Overall number of confirmed cases across both statuses
total_cases <- sum(cases_by_import_status[["total_cases"]])

# Confirmed cases flagged as imported ("是" = yes)
imported_cases <- pull(
  filter(cases_by_import_status, 是否為境外移入 == "是"),
  total_cases
)

# Share of all confirmed cases that were imported
proportion_imported <- imported_cases / total_cases

# Display the share
proportion_imported

Total COVID-19 Cases by Month in Different Years

In [None]:
# Sum the confirmed cases for each onset year and onset month.
# `.groups = "drop"` returns an ungrouped table and silences the
# regrouping message summarise() prints for multi-column groupings.
case_by_month_year <- df %>%
  group_by(發病年份, 發病月份) %>%
  summarise(total_cases = sum(確定病例數, na.rm = TRUE), .groups = "drop")

# One line per year across the twelve months, on a log10 y-axis.
# Line thickness is set with `linewidth`; using `size` for lines is
# deprecated since ggplot2 3.4.0 and raises a warning.
ggplot(
  case_by_month_year,
  aes(
    x = 發病月份,
    y = total_cases,
    color = as.factor(發病年份),
    group = 發病年份
  )
) +
  geom_line(linewidth = 1.5) +
  scale_y_log10() +
  geom_point(size = 5) +
  labs(x = "Month", y = "Total Cases", color = "Year") +
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  theme_minimal() +
  ggtitle("Total COVID-19 Cases by Month in Different Years") +
  theme(
    aspect.ratio = 3 / 4,
    axis.text.x = element_text(size = 28),
    axis.text.y = element_text(size = 28),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    plot.title = element_text(size = 32),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 28)
  )

COVID-19 Cases by Gender

In [None]:
# Sum the confirmed cases for each gender
cases_by_gender <- df %>%
  group_by(性別) %>%
  summarise(total_cases = sum(確定病例數))

# One bar per gender. The legend title is set through `fill`, the
# aesthetic actually mapped here; a `color` label would have no effect.
ggplot(cases_by_gender, aes(x = 性別, y = total_cases, fill = 性別)) +
  geom_col() +
  labs(
    title = "COVID-19 Cases by Gender",
    x = "Gender",
    y = "Total Cases",
    fill = "Gender"
  ) +
  theme(
    aspect.ratio = 3 / 4,
    axis.text.x = element_text(size = 28),
    axis.text.y = element_text(size = 28),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    plot.title = element_text(size = 32),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 28)
  )

T-test for Imported versus Non-Imported Cases

In [None]:
# Two-sample t-test on the per-row confirmed case counts, with rows
# grouped by imported status (t.test() uses the Welch form by default).
t_test_imported <- t.test(
  formula = 確定病例數 ~ 是否為境外移入,
  data = df
)
print(t_test_imported)

COVID-19 Cases by Age Group

In [None]:
# Sum the confirmed cases for each age group
cases_by_age_group <- summarise(
  group_by(df, 年齡層),
  total_cases = sum(確定病例數)
)

# One bar per age group, filled by age group, with slanted x labels
ggplot(
  data = cases_by_age_group,
  mapping = aes(x = 年齡層, y = total_cases, fill = 年齡層)
) +
  geom_bar(stat = "identity") +
  ggtitle("COVID-19 Cases by Age Group") +
  xlab("Age Group") +
  ylab("Total Cases") +
  theme(
    aspect.ratio = 3 / 6,
    plot.title = element_text(size = 24),
    axis.title.x = element_text(size = 22),
    axis.title.y = element_text(size = 22),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
    axis.text.y = element_text(size = 20),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 20)
  )

COVID-19 Cases by Region

In [None]:
# Sum the confirmed cases for each county/city
cases_by_region <- summarise(
  group_by(df, 縣市),
  total_cases = sum(確定病例數)
)

# Notebook figure size
options(repr.plot.width = 12, repr.plot.height = 8)

# Bars sorted from the largest to the smallest regional total
ggplot(
  data = cases_by_region,
  mapping = aes(x = reorder(縣市, -total_cases), y = total_cases)
) +
  geom_bar(stat = "identity", fill = "deepskyblue") +
  ggtitle("COVID-19 Cases by Region") +
  xlab("Region") +
  ylab("Total Cases") +
  theme(
    aspect.ratio = 3 / 4,
    plot.title = element_text(size = 24),
    axis.title.x = element_text(size = 22),
    axis.title.y = element_text(size = 22),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
    axis.text.y = element_text(size = 20),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 20)
  )

In [None]:
# Sum the confirmed cases for each county/city and gender.
# `.groups = "drop"` returns an ungrouped table and silences the
# regrouping message summarise() prints for multi-column groupings.
cases_by_region_gender <- df %>%
  group_by(縣市, 性別) %>%
  summarise(total_cases = sum(確定病例數), .groups = "drop")

# Stacked bars per region, split by gender. Each region has one row per
# gender, so the ordering uses FUN = sum to sort by the height of the whole
# stack; reorder()'s default (mean) would rank by the average row instead.
ggplot(
  cases_by_region_gender,
  aes(
    x = reorder(縣市, -total_cases, FUN = sum),
    y = total_cases,
    fill = 性別
  )
) +
  geom_col(position = "stack") +
  labs(
    title = "COVID-19 Cases by Region and Gender",
    x = "Region",
    y = "Total Cases",
    fill = "Gender"
  ) +
  theme(
    aspect.ratio = 3 / 4,
    axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 22),
    axis.title.y = element_text(size = 22),
    plot.title = element_text(size = 24),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 20)
  )

Total Cases per Year in Special Municipalities

In [None]:
# The six municipalities this section is restricted to
regions_of_interest <- c(
  "台北市", "新北市", "桃園市", "台中市", "台南市", "高雄市"
)

# Rows belonging to those municipalities only
df_filtered <- filter(df, 縣市 %in% regions_of_interest)

# Confirmed cases summed per onset year and municipality, ungrouped
cases_per_year_region <- ungroup(
  summarise(
    group_by(df_filtered, 發病年份, 縣市),
    total_cases = sum(確定病例數)
  )
)

# One panel per municipality; each panel has its own log10 y-axis range
ggplot(
  data = cases_per_year_region,
  mapping = aes(x = 發病年份, y = total_cases)
) +
  geom_bar(stat = "identity", fill = "blue") +
  scale_y_log10() +
  facet_wrap(facets = ~ 縣市, scales = "free_y") +
  ggtitle("Total Cases per Year in Special Municipality") +
  xlab("Year") +
  ylab("Total Cases(Log Scale)") +
  theme(
    aspect.ratio = 3 / 4,
    plot.title = element_text(size = 24),
    strip.text = element_text(size = 20),
    axis.title.x = element_text(size = 22),
    axis.title.y = element_text(size = 22),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
    axis.text.y = element_text(size = 20),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 20)
  )

Total Cases by Gender and Age Group in Special Municipalities

In [None]:
# Confirmed cases summed per age group, municipality and gender, ungrouped
cases_per_age_region_gender <- ungroup(
  summarise(
    group_by(df_filtered, 年齡層, 縣市, 性別),
    total_cases = sum(確定病例數)
  )
)

# One panel per municipality, with a point per age group coloured by
# gender; each panel has its own y-axis range
ggplot(
  data = cases_per_age_region_gender,
  mapping = aes(x = 年齡層, y = total_cases, color = 性別)
) +
  geom_point(size = 3) +
  facet_wrap(facets = ~ 縣市, scales = "free_y") +
  ggtitle("Total Cases by Gender and Age Group in Special Municipality") +
  xlab("Age Group") +
  ylab("Total Cases") +
  labs(color = "Gender") +
  theme(
    aspect.ratio = 3 / 6,
    plot.title = element_text(size = 24),
    strip.text = element_text(size = 20),
    axis.title.x = element_text(size = 22),
    axis.title.y = element_text(size = 22),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 20)
  )