```{webr}
#| context: setup
# Read the public-health blood-pressure workbook into `df`, the data frame
# every later chunk works from
df <- read_excel(path = "../../data/PublicHealth_BP_Dataset.xlsx")

# Make a 12-pt minimal theme the default for all ggplot2 figures
theme_set(new = theme_minimal(base_size = 12))
```

# Title Slide {background-color="#f0f0f0"}

## Categorical Base Plots

**Author:** Sparrow

**Date:** `r Sys.Date()`

## Bar Chart - Gender Distribution


```{webr}
#| code-fold: false

# Count rows per gender; `label` is the two-line "count\n(share%)" text
# drawn above each bar
gender_data <- df %>%
  count(Gender) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(n, "\n(", round(percentage, 1), "%)")
  )

# Create bar chart
ggplot(gender_data, aes(x = Gender, y = n, fill = Gender)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = label), vjust = -0.5, size = 4) +
  # Leave headroom above the tallest bar so its two-line label is not
  # clipped at the top edge of the panel
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Distribution by Gender",
       x = "Gender",
       y = "Frequency") +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 14))
```


## Pie Chart - Region Distribution


```{webr}
#| code-fold: false

# Count rows per region and build the "<region>\n<count> (<pct>%)" slice label
region_data <- count(df, Region) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(Region, "\n", n, " (", round(percentage, 1), "%)")
  )

# Pie chart: a single full-width stacked column wrapped onto polar coordinates
ggplot(data = region_data, mapping = aes(x = "", y = n, fill = Region)) +
  geom_col(width = 1) +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),  # centre each label in its slice
    size = 3.5
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Distribution by Region") +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )
```


## Horizontal Bar Chart - Smoking Status


```{webr}
#| code-fold: false

# Count rows per smoking status, attach a "count (share%)" label and sort
# the table from least to most frequent
smoking_data <- count(df, SmokingStatus) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(n, " (", round(percentage, 1), "%)")
  ) %>%
  arrange(n)

# Horizontal bars ordered by frequency, with the label just past each bar end
ggplot(
  data = smoking_data,
  mapping = aes(x = reorder(SmokingStatus, n), y = n, fill = SmokingStatus)
) +
  geom_col(width = 0.7) +
  geom_text(mapping = aes(label = label), size = 4, hjust = -0.1) +
  # Extra room on the count axis so labels beyond the longest bar stay visible
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_brewer(palette = "Set3") +
  coord_flip() +
  labs(
    title = "Distribution by Smoking Status",
    x = "Smoking Status",
    y = "Frequency"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    legend.position = "none"
  )
```


## Donut Chart - Chronic Disease Distribution


```{webr}
#| code-fold: false

# Drop rows with a missing ChronicDisease, then count each disease and build
# the "<disease>\n<count> (<pct>%)" label (shares are of the non-missing rows)
disease_data <- filter(df, !is.na(ChronicDisease)) %>%
  count(ChronicDisease) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(ChronicDisease, "\n", n, " (", round(percentage, 1), "%)")
  )

# Donut chart: the column sits at x = 2 and the x-axis starts at 0.5, which
# leaves the centre of the polar plot empty
ggplot(
  data = disease_data,
  mapping = aes(x = 2, y = n, fill = ChronicDisease)
) +
  geom_col(width = 1) +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  coord_polar(theta = "y", start = 0) +
  xlim(0.5, 2.5) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(title = "Chronic Disease Distribution") +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )
```


## Dodged Bar Chart - Smoking × Alcohol Use


```{webr}
#| code-fold: false

# Count every SmokingStatus x AlcoholUse combination; `percentage` is the
# share of each alcohol-use level *within* its smoking-status group
smoking_alcohol_data <- df %>%
  count(SmokingStatus, AlcoholUse) %>%
  group_by(SmokingStatus) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(n, "\n(", round(percentage, 1), "%)")
  ) %>%
  ungroup()

# Create dodged bar chart
ggplot(smoking_alcohol_data, aes(x = SmokingStatus, y = n, fill = AlcoholUse)) +
  # Bars and labels use the same explicit dodge width so they stay aligned
  geom_col(position = position_dodge(width = 0.7), width = 0.7) +
  geom_text(aes(label = label),
            position = position_dodge(width = 0.7),
            vjust = -0.5,
            size = 3.5) +
  # Leave headroom above the tallest bar so its two-line label is not
  # clipped at the top edge of the panel
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  scale_fill_manual(values = c("No" = "#66C2A5", "Yes" = "#FC8D62")) +
  labs(title = "Smoking Status by Alcohol Use",
       x = "Smoking Status",
       y = "Frequency",
       fill = "Alcohol Use") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        legend.position = "top")
```


## Stacked Bar (Count) - Region × Smoking


```{webr}
#| code-fold: false

# Count every Region x SmokingStatus combination; `percentage` is the share
# within each region. Segments at or below 5% get an empty label.
region_smoking_data <- count(df, Region, SmokingStatus) %>%
  group_by(Region) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = ifelse(
      percentage > 5,
      paste0(n, "\n(", round(percentage, 1), "%)"),
      ""
    )
  ) %>%
  ungroup()

# Stacked bars of raw counts, one bar per region
ggplot(
  data = region_smoking_data,
  mapping = aes(x = Region, y = n, fill = SmokingStatus)
) +
  geom_col(width = 0.7) +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),  # centre label in its segment
    colour = "white",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Region Distribution by Smoking Status (Count)",
    x = "Region",
    y = "Frequency",
    fill = "Smoking Status"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )
```


## Stacked Bar (Proportion) - Region × Smoking


```{webr}
#| code-fold: false

# Within each region, convert smoking-status counts to a proportion (0-1),
# a percentage (0-100) and a "<pct>%" text label
region_smoking_prop <- count(df, Region, SmokingStatus) %>%
  group_by(Region) %>%
  mutate(
    prop = n / sum(n),
    percentage = prop * 100,
    label = paste0(round(percentage, 1), "%")
  ) %>%
  ungroup()

# Stacked bars of within-region proportions, y-axis shown as percentages
ggplot(
  data = region_smoking_prop,
  mapping = aes(x = Region, y = prop, fill = SmokingStatus)
) +
  geom_col(width = 0.7) +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),  # centre label in its segment
    colour = "white",
    fontface = "bold",
    size = 3.5
  ) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Region Distribution by Smoking Status (Proportion 0-100%)",
    x = "Region",
    y = "Proportion",
    fill = "Smoking Status"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )
```


## Exploded Pie Chart - Treatment Group


```{webr}
#| code-fold: false

# Count rows per treatment group and compute each group's share of all rows
treatment_data <- df %>%
  count(TreatmentGroup) %>%
  mutate(percentage = n / sum(n) * 100)

# Create exploded pie chart using plotrix.
# The margins are passed through pie3D()'s own `mar` argument: pie3D() sets
# the plot margins itself (default c(4, 4, 4, 4)), so a par(mar = ...) call
# made beforehand is overridden and would only leave global graphics state
# changed.
pie3D(treatment_data$n,
      labels = paste0(treatment_data$TreatmentGroup, "\n",
                     treatment_data$n, " (",
                     round(treatment_data$percentage, 1), "%)"),
      explode = 0.1,
      col = c("#8DD3C7", "#FFFFB3"),
      main = "Treatment Group Distribution (Exploded)",
      cex.main = 1.2,
      labelcex = 1,
      theta = 1.2,
      mar = c(1, 1, 3, 1))
```


## Triple Donut - Plotly Version (Code)


```{webr}
#| code-fold: false
#| style: "font-size: 13px; overflow-y: auto; height: 400px;"

# For each chronic disease, keep the rows for that disease with a recorded
# exercise frequency, count rows per frequency, and add each frequency's
# percentage share within the disease.

# Diabetes
diabetes_exercise <- filter(
  df,
  ChronicDisease == "Diabetes",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(percentage = n / sum(n) * 100)

# Hypertension
htn_exercise <- filter(
  df,
  ChronicDisease == "Hypertension",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(percentage = n / sum(n) * 100)

# Heart disease
heart_exercise <- filter(
  df,
  ChronicDisease == "Heart Disease",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(percentage = n / sum(n) * 100)

cat("Data prepared for three diseases\n")
```


## Triple Donut - ggplot2 Version (Code)


```{webr}
#| code-fold: false
#| style: "font-size: 13px; overflow-y: auto; height: 400px;"

# Counts for every ChronicDisease x ExerciseFreq pair (rows missing either
# value are dropped). ExerciseFreq becomes a factor with a fixed level order,
# and `disease` is a factor copy of ChronicDisease in ring order.
# NOTE: values not listed in `levels` are turned into NA by factor().
exercise_disease <- filter(
  df,
  !is.na(ChronicDisease),
  !is.na(ExerciseFreq)
) %>%
  count(ChronicDisease, ExerciseFreq) %>%
  mutate(
    ExerciseFreq = factor(
      ExerciseFreq,
      levels = c("Daily", "3-5 times/week", "1-2 times/week")
    ),
    disease = factor(
      ChronicDisease,
      levels = c("Diabetes", "Hypertension", "Heart Disease")
    )
  )

# One table per disease: counts by exercise frequency, tagged with the
# disease name and the x position assigned to that disease's ring

# Ring at x = 0.5
diabetes_df <- filter(
  df,
  ChronicDisease == "Diabetes",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(disease = "Diabetes", x = 0.5)

# Ring at x = 0.7
htn_df <- filter(
  df,
  ChronicDisease == "Hypertension",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(disease = "Hypertension", x = 0.7)

# Ring at x = 0.9
heart_df <- filter(
  df,
  ChronicDisease == "Heart Disease",
  !is.na(ExerciseFreq)
) %>%
  count(ExerciseFreq) %>%
  mutate(disease = "Heart Disease", x = 0.9)

# Stack the three ring tables into a single data frame
combined <- bind_rows(diabetes_df, htn_df, heart_df)

cat("Data prepared for ggplot2 triple donut\n")
```


## Mosaic Plot: Description

::: {style="font-size: 22px; line-height: 1.6;"}

### When to Use
- Display relationships between 2+ categorical variables
- Show proportions in contingency tables
- Identify associations between variables

### Requirements
- Multiple categorical variables (typically 2-4)
- Can handle unbalanced data

### How to Interpret
- **Tile width**: First variable proportion
- **Tile height**: Second variable proportion (within category)
- **Tile area**: Combined proportion of both variables
- **Empty or very thin tiles**: Cells with zero or very low frequency
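- **Color intensity**: Strength of association (only when tiles are shaded by residuals; with highlighting, color marks the levels of the highlighted variable)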

### Advantages
- ✅ Shows multi-dimensional relationships
- ✅ Displays proportions and frequencies simultaneously
- ✅ Highlights sparse cells
- ✅ Can test independence visually

### Limitations
- ❌ Complex to interpret for beginners
- ❌ Can be overwhelming with many variables
- ❌ Not suitable for more than 4 variables

:::

## Mosaic Plot - Output


```{webr}
#| code-fold: false

# Build the four-way contingency table unless an earlier chunk has already
# created `mosaic_table`. The dimension names come from the formula, so
# "AlcoholUse" matches the `highlighting` argument below.
if (!exists("mosaic_table")) {
  mosaic_table <- xtabs(
    ~ TreatmentGroup + Gender + SmokingStatus + AlcoholUse,
    data = df
  )
}

# Create mosaic plot using vcd package: tiles are split vertically by the
# first two variables and horizontally by the last two, and the AlcoholUse
# levels are distinguished by the two highlighting colours
mosaic(mosaic_table,
       main = "Mosaic Plot: Treatment Group × Gender × Smoking × Alcohol Use",
       highlighting = "AlcoholUse",
       highlighting_fill = c("#8DD3C7", "#FB8072"),
       direction = c("v", "v", "h", "h"),
       spacing = spacing_highlighting,
       labeling = labeling_border(rot_labels = c(45, 0, 0, 0),
                                  just_labels = c("right", "center", "center", "center"),
                                  varnames = TRUE,
                                  abbreviate_labs = TRUE))
```


## Questions & Discussion

::: {style="text-align: center; font-size: 32px; margin-top: 200px;"}

**Thank you!**

Categorical Base Plots

**Any Questions?**

:::