# Setup

In [13]:
library(DMwR)
library(knitr)
library(ggplot2)
library(ggthemes)
library(patchwork)

# Data Description
## Overview
The data contain the following variables:

* Season in which the measurements were collected.
* River size.
* River speed.
* Max pH value.
* Min oxygen level.
* Mean over three measurements of:
  * Chloride.
  * Nitrates.
  * Ammonium.
  * Orthophosphate.
  * Phosphate.
  * Chlorophyll.

Associated with each water sample are the frequencies of seven harmful algae.

## Summary

### Head

In [None]:
# Show the first rows of the algae data.
head(algae)

### Summary Stats

In [None]:
# Render the output of summary(algae) as a formatted table with kable().
kable(summary(algae))

## Variable Distributions

In [16]:
#' Plot the distribution of one variable as a density curve and a Q-Q plot
#'
#' Builds two ggplot2 panels for `var` -- a density estimate and a Q-Q plot
#' with a reference line -- and combines them under a shared title.
#'
#' @param var Unquoted column of `data` (or an expression built from its
#'   columns) to plot.
#' @param data Data frame to plot from. Defaults to `algae`, so existing
#'   calls such as `plot_variable(mxPH)` keep working unchanged.
#' @return The combined plot object; printing it draws both panels.
plot_variable <- function(var, data = algae) {
  # Take the title label from the caller's expression. deparse() may split a
  # long expression into several strings, so collapse them to guarantee the
  # title is a single string.
  var_label <- paste(deparse(substitute(var)), collapse = " ")

  density_plot <- ggplot(data, aes(x = {{ var }})) +
    geom_density() +
    theme_economist()

  qq_plot <- ggplot(data, aes(sample = {{ var }})) +
    stat_qq() +
    stat_qq_line() +
    theme_economist()

  density_plot + qq_plot + plot_annotation(
    title = paste0("Distribution for ", var_label)
  )
}

### Max pH

In [None]:
#| warning: false
# Density and Q-Q plots for the maximum pH value.
plot_variable(mxPH)

### Min Oxygen

In [None]:
#| warning: false
# Density and Q-Q plots for the minimum oxygen level.
plot_variable(mnO2)

### Mean Chloride

In [None]:
#| warning: false
# Density and Q-Q plots for mean chloride.
plot_variable(Cl)

### Mean Nitrates

In [None]:
#| warning: false
# Density and Q-Q plots for mean nitrates.
plot_variable(NO3)

### Mean Ammonium

In [None]:
#| warning: false
# Density and Q-Q plots for mean ammonium.
plot_variable(NH4)

### Mean Orthophosphate

In [None]:
#| warning: false
# Density and Q-Q plots for mean orthophosphate.
plot_variable(oPO4)

### Mean Phosphate

In [None]:
#| warning: false
# Density and Q-Q plots for mean phosphate.
plot_variable(PO4)

### Mean Chlorophyll

In [None]:
#| warning: false
# Density and Q-Q plots for mean chlorophyll.
plot_variable(Chla)