In [3]:
library(cowplot)
library(infer)
library(repr)
library(tidyverse)
library(broom)
library(digest)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.3.6      [32m✔[39m [34mpurrr  [39m 0.3.4 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.4.1 
[32m✔[39m [34mreadr  [39m 2.1.2      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


In [4]:
# Load the salmon records from the local copy of the dataset (it is too large
# to read directly from the web, so a downloaded version is used instead).
salmon <- read.csv(file = "data/ASL_master.csv")

# Display the first 6 rows of the raw data
head(salmon, n = 6L)

Unnamed: 0_level_0,Species,Length.Measurement.Type,sampleYear,ASLProjectType,LocationID,sampleDate,Length,Weight,Sex,Salt.Water.Age,⋯,Flag,Gear,SASAP.Region,LocationUnique,DistrictID,Sub.DistrictID,Stat.area,Lat,Lon,AWC_CODE
Unnamed: 0_level_1,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<dbl>,<chr>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<chr>
1,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,3,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,
2,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,3,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,
3,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,3,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,
4,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,3,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,
5,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,4,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,
6,chinook,length not taken,1992,commercial catch,Affleck Canal/Spanish Is/Louise Cove,1992-03-31,,,examined but did not identify,3,⋯,,troll,Southeast,Affleck Canal/Spanish Is/Louise Cove-commercial catch-10510,105,10,10510,,,


In [29]:
# Clean the raw data: keep only the Species, Length and Sex columns, retain
# rows whose Sex is recorded as "male" or "female", and drop rows with a
# missing Length.
# NOTE(review): no filter on Species is applied here; the name `chinook`
# presumably relies on the input file containing only chinook — confirm.

# Fix the RNG seed. Nothing in this cell draws random numbers, so this is
# presumably for reproducibility of later cells.
set.seed(1)

chinook <- salmon %>%
  select(Species, Length, Sex) %>%
  filter(Sex %in% c("male", "female"), !is.na(Length))

# Show the first 6 rows of the cleaned data
head(chinook)

Unnamed: 0_level_0,Species,Length,Sex
Unnamed: 0_level_1,<chr>,<int>,<chr>
1,chinook,796,female
2,chinook,855,female
3,chinook,658,male
4,chinook,510,male
5,chinook,465,male
6,chinook,560,male


In [37]:
# Descriptive statistics of Length for each Sex: sample size, mean, standard
# deviation, median and interquartile range. The per-sex rows are then spread
# into a single row with one `<statistic>_<sex>` column per combination.
chinook_summary <- chinook %>%
  group_by(Sex) %>%
  summarise(
    n = n(),
    x_bar = mean(Length),
    sd = sd(Length),
    median = median(Length),
    IQR = IQR(Length),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = Sex,
    values_from = c(n, x_bar, sd, median, IQR)
  )

chinook_summary

n_female,n_male,x_bar_female,x_bar_male,sd_female,sd_male,median_female,median_male,IQR_female,IQR_male
<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
55712,66259,838.8231,795.3091,129.1345,192.7826,854,820,141.25,290


In [40]:
# Quartiles of Length within each Sex: the 25th, 50th and 75th percentiles.
# Despite its name, `IQR_chinook` stores the quartiles themselves, not the
# interquartile range (which would be q75 - q25).
IQR_chinook <- chinook %>%
  group_by(Sex) %>%
  summarise(
    q25 = quantile(Length, probs = 0.25),
    q50 = quantile(Length, probs = 0.5),
    q75 = quantile(Length, probs = 0.75),
    .groups = "drop"
  )

IQR_chinook

Sex,q25,q50,q75
<chr>,<dbl>,<dbl>,<dbl>
female,781.75,854,923
male,650.0,820,940
