# Creating class section size bands for the Common Data Set

### Simulate some data

In [6]:
# Simulate course sections, each with a subject code, a course number, and an
# enrollment count, to give the size-band examples something to work on.

# Fix the RNG seed so the simulated sections (and every table derived from
# them) come out the same on each run.
set.seed(2024)

# Number of simulated sections; shared by all three sample() calls.
n_sections <- 50

df <- data.frame(
    SUBJ = sample(
        c("BIOL", "CHEM", "ENGL", "SOC", "PSCI", "ECON", "HIST", "SPAN"),
        size = n_sections, replace = TRUE
    ),
    NUMB = sample(c(101, 201, 301, 401), size = n_sections, replace = TRUE),
    # Sampled without replacement, so every simulated enrollment is distinct.
    ENROLLMENT = sample(2:120, size = n_sections)
)

head(df)

Unnamed: 0_level_0,SUBJ,NUMB,ENROLLMENT
Unnamed: 0_level_1,<chr>,<dbl>,<int>
1,CHEM,101,79
2,HIST,101,80
3,PSCI,401,103
4,SPAN,301,71
5,PSCI,201,2
6,CHEM,201,68


### Base R solution

In [7]:
# Assign every section to a class-size band and tabulate the bands.
#
# cut() bins ENROLLMENT in a single vectorised pass and returns a factor whose
# levels follow the order of `labels`, so the band names are written once and
# the level order cannot drift away from the values being assigned.
band_labels <- c("2-9", "10-19", "20-29", "30-39", "40-49", "50-99", "100+")

# Inclusive upper bound of every band except the open-ended "100+".
band_upper <- c(9, 19, 29, 39, 49, 99)

df$SizeBand <- cut(
    df$ENROLLMENT,
    # Intervals are (lower, upper]: (-Inf, 9], (9, 19], ..., (99, Inf].
    # Note: anything at or below 9, including values under 2, is labelled "2-9".
    breaks = c(-Inf, band_upper, Inf),
    labels = band_labels,
    right = TRUE,
    include.lowest = TRUE
)

# useNA = "ifany" surfaces rows with a missing ENROLLMENT, which cut() leaves
# as NA rather than placing in a band.
table(df$SizeBand, useNA = "ifany")


  2-9 10-19 20-29 30-39 40-49 50-99  100+ 
    5     3     3     3     5    23     8 

### Tidyverse solution

In [8]:
library(tidyverse)

# Band labels in reporting order. Spelling the levels out explicitly avoids
# relying on alphabetical sorting of the labels (which would put "100+" before
# "2-9") and still works when a band has no sections in the data.
# "CHECK" is the sentinel for rows no band condition matched; it is listed
# last so it appears after all real bands.
size_band_levels <- c(
  "2-9", "10-19", "20-29", "30-39", "40-49", "50-99", "100+", "CHECK"
)

df %>%
  mutate(
    # case_when() takes the first condition that is TRUE for each row.
    SizeBand = case_when(
      ENROLLMENT <= 9 ~ "2-9",
      ENROLLMENT >= 10 & ENROLLMENT <= 19 ~ "10-19",
      ENROLLMENT >= 20 & ENROLLMENT <= 29 ~ "20-29",
      ENROLLMENT >= 30 & ENROLLMENT <= 39 ~ "30-39",
      ENROLLMENT >= 40 & ENROLLMENT <= 49 ~ "40-49",
      ENROLLMENT >= 50 & ENROLLMENT <= 99 ~ "50-99",
      ENROLLMENT >= 100 ~ "100+",
      # Fallback for rows where no condition is TRUE, e.g. a missing
      # ENROLLMENT, so they are flagged for review instead of becoming NA.
      TRUE ~ "CHECK"
    ),
    SizeBand = factor(SizeBand, levels = size_band_levels)
  ) %>%
  # One row per band that actually occurs, in level order.
  count(SizeBand)

SizeBand,n
<fct>,<int>
2-9,5
10-19,3
20-29,3
30-39,3
40-49,5
50-99,23
100+,8
