# **Apply**: Looping on the command line

-   lapply(): Loop over a list and evaluate a function on each element
-   sapply(): Same as lapply() but try to simplify the result
-   apply(): Apply a function over the margins of an array
-   tapply(): Apply a function over subsets of a vector
-   mapply(): Multivariate version of lapply

# lapply()

In [1]:
# Named list: `a` holds the integers 1..5, `b` holds 10 draws from rnorm().
x <- list(a = 1:5, b = rnorm(10))
# lapply() calls mean() on each element and returns a list named a, b.
lapply(x, mean)

In [2]:
# Four numeric vectors of different lengths. In rnorm(20, 1) and
# rnorm(100, 5) the second positional argument is the mean of the draws.
x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
# One mean per element; the result is a list with the same names as x.
lapply(x, mean)

In [6]:
x <- 1:4
# Each element of x is passed as the first argument of runif() (the number
# of draws), so the result is a list of vectors of length 1, 2, 3 and 4.
lapply(x, runif)

In [7]:
x <- 1:4
# Arguments given after the function are forwarded to every runif() call,
# so each vector is drawn with min = 0 and max = 10.
lapply(x, runif, min = 0, max = 10)

In [8]:
# A list holding a 2 x 2 matrix (`a`) and a 3 x 2 matrix (`b`).
x <- list(a = matrix(1:4, 2, 2), b = matrix(1:6, 3, 2))
x

0,1
1,3
2,4

0,1
1,4
2,5
3,6


In [9]:
# Anonymous function: pull the first column out of every matrix in x.
lapply(x, function(m) m[, 1])

# sapply()

In [11]:
x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
# Same computation as lapply(x, mean); because every result has length 1,
# sapply() simplifies the list to a named numeric vector.
sapply(x, mean)

# apply()

In [14]:
# Show the argument list of apply().
str(apply)

function (X, MARGIN, FUN, ..., simplify = TRUE)  


MARGIN = 1 : row-wise (FUN is applied to each row)

MARGIN = 2 : column-wise (FUN is applied to each column)

In [16]:
# 20 x 10 matrix filled with 200 draws from rnorm().
x <- matrix(rnorm(200), 20, 10)
apply(x, 1, sum)   # MARGIN = 1: one sum per row, same values as rowSums(x)
apply(x, 2, mean)  # MARGIN = 2: one mean per column, same values as colMeans(x)

In [17]:
x <- matrix(rnorm(200), 20, 10)
# `probs` is forwarded to quantile(). Each row yields two values, so the
# result is a 2 x 20 matrix: rows 25% and 75%, one column per row of x.
apply(x, 1, quantile, probs = c(0.25, 0.75))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
25%,-1.0026197,0.2212035,-0.78531989,-0.6949228,-1.4020226,-1.1815717,-0.8418331,-0.8871996,-0.1289502,-1.110636,-1.1634183,-1.0287939,-0.672565,-0.9229873,-0.6953154,-0.2278993,-0.5287917,-0.4541874,-0.3858306,-0.8526098
75%,0.1201862,0.8543376,0.01872764,0.9967704,-0.4180892,0.2726168,0.8448166,0.3088824,0.1763737,0.2773667,0.8764587,0.7785223,0.5018576,-0.2233608,0.9938846,0.4943931,0.6044566,0.8698024,0.1687056,0.4544548


In [29]:
# 2 x 2 x 10 array filled with draws from rnorm().
a <- array(rnorm(2 * 2 * 10), c(2, 2, 10))
apply(a, c(1, 2), mean) # keep dimensions 1 and 2, average over the third: 2 x 2 result
# rowMeans(a, dims = 2) gives the same result; it also keeps the first two dimensions.

0,1
-0.08967845,-0.1446001
0.30910713,0.3523174


# tapply()

In [30]:
# Show the argument list of tapply().
str(tapply)

function (X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE)  


In [33]:
# 30 values: 10 from rnorm(), 10 from runif(), 10 from rnorm() with mean 1.
x <- c(rnorm(10), runif(10), rnorm(10, 1))
f <- gl(6, 5)  # factor with 6 levels, each repeated 5 times (length 30)
f
tapply(x, f, mean) # mean of the x values that fall in each level of f

In [34]:
# With simplify = FALSE the per-level means are returned as a list instead
# of being collapsed into a numeric vector.
tapply(x, f, mean, simplify = FALSE)

In [35]:
# range() returns two values (min and max) per level, so the result is a
# list with one length-2 vector per level rather than a plain vector.
tapply(x, f, range)

# split()

In [36]:
# Show the argument list of split().
str(split)

function (x, f, drop = FALSE, ...)  


In [37]:
x <- c(rnorm(10), runif(10), rnorm(10, 1))
f <- gl(3, 10)  # 3 levels, each repeated 10 times
# split() returns a list with one element of x values per level of f.
split(x, f)

In [38]:
x <- c(rnorm(10), runif(10), rnorm(10, 1))
f <- gl(3, 10)
# split() followed by lapply() computes one mean per level, the same values
# tapply(x, f, mean) would give, but returned as a list.
lapply(split(x, f), mean)

# Splitting a Data Frame

In [39]:
library(datasets)
head(airquality)
# One data frame per distinct value of the Month column.
s <- split(airquality, airquality$Month)
# Column means of Ozone, Solar.R and Wind within each month, returned as a
# list. No na.rm is passed, so a column containing NA has an NA mean.
lapply(s, function(x) colMeans(x[, c("Ozone", "Solar.R", "Wind")]))

Unnamed: 0_level_0,Ozone,Solar.R,Wind,Temp,Month,Day
Unnamed: 0_level_1,<int>,<int>,<dbl>,<int>,<int>,<int>
1,41.0,190.0,7.4,67,5,1
2,36.0,118.0,8.0,72,5,2
3,12.0,149.0,12.6,74,5,3
4,18.0,313.0,11.5,62,5,4
5,,,14.3,56,5,5
6,28.0,,14.9,66,5,6


In [40]:
# Same per-month column means, simplified by sapply() into a matrix with
# one row per variable and one column per month. Missing values still
# propagate, so some entries are NA.
sapply(s, function(x) colMeans(x[, c("Ozone", "Solar.R", "Wind")]))

Unnamed: 0,5,6,7,8,9
Ozone,,,,,
Solar.R,,190.16667,216.483871,,167.4333
Wind,11.62258,10.26667,8.941935,8.793548,10.18


In [41]:
# Per-month column means with missing values dropped before averaging.
sapply(s, function(month_df) {
  colMeans(month_df[, c("Ozone", "Solar.R", "Wind")], na.rm = TRUE)
})

Unnamed: 0,5,6,7,8,9
Ozone,23.61538,29.44444,59.115385,59.961538,31.44828
Solar.R,181.2963,190.16667,216.483871,171.857143,167.43333
Wind,11.62258,10.26667,8.941935,8.793548,10.18


In [42]:
x <- rnorm(10)
f1 <- gl(2, 5)  # 2 levels, each repeated 5 times
f2 <- gl(5, 2)  # 5 levels, each repeated 2 times
f1
f2
# Passing a list of factors splits on their interaction: one element for
# each of the 2 x 5 level combinations, including combinations that have
# no observations.
split(x, list(f1, f2))

In [43]:
# Compact view of the split; empty level combinations appear as num(0).
str(split(x, list(f1, f2)))

List of 10
 $ 1.1: num [1:2] -1.837 -0.244
 $ 2.1: num(0) 
 $ 1.2: num [1:2] -0.722 0.721
 $ 2.2: num(0) 
 $ 1.3: num -1
 $ 2.3: num -1.4
 $ 1.4: num(0) 
 $ 2.4: num [1:2] 1.947 -0.478
 $ 1.5: num(0) 
 $ 2.5: num [1:2] -0.0287 0.0545


In [45]:
# drop = TRUE removes the level combinations that have no observations.
str(
  split(x, list(f1, f2), drop = TRUE)
)

List of 6
 $ 1.1: num [1:2] -1.837 -0.244
 $ 1.2: num [1:2] -0.722 0.721
 $ 1.3: num -1
 $ 2.3: num -1.4
 $ 2.4: num [1:2] 1.947 -0.478
 $ 2.5: num [1:2] -0.0287 0.0545


# mapply()

In [46]:
# Show the argument list of mapply().
str(mapply)

function (FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)  


In [47]:
# Written out by hand: the value i repeated (5 - i) times, for i = 1..4.
list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))

In [48]:
# mapply() pairs the i-th elements of 1:4 and 4:1 and calls rep() on each
# pair: rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1).
mapply(rep, 1:4, 4:1) # equivalent to building that list by hand

Vectorizing a function

In [54]:
# Draw `n` normal random values with the given `mean` and `sd`.
# Thin wrapper: all three arguments are passed straight through to rnorm().
noise <- function(n, mean, sd) {
  rnorm(n = n, mean = mean, sd = sd)
}
# Scalar arguments: 5 draws with mean 1 and sd 2.
noise(5, 1, 2)
# When `n` is a vector, rnorm() uses its length as the number of draws, so
# this returns 5 values in total (one per mean 1..5) rather than 1 draw with
# mean 1, 2 draws with mean 2, and so on.
noise(1:5, 1:5, 2) # does not work as expected

In [56]:
# mapply() calls noise(i, i, 2) for i = 1..5 (the single sd value 2 is
# reused for every call) and returns the five vectors as a list.
mapply(noise, 1:5, 1:5, 2)
# Equivalent to:
# list(noise(1, 1, 2), noise(2, 2, 2), noise(3, 3, 2), noise(4, 4, 2), noise(5, 5, 2))