# Import Data into R

In [None]:
# Load the weather table for each city; the first CSV column supplies the row names.
chic_f <- read.csv(file = "Chicago-F.csv", row.names = 1)
san_f <- read.csv(file = "SanFrancisco-F.csv", row.names = 1)
ny_f <- read.csv(file = "NewYork-F.csv", row.names = 1)
hous_f <- read.csv(file = "Houston-F.csv", row.names = 1)

In [None]:
# Preview the first rows of each city's table to check that the import worked.
head(chic_f)
head(san_f)
head(ny_f)
head(hous_f)

In [None]:
# Since all the values in these tables are numeric, we can convert the data frames into matrices

In [None]:
# Replace each data frame with its matrix equivalent (same variable names are reused).
chic_f <- as.matrix(chic_f)
ny_f <- as.matrix(ny_f)
hous_f <- as.matrix(hous_f)
san_f <- as.matrix(san_f)

In [None]:
# Bundle the four city matrices into one named list so they can be processed together.
weather <- list(
  Chicago = chic_f,
  NewYork = ny_f,
  Houston = hous_f,
  SanFrancisco = san_f
)

In [None]:
weather

# What is the APPLY family?

In [None]:
 apply(chic_f,1,mean) # MARGIN = 1: gives a vector containing the mean of each row

In [None]:
 apply(chic_f, 2, mean)  # MARGIN = 2: gives a vector containing the mean of each column

In [None]:
# Saves time because no explicit loops need to be written

In [None]:
# The apply family:
# apply : use on a matrix; either row or column.
# tapply : use on vector to extract subgroups and apply function to them.
# by : use on data frames. Same as GROUP BY in SQL.
# eapply : use on an Environment(E).
# lapply : apply function to all elements of list.
# sapply : a version of lapply. Can simplify (S) the result so it's not presented as a list
# vapply : has a pre-specified type of return value(V)
# replicate : run a function several times. Usually used with random variables.
# mapply : multivariate (M) version of sapply. Arguments can be recycled.
# rapply : recursive (R) version of lapply.

# Using APPLY()

In [None]:
#?apply

In [None]:
 apply(chic_f, 1, mean) # mean of each row (MARGIN = 1)
 apply(chic_f, 2, sum) # sum of each column (MARGIN = 2)

In [None]:
apply(chic_f, 1, max)

In [None]:
apply(chic_f, 1, min)

In [None]:
apply(chic_f, 2, min)

In [None]:
apply(chic_f, 2, max)

In [None]:
# Row means for each of the four city matrices, one call per city.
apply(chic_f, 1, mean)  
apply(ny_f, 1, mean)
apply(hous_f, 1, mean)
apply(san_f, 1, mean) 

# Recreating APPLY with LOOPS 

In [None]:
# Find mean of every row

In [None]:
# 1. Via Loops

In [None]:
output <- NULL #Preparing an empty vector

In [None]:
# Store the mean of each row of chic_f in `output`, one row per iteration.
# seq_len(nrow(chic_f)) visits every row instead of assuming there are exactly 5,
# so `output` always lines up with rownames(chic_f).
for (i in seq_len(nrow(chic_f))) {
  output[i] <- mean(chic_f[i, ])
}

In [None]:
output

In [None]:
names(output) <- rownames(chic_f)

In [None]:
output

In [None]:
# 2. Via Apply

In [None]:
apply(chic_f, 1, mean)

In [None]:
# See difference in both

# lapply()

In [None]:
#?lapply

In [None]:
chic_f

In [None]:
t(chic_f) # Transpose

In [None]:
weather

In [None]:
# Applying transpose to each matrix and storing them in a new list

In [None]:
weather_1 <- lapply(weather, t)

In [None]:
# Adding another row to matrices

In [None]:
rbind ( chic_f, NewRow = 1:12) # Manual approach: repeat this for each city, then store the results in a list

In [None]:
lapply( weather, rbind, NewRow = 1:12) #Easier

In [None]:
#?rowMeans

In [None]:
rowMeans(chic_f) # Identical to apply(chic_f, 1, mean)

In [None]:
lapply(weather, rowMeans) # Returns a list holding one named vector of row means per city

In [None]:
#rowMeans
#colMeans
#rowSums
#colSums

# Combining lapply with [ ]

In [None]:
weather

In [None]:
weather$Chicago

In [None]:
weather[[1]]

In [None]:
weather$Chicago[1,1]

In [None]:
weather[[1]][1]

In [None]:
lapply(weather, "[",1,1) # lapply already implies [[]]

In [None]:
# "[" is the single-square-bracket operator: lapply will iterate over the list using [[ ]],
# and the value will be extracted from each element using [ ]

In [None]:
# First row for every city
lapply(weather, "[", 1,)

In [None]:
lapply(weather, "[", ,"Mar")
#lapply(weather, "[",,3)

# Adding your own functions

In [None]:
lapply(weather, rowMeans)

In [None]:
lapply( weather, function(x) x[5,])

In [None]:
lapply( weather, function(x) x[,12]) #Dec for every city

In [None]:
# Relative difference between row 1 and row 2 of each matrix (as a fraction of row 2),
# rounded to 2 decimals and then scaled to a percentage.
lapply(weather, function(z) {
  rel_diff <- (z[1, ] - z[2, ]) / z[2, ]
  round(rel_diff, 2) * 100
})

# Using sapply()

In [None]:
#?sapply

In [None]:
weather

In [None]:
lapply(weather, "[", 1, 7 ) #Returns a list

In [None]:
sapply(weather , "[", 1, 7) #Returns named vector

In [None]:
#avg_high for 4th quarter

In [None]:
lapply(weather, "[" , 1, c(10:12))

In [None]:
sapply(weather, "[" , 1, c(10:12)) #Puts them into a matrix

In [None]:
lapply(weather, rowMeans)

In [None]:
round(sapply(weather, rowMeans),2) #Returns a matrix #Deliverable 1

In [None]:
# Relative difference between row 1 and row 2 of each matrix (as a fraction of row 2),
# rounded to 2 decimals and then scaled to a percentage; returned as a list.
lapply(weather, function(z) {
  rel_diff <- (z[1, ] - z[2, ]) / z[2, ]
  round(rel_diff, 2) * 100
})

In [None]:
# Deliverable 2
# Same row 1 vs. row 2 percentage difference as the lapply version, but sapply
# simplifies the per-city results where it can.
sapply(weather, function(z) {
  rel_diff <- (z[1, ] - z[2, ]) / z[2, ]
  round(rel_diff, 2) * 100
})

In [None]:
# With simplify = FALSE, sapply skips simplification and returns a list, like lapply.
# FALSE is spelled out because the shorthand F is an ordinary variable that can be reassigned.
sapply(weather, rowMeans, simplify = FALSE)

# Nesting apply functions

In [None]:
chic_f

In [None]:
apply(chic_f, 1, max)

In [None]:
lapply(weather, apply, 1, max) # Returns the max of every row of every matrix in the list; apply is
                               # the function lapply calls, and 1 and max are passed on to it.

In [None]:
lapply(weather, function(x) apply(x, 1, max)) # Another approach

In [None]:
#tidyup

In [None]:
sapply(weather, apply, 1, max) #Deliverable 3
sapply(weather, apply, 1, min) #Deliverable 4

# which.max() and which.min()

In [None]:
# We want to know what month was highest, names, not numbers
# which.max() and which.min() will help

In [None]:
which.max(chic_f[1,])

In [None]:
names(which.max(chic_f[1,])) #Nesting names,which.max()

In [None]:
apply(chic_f, 1, function(x) names(which.max(x))) #Nesting names,which.max() with apply()

In [None]:
lapply(weather, function(y) apply(y, 1, function(x) names(which.max(x))))
                                  # Nesting names(), which.max(), apply() and lapply():
                                  # for each list element, the name of the column holding each row's maximum.

In [None]:
sapply(weather, function(y) apply(y, 1, function(x) names(which.max(x))))