**Load essential libraries**

In [None]:
library(ggplot2) # library for plotting
library(dplyr) # library for data wrangling

In [None]:
## Load data - refer to http://openmv.net/info/food-texture for data description
# The first CSV column supplies the row names of the data frame
file = 'http://openmv.net/file/food-texture.csv'
foodData = read.csv(file = file, header = TRUE, row.names = 1)

## Print structure of data frame
str(object = foodData)

In [None]:
## Show the first 5 rows of the data frame
foodData %>% head(n = 5)

In [None]:
## Modify data frame
# Recode Crispy as a factor: values above 11 are labelled 'high', all other
# values 'low'
foodData = foodData %>%
  mutate(Crispy = factor(ifelse(Crispy > 11, 'high', 'low')))

In [None]:
## Show the structure of the modified data frame
foodData %>% str()

In [None]:
## Show the first 5 rows of the modified data frame
foodData %>% head(n = 5)

In [None]:
## Scatter plot of Hardness (y-axis) against Density (x-axis)
p1 = foodData %>%
  ggplot(aes(x = Density, y = Hardness)) +
  geom_point(size = 1)
p1

In [None]:
## Scatter plot of Hardness (y-axis) against Density (x-axis), with points
## colored by Crispy
p2 = foodData %>%
  ggplot(aes(x = Density, y = Hardness, color = factor(Crispy))) +
  geom_point(size = 1)
p2

In [None]:
## Pearson correlation between Density and Hardness
with(foodData, cor(Density, Hardness, method = 'pearson'))

In [None]:
# Correlation matrix of every column except the Crispy factor
print(cor(select(foodData, -Crispy)))

In [None]:
## Keep three columns: the features Density and Hardness plus the Crispy label
foodData3 = select(foodData, Density, Hardness, Crispy)

In [None]:
## Show the first 5 rows of the reduced data frame
foodData3 %>% head(n = 5)

In [None]:
# Mean center the data matrix
# X holds every column of foodData3 except Crispy, as a numeric matrix
X = as.matrix(select(foodData3, -Crispy))
# Subtract each column's mean from that column
X_m = sweep(X, 2, colMeans(X))

In [None]:
# Calculate the sample covariance matrix
# Manual formula S = t(X_m) %*% X_m / (n - 1), built from the mean-centered
# matrix only (the uncentered X must not appear in this product)
S = crossprod(X_m) / (nrow(X_m) - 1)
print(S)
# Same quantity from the built-in estimator; this is the value of S that is
# kept for the following cells
S = cov(foodData3 %>% select(-c('Crispy')))
print(S)

In [None]:
# Eigendecomposition of the sample covariance matrix
e = eigen(S)
V = e[['vectors']]      # eigenvectors, stored as the columns of V
lambda = e[['values']]  # eigenvalues
print(V)
print(lambda)

In [None]:
## Project samples onto the direction of the first and second eigenvectors

# Flip the sign of both eigenvectors. The projections computed below are
# unaffected by this sign, because each eigenvector enters them twice.
V = -V

# Shadow length (scalar projection) of every row of the uncentered matrix X
# on each eigenvector, stored as plain numeric vectors
shadowLength1 = as.numeric(X %*% V[, 1])
shadowLength2 = as.numeric(X %*% V[, 2])

# Vector projection: each column is one sample's shadow length times the
# eigenvector, so the result has one row per feature and one column per sample
projectedSamples1 = V[, 1] %*% t(shadowLength1)
projectedSamples2 = V[, 2] %*% t(shadowLength2)

In [None]:
## Scatter plot of Density and Hardness, color coded using Crispy and first
## two eigenvectors with the projected data on to the first principal direction
## also color coded using Crispy
# Reset V to the eigenvectors as returned by eigen(). Assigning from e rather
# than negating V again gives the same result on a first run and does not
# toggle the sign when this cell is executed more than once.
V = e$vectors
p3 = foodData3 %>%
  # Carry the projected coordinates as columns so the plot owns its data
  mutate(projX = projectedSamples1[1, ], projY = projectedSamples1[2, ]) %>%
  ggplot(aes(x = Density, y = Hardness, color = factor(Crispy))) +
  geom_point(size = 1) +
  # Each eigenvector arrow gets its own one-row data frame, so it is drawn
  # exactly once instead of once per row of the plot data. The arrows run
  # from the origin and are scaled by 100 to be visible on the data's scale.
  geom_segment(data = data.frame(xend = 100*V[1, 1], yend = 100*V[2, 1]),
   aes(x = 0, y = 0, xend = xend, yend = yend), inherit.aes = FALSE, size = 0.5,
   arrow = arrow(length = unit(0.1,"cm")), color = 'red') +
  geom_segment(data = data.frame(xend = 100*V[1, 2], yend = 100*V[2, 2]),
   aes(x = 0, y = 0, xend = xend, yend = yend), inherit.aes = FALSE, size = 0.5,
   arrow = arrow(length = unit(0.1,"cm")), color = 'blue') +
  # Projected samples, drawn as crosses; the color mapping is inherited
  geom_point(aes(x = projX, y = projY), shape = 4, size = 2.0)
p3

In [None]:
## Scatter plot of Density and Hardness, color coded using Crispy and first
## two eigenvectors with the projected data on to the second principal direction
## also color coded using Crispy
p4 = foodData3 %>%
  # Carry the projected coordinates as columns so the plot owns its data
  mutate(projX = projectedSamples2[1, ], projY = projectedSamples2[2, ]) %>%
  ggplot(aes(x = Density, y = Hardness, color = factor(Crispy))) +
  geom_point(size = 1) +
  # Each eigenvector arrow gets its own one-row data frame, so it is drawn
  # exactly once instead of once per row of the plot data. The arrows run
  # from the origin and are scaled by 100 to be visible on the data's scale.
  geom_segment(data = data.frame(xend = 100*V[1, 1], yend = 100*V[2, 1]),
   aes(x = 0, y = 0, xend = xend, yend = yend), inherit.aes = FALSE, size = 0.5,
   arrow = arrow(length = unit(0.1,"cm")), color = 'red') +
  geom_segment(data = data.frame(xend = 100*V[1, 2], yend = 100*V[2, 2]),
   aes(x = 0, y = 0, xend = xend, yend = yend), inherit.aes = FALSE, size = 0.5,
   arrow = arrow(length = unit(0.1,"cm")), color = 'blue') +
  # Projected samples, drawn as crosses; the color mapping is inherited
  geom_point(aes(x = projX, y = projY), shape = 4, size = 2.0)
p4