In [3]:
library(ggplot2)

# Load data

Set up the folder that contains data.

In [40]:
# Dataset sub-folder under Data/Connectome/; it is spliced into every input
# and output path built in the cells below.
c_folder_name = 'NIH/Emotion'

Load adjusted connectome data.

In [41]:
# Read the adjusted connectome table: comma-separated, with a header row, and
# with the first column used as row names.
# NOTE(review): the variable `c` shares its name with base::c(); calls such as
# c(1, 2) still resolve to the function, so the name is kept for the cells
# that depend on it.
c_adj_path = paste0('Data/Connectome/', c_folder_name, '/Adjust/c_adj.csv')
c = read.table(c_adj_path, header = TRUE, sep = ',', row.names = 1)

# Center and scale connectome data

Before applying PCA, scale the data (already centered due to the adjustment) and save scaling parameters.

In [42]:
# Scale every column to unit standard deviation without re-centering.
# scale(center = FALSE, scale = TRUE) divides by the root mean square
# sqrt(sum(x^2) / (n - 1)), which equals sd(x) only when the column mean is
# exactly zero. Passing the standard deviations explicitly guarantees that the
# scaling applied here is the one described by apply(c, 2, sd), i.e. the
# per-column values that are saved as scaling parameters, even if the adjusted
# data are not perfectly centered.
c_sd = scale(c, center = FALSE, scale = apply(c, 2, sd))

In [43]:
# Sample standard deviation of every connectome column, named by column.
sds = apply(c, MARGIN = 2, FUN = sd)

# Store the values as a two-column table (column name, standard deviation);
# row names are not written.
sd_table = data.frame('c_pair' = names(sds), 'sd' = sds)
write.csv(sd_table, file = paste0('Data/Connectome/', c_folder_name, '/PCA/sd.csv'), row.names = FALSE)

# PCA

Find principal components for standardized data.

In [44]:
# Principal component analysis of the scaled connectome matrix.
# The arguments spell out prcomp's defaults: columns are mean-centered by
# prcomp itself and no additional rescaling is applied.
pca = prcomp(c_sd, center = TRUE, scale. = FALSE)

Save rotation matrix and principal components.

In [45]:
# Loadings: one row per connectome column (labelled in `c_pair`) and one
# column per principal component.
rotation_table = data.frame('c_pair' = rownames(pca$rotation), pca$rotation)
write.csv(rotation_table, file = paste0('Data/Connectome/', c_folder_name, '/PCA/rotation.csv'), row.names = FALSE)

# Scores: one row per row of `c` (labelled in `Subject`) and one column per
# principal component.
score_table = data.frame('Subject' = rownames(c), pca$x)
write.csv(score_table, file = paste0('Data/Connectome/', c_folder_name, '/PCA/c_pca.csv'), row.names = FALSE)

Save the cumulative fraction of variance explained as a function of the number of components.

In [46]:
# Variance of each principal component (its standard deviation squared).
var = (pca$sdev)^2

# Cumulative fraction of the total variance captured by the first k components.
var_exp = cumsum(var) / sum(var)

# Written as a single-column table named `var_exp`, without row names.
ve_path = paste0('Data/Connectome/', c_folder_name, '/PCA/ve.csv')
write.csv(data.frame(var_exp = var_exp), file = ve_path, row.names = FALSE)

Plot the cumulative fraction of variance explained against the number of components.

In [47]:
# Base curve: cumulative fraction of variance explained (y) against the number
# of leading principal components (x), drawn as points joined by a line.
p = ggplot(NULL, aes(x = seq_along(var_exp), y = var_exp)) +
    geom_point(shape = 16, size = 1) +
    geom_line() +
    labs(x = '#components', y = 'variance explained', title = 'Variance explained')

# Horizontal position shared by all threshold labels (5% of the x range).
ve_label_x = length(var_exp) / 20

# For each threshold add a dashed red reference line and, just above it, a
# label with the matching component count.
for (threshold in c(0.9, 0.8, 0.7, 0.6)) {
    # Number of leading components whose cumulative fraction is at most the
    # threshold.
    n_pca = sum(var_exp <= threshold)
    threshold_line = geom_hline(yintercept = threshold, linetype = "dashed", colour = 'red')
    threshold_label = annotate("text", x = ve_label_x, y = threshold + 0.01, label = paste('#comp =', n_pca))
    p = p + threshold_line + threshold_label
}

# Write the figure as a 10 x 10 PNG next to the other PCA outputs.
ggsave(filename = paste0('Data/Connectome/', c_folder_name, '/PCA/ve_plot.png'), plot = p, device = 'png', width = 10, height = 10)