# Characterization of methylation clusters

In [70]:
suppressMessages({
    library(tidyverse)
    library(magrittr)
    library(pheatmap)
    library(viridis)
})

In [11]:
methylation.path = '../../data/Figueroa/clusters/methyl-cluster-k30.csv'
methylation.normalized = read_csv(methylation.path)

Parsed with column specification:
cols(
  .default = col_double(),
  probe = col_character(),
  communities = col_integer()
)
See spec(...) for full column specifications.


In [12]:
head(methylation.normalized, n=2)

GSM464771,GSM464772,GSM464773,GSM464774,GSM464775,GSM464776,GSM464777,GSM464778,GSM464779,GSM464780,⋯,GSM465026,GSM465030,GSM465060,GSM465061,GSM465062,GSM465063,GSM465064,GSM465065,probe,communities
0.4997071,-0.9638502,0.730679,0.295602,-1.786108,-0.28102832,-0.9613261,0.1343021,-0.7236957,-0.7220128,⋯,-0.07390868,0.495822,0.186514,2.849106,-0.8216546,-0.489393,0.2317528,-0.2321558,LOC100133331,13
0.7429656,0.43204,1.309134,-1.562857,-1.736423,0.05641418,-0.3386436,-0.1086917,-1.0315853,-0.1469336,⋯,0.82305359,-0.1565679,-0.285529,2.057526,-1.1154584,-0.8165077,-0.5661085,-0.3184803,AK091100,2


In [13]:
methylation.normalized %<>%
  arrange(communities)

methylation.mat = data.matrix(methylation.normalized %>% select(-probe))

In [29]:
rownames.probes = data.frame(probe = methylation.normalized$probe) %>%
  group_by(probe) %>%
  mutate(n = seq(n())) %>%
  unite(probe, c('probe', 'n'))

In [72]:
set.seed(1)
t = 2000
rand.idx = sort(sample(1:dim(methylation.mat)[1], t))
test.mat = methylation.mat[rand.idx,]
X <- test.mat
f.diff = c()
while (TRUE) {
    X.prev <- X
    X <- t(apply(X, 1, scale))
    X <- apply(X, 2, scale)
  
    F.diff <- norm(X, type='F') - norm(X.prev, type='F') # compute the Frobenius norm difference between two matrices
    print(F.diff)
    f.diff <- c(f.diff, F.diff)
    if (abs(F.diff) < 1e-12) break
}

rownames(X) = rownames.probes$probe[rand.idx]
annotation = data.frame(cluster=as.factor(methylation.mat[rand.idx,'communities']))
rownames(annotation) = rownames.probes$probe[rand.idx]
pheatmap(
        X,
        cluster_row = FALSE,
        cluster_col = TRUE,
        show_rownames = FALSE,
        show_colnames = FALSE,
        annotation_row = annotation,
        annotation_legend = FALSE,
        filename = '../../figures/Figueroa/methyl-clustering.png',
        color = viridis(100)
        )

[1] -46.67813
[1] -1.364242e-11
[1] 1.011813e-11
[1] -5.115908e-12
[1] 6.139089e-12
[1] -1.364242e-12
[1] 1.364242e-12
[1] 5.911716e-12
[1] -1.591616e-11
[1] -2.955858e-12
[1] 1.102762e-11
[1] -5.456968e-12
[1] -1.591616e-12
[1] 4.547474e-12
[1] 2.046363e-12
[1] 8.071765e-12
[1] 1.102762e-11
[1] -2.899014e-11
[1] 1.330136e-11
[1] -1.023182e-12
[1] -3.865352e-12
[1] -6.252776e-12
[1] 6.934897e-12
[1] -9.094947e-13


## Annotation of clusters by gene ontology