-
Notifications
You must be signed in to change notification settings - Fork 0
/
kmeans.clust.R
62 lines (56 loc) · 2.45 KB
/
kmeans.clust.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
## Run k-means over a sequence of cluster counts (k) and collect the results.
##
## Arguments:
##   input.mat         data passed unchanged to kmeans() as its first argument.
##   tot.clust         "narrow": the largest k to try; k runs over 2:tot.clust,
##                     so it must be >= 2.
##                     "broad": the maximum NUMBER of k values to try (not the
##                     largest k); must be >= 1.
##   seed              passed to set.seed() immediately before every kmeans()
##                     call, so each fit starts from the same RNG state.
##   cluster.sequence  "narrow" (every k from 2 to tot.clust) or "broad"
##                     (geometrically growing k).
##   growth.rate       "broad" only: starting at k = 2, the next k is
##                     ceiling((1 + growth.rate) * k). Must be > 0.
##   max.clust         "broad" only: the sequence stops as soon as the next k
##                     would be >= max.clust. The default, 617, is the limit
##                     that used to be hard-coded.
##
## Returns a list with two elements:
##   cluster.labels      data frame with a "Labels" column (the names of the
##                       kmeans cluster vector, or "1", "2", ... when the input
##                       carries no row names) followed by one
##                       "Clusterk<k>" column of assignments per k.
##   kmeans.output.list  the kmeans() fits. In "narrow" mode the fit for k is
##                       at position k (position 1 is NULL); in "broad" mode
##                       the list is named by k ("2", "3", ...).
##
## Side effect: calls set.seed(), so the caller's RNG state is changed.
kmeans.clust <- function(input.mat=NULL, tot.clust=NULL, seed=5995, cluster.sequence="narrow", growth.rate=0.2, max.clust=617){
  ## validate arguments up front so bad input fails before any clustering runs
  if (!is.character(cluster.sequence) || length(cluster.sequence) != 1L ||
      !(cluster.sequence %in% c("narrow", "broad"))){
    stop("Please specify cluster.sequence to be either 'narrow' or 'broad'", call.=FALSE)
  }
  if (is.null(input.mat)){
    stop("input.mat must be supplied", call.=FALSE)
  }
  if (!is.numeric(tot.clust) || length(tot.clust) != 1L || is.na(tot.clust)){
    stop("tot.clust must be a single number", call.=FALSE)
  }

  ##work out which cluster counts to try
  narrow <- cluster.sequence == "narrow"
  if (narrow){
    ## 2:tot.clust would count downwards (2, 1) for tot.clust < 2
    if (tot.clust < 2){
      stop("tot.clust must be at least 2 when cluster.sequence is 'narrow'", call.=FALSE)
    }
    clust.seq <- 2:tot.clust
  }
  else {
    if (tot.clust < 1){
      stop("tot.clust must be at least 1 when cluster.sequence is 'broad'", call.=FALSE)
    }
    if (!is.numeric(growth.rate) || length(growth.rate) != 1L ||
        is.na(growth.rate) || growth.rate <= 0){
      stop("growth.rate must be a single positive number", call.=FALSE)
    }
    if (!is.numeric(max.clust) || length(max.clust) != 1L || is.na(max.clust)){
      stop("max.clust must be a single number", call.=FALSE)
    }
    ##geometric growth from 2, at most tot.clust values, all below max.clust
    clust.seq <- numeric(0)
    current.clust <- 2
    j <- 1
    while (j <= tot.clust && current.clust < max.clust){
      clust.seq[j] <- current.clust
      current.clust <- ceiling((1 + growth.rate)*current.clust)
      j <- j+1
    }
    ## a growth rate too small to change k in floating point would repeat
    ## values; keep each k once so names and columns stay unique
    clust.seq <- unique(clust.seq)
    if (length(clust.seq) == 0L){
      stop("max.clust must be greater than 2 when cluster.sequence is 'broad'", call.=FALSE)
    }
  }

  ##perform clustering once per k, resetting the seed before every fit
  fits <- lapply(clust.seq, function(k){
    set.seed(seed)
    kmeans(input.mat, k, iter.max=25)
  })

  ##now want to pull out the clusters and store them in a dataframe
  point.names <- names(fits[[1]]$cluster)
  if (is.null(point.names)){
    ## input without row names: label the observations by position
    point.names <- as.character(seq_along(fits[[1]]$cluster))
  }
  cluster.labels <- data.frame("Labels" = point.names)
  for (idx in seq_along(clust.seq)){
    cluster.labels[[paste0("Clusterk", clust.seq[idx])]] <- fits[[idx]]$cluster
  }

  ##arrange the fits the way callers expect for each mode
  if (narrow){
    ## fit for k lives at position k, so position 1 is an empty placeholder
    temp.clusters <- c(list(NULL), fits)
  }
  else {
    temp.clusters <- fits
    names(temp.clusters) <- paste0(clust.seq)
  }

  ##prepare output
  list("cluster.labels" = cluster.labels,
       "kmeans.output.list" = temp.clusters)
}