In [46]:
source("./Ordered K-means.R")


library(ggplot2)
library(gridExtra)
library(ggpubr)
library(ggthemes)
library(GGally)
library(RColorBrewer)
library(corrplot)
library(dplyr)
library(pdfCluster)
library(fossil)
options(warn=-1)
library(Rtsne)
library(latex2exp)

In [47]:
# Rescale every column of a two-dimensional numeric object to [0, 1].
#
# Args:
#   x: numeric matrix (or data frame) that can be indexed as x[, i].
#
# Returns:
#   A numeric matrix with the same dimensions and column names as `x`, where
#   each column is mapped through (value - min) / (max - min).
#   A column whose values are all equal has zero range and yields NaN (0 / 0).
nor_minmax <- function(x) {
  xx <- matrix(0, nrow(x), ncol(x))
  colnames(xx) <- colnames(x)
  # seq_len() rather than 1:ncol(x): for a zero-column input 1:0 would
  # iterate over c(1, 0) and fail on x[, 1]; seq_len(0) skips the loop.
  for (i in seq_len(ncol(x))) {
    col_min <- min(x[, i])
    col_range <- max(x[, i]) - col_min
    xx[, i] <- (x[, i] - col_min) / col_range
  }
  xx
}

In [48]:
# Simulate one data set containing ordered clusters, nominal clusters and
# pure-noise variables, fit okm_basic() to it, and score the fitted partition
# against the true ordered labels.
#
# Args:
#   mu_or:   multiplier applied to the three ordinal mean curves
#            (2 * k, -exp(k) / 10 and 7 * log(k) for cluster label k).
#   n_k_lst: sizes of the ordered clusters; its length is the number of
#            clusters K.
#   p:       total number of columns of the simulated data: 3 ordinal
#            columns + K nominal columns + (p - 3 - K) noise columns.
#   seed:    passed to set.seed(); okm_basic() is called with seed + 102.
#   mu_no:   mean shift of the nominal-cluster indicator columns.
#
# Returns:
#   A list with
#     okm_clu: cluster labels returned by okm_basic(),
#     cluster: true ordered labels,
#     clu_no:  nominal labels,
#     kendall: Kendall correlation between true and fitted labels,
#     rand:    Rand index between true and fitted labels.
#
# Side effect: resets the global RNG state through set.seed(seed).
# NOTE(review): okm_basic() is not defined in this section; presumably it
# comes from the sourced "Ordered K-means.R" -- confirm.
setting_K4 <- function(mu_or = 1, n_k_lst = c(50, 50, 50, 50),
                       p = 20, seed = 1234, mu_no = 10) {
  set.seed(seed)
  K <- length(n_k_lst)
  n <- sum(n_k_lst)

  # True ordered labels: label k repeated n_k_lst[k] times.
  clu_k <- rep(seq_len(K), times = n_k_lst)

  # Cluster-specific means of the three ordinal variables.
  mu_1 <- mu_or * (2 * clu_k)
  mu_2 <- -mu_or * (exp(clu_k) / 10)
  mu_3 <- mu_or * (7 * log(clu_k))

  # Data generation (the order of the RNG draws is part of the contract:
  # changing it would change the data produced for a given seed).
  x1 <- mu_1 + rnorm(n, 0, 1)
  x2 <- mu_2 + rnorm(n, 0, 1)
  x3 <- mu_3 + rnorm(n, 0, 1)

  data_x <- scale(cbind(x1, x2, x3))

  ############################
  # Data corresponding to nominal clusters
  mu_no_mat <- diag(K)

  # Nominal labels: number of thresholds (i - 1) / K that the uniform draw
  # reaches, i.e. a value in 1..K that is independent of clu_k.
  p_lst <- runif(n, 0, 1)
  clu_no <- p_lst * 0
  for (i in seq_len(K)) {
    clu_no <- clu_no + (p_lst >= ((i - 1) / K))
  }

  # One indicator column per nominal cluster, shifted by mu_no.
  x_no <- matrix(0, n, K)
  for (i in seq_len(n)) {
    x_no[i, ] <- mu_no * mu_no_mat[clu_no[i], ]
  }
  # Column-major fill reproduces the draw order of adding rnorm(n) to each
  # column in turn.
  x_no <- x_no + matrix(rnorm(n * K, 0, 1), n, K)
  colnames(x_no) <- paste("x_no", seq_len(K), sep = "_")

  ###############################
  # Noise variables
  p_e <- p - ncol(data_x) - ncol(x_no)
  if (p_e < 0) {
    stop(
      "p must be at least ", ncol(data_x) + ncol(x_no),
      " (ordinal + nominal columns); got ", p,
      call. = FALSE
    )
  }
  noise_mat <- matrix(rnorm(n * p_e, 0, 1), n, p_e)
  if (p_e > 0) {
    noise_mat <- nor_minmax(noise_mat)
    colnames(noise_mat) <- paste("noise", seq_len(p_e), sep = "_")
  }

  data <- scale(cbind(data_x, x_no, noise_mat))

  ## clustering
  okm <- okm_basic(data = data, k = K, seed = seed + 102)

  kendall <- cor(clu_k, okm$cluster, method = "kendall")
  rand <- rand.index(clu_k, okm$cluster)

  list(
    okm_clu = okm$cluster,
    cluster = clu_k,
    clu_no = clu_no,
    kendall = kendall,
    rand = rand
  )
}

## Clustering

In [49]:
# Number of replicates simulated for every setting.
iter_n <- 100
# iter_n <- 5  # smaller value for a quick trial run

In [50]:
# Simulation batch 1: settings models_0 .. models_5 with
# mu_or = 0.792 / d for d = 0.1, 0.3, 0.5, 0.7, 0.9, 1.1 and mu_no = 5,
# each replicated iter_n times.
ss <- 43
set.seed(ss)
# Pool of replicate seeds; setting-specific offsets select entries from it.
seed_r <- round(runif(25000, 1, 30000))

# Fail before the long run if any index 120 * i + offset (largest offset used
# below: 224) would fall outside the seed pool and produce an NA seed.
stopifnot(120 * iter_n + 224 <= length(seed_r))

# Preallocate the result lists instead of growing them inside the loops.
models_0 <- vector("list", iter_n)
models_1 <- vector("list", iter_n)
models_2 <- vector("list", iter_n)
models_3 <- vector("list", iter_n)
models_4 <- vector("list", iter_n)
models_5 <- vector("list", iter_n)

# Sys.time() between settings prints a timestamp for progress tracking.
# NOTE(review): models_0 and models_1 both use seed offset 22, so they share
# the same replicate seeds -- confirm this is intended.
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 22]
  models_0[[i]] <- setting_K4(mu_or = 0.792 * (1 / 0.1), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 22]
  models_1[[i]] <- setting_K4(mu_or = 0.792 * (1 / 0.3), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 21]
  models_2[[i]] <- setting_K4(mu_or = 0.792 * (1 / 0.5), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 224]
  models_3[[i]] <- setting_K4(mu_or = 0.792 * (1 / 0.7), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 25]
  models_4[[i]] <- setting_K4(mu_or = 0.792 * (1 / 0.9), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 223]
  models_5[[i]] <- setting_K4(mu_or = 0.792 * (1 / 1.1), mu_no = 5, seed = seed_rr)
}
Sys.time()



[1] "2024-08-09 20:10:57 KST"

[1] "2024-08-09 20:38:17 KST"

[1] "2024-08-09 21:08:00 KST"

[1] "2024-08-09 21:40:08 KST"

[1] "2024-08-09 22:14:04 KST"

[1] "2024-08-09 22:48:23 KST"

[1] "2024-08-09 23:30:30 KST"

In [51]:
# Simulation batch 2: settings models_6 .. models_10 with
# mu_or = 0.792 / d for d = 1.3, 1.5, 1.7, 1.9, 2.1 and mu_no = 5,
# each replicated iter_n times.
ss <- 553
set.seed(ss)
# Pool of replicate seeds; setting-specific offsets select entries from it.
seed_r <- round(runif(25000, 1, 30000))

# Fail before the long run if any index 120 * i + offset (largest offset used
# below: 224) would fall outside the seed pool and produce an NA seed.
stopifnot(120 * iter_n + 224 <= length(seed_r))

# Preallocate the result lists instead of growing them inside the loops.
models_6 <- vector("list", iter_n)
models_7 <- vector("list", iter_n)
models_8 <- vector("list", iter_n)
models_9 <- vector("list", iter_n)
models_10 <- vector("list", iter_n)

# Sys.time() between settings prints a timestamp for progress tracking.
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 22]
  models_6[[i]] <- setting_K4(mu_or = 0.792 * (1 / 1.3), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 21]
  models_7[[i]] <- setting_K4(mu_or = 0.792 * (1 / 1.5), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 224]
  models_8[[i]] <- setting_K4(mu_or = 0.792 * (1 / 1.7), mu_no = 5, seed = seed_rr)
}
Sys.time()
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[120 * i + 25]
  models_9[[i]] <- setting_K4(mu_or = 0.792 * (1 / 1.9), mu_no = 5, seed = seed_rr)
}
Sys.time()
# NOTE(review): this setting indexes the seed pool with 1 * i + 3 (consecutive
# entries) rather than the 120 * i + offset pattern used by every other
# setting -- confirm this is intended. Kept as is to reproduce the results.
for (i in seq_len(iter_n)) {
  seed_rr <- seed_r[1 * i + 3]
  models_10[[i]] <- setting_K4(mu_or = 0.792 * (1 / 2.1), mu_no = 5, seed = seed_rr)
}
Sys.time()

[1] "2024-08-09 23:30:30 KST"

[1] "2024-08-10 00:14:50 KST"

[1] "2024-08-10 00:51:33 KST"

[1] "2024-08-10 01:21:00 KST"

[1] "2024-08-10 01:53:29 KST"

[1] "2024-08-10 02:26:03 KST"

# Results

In [52]:
# Collect the replicate lists of all eleven settings into one unnamed list,
# ordered models_0 (position 1) through models_10 (position 11).
mm_lst <- list(
  models_0,
  models_1,
  models_2,
  models_3,
  models_4,
  models_5,
  models_6,
  models_7,
  models_8,
  models_9,
  models_10
)

In [53]:
#####################################################
# Persist the raw simulation results of all settings.
# The file is written with saveRDS(), so despite its ".RData" extension it
# has to be read back with readRDS(), not load().
saveRDS(object = mm_lst, file = "OKM_Simulation_CHI_ord_results.RData")


In [54]:
# Rand index of the fitted partition against the true ordered labels
# (rand_or_mat) and against the nominal labels (rand_no_mat).
# Rows index the settings in mm_lst, columns index the replicates; the number
# of replicates is taken from the first setting.
rand_or_mat <- matrix(0, length(mm_lst), length(mm_lst[[1]]))
rand_no_mat <- matrix(0, length(mm_lst), length(mm_lst[[1]]))

# seq_len() over the matrix dimensions: unlike 1:length(...), it runs zero
# iterations when a dimension is empty.
for (i in seq_len(nrow(rand_or_mat))) {
  for (j in seq_len(ncol(rand_or_mat))) {
    mm <- mm_lst[[i]][[j]]
    rand_or <- rand.index(mm$cluster, mm$okm_clu)
    rand_no <- rand.index(mm$clu_no, mm$okm_clu)

    rand_or_mat[i, j] <- rand_or
    rand_no_mat[i, j] <- rand_no
  }
}


In [56]:
# Average the Rand indices over the replicates (one mean per row of each
# matrix).
rand_or_lst <- apply(rand_or_mat, MARGIN = 1, FUN = mean)
rand_no_lst <- apply(rand_no_mat, MARGIN = 1, FUN = mean)

# Bundle both summaries: "or" = against ordered labels, "no" = against
# nominal labels.
rand_okm <- list(or = rand_or_lst, no = rand_no_lst)

# Written with saveRDS(); read back with readRDS() despite the ".RData" name.
saveRDS(object = rand_okm, file = "OKM_rand_CHI_ord.RData")