In [81]:
source("./Ordered K-means.R")


library(ggplot2)
library(gridExtra)
library(ggpubr)
library(ggthemes)
library(GGally)
library(RColorBrewer)
library(corrplot)
library(dplyr)
library(pdfCluster)
library(fossil)
options(warn=-1)
library(Rtsne)
library(latex2exp)

In [82]:
# Column-wise min-max normalisation.
#
# Rescales every column of `x` to the range [0, 1] using
# (value - column minimum) / (column maximum - column minimum).
#
# Arguments:
#   x : a matrix (or matrix-like object supporting nrow/ncol and x[, i]).
# Returns:
#   A numeric matrix with the same dimensions and column names as `x`.
#   A column whose values are all equal has zero range; it is returned as
#   all zeros instead of the NaN that 0/0 would produce.
#   A matrix with zero columns is returned as an empty nrow(x) x 0 matrix.
nor_minmax = function(x){
  xx=matrix(0,nrow(x),ncol(x))
  colnames(xx)=colnames(x)
  # seq_len() yields an empty sequence for zero columns, whereas 1:ncol(x)
  # would iterate over c(1, 0) and fail with "subscript out of bounds".
  for(i in seq_len(ncol(x))){
    col_i=x[,i]
    lo=min(col_i)
    span=max(col_i)-lo
    if(isTRUE(span==0)){
      # Constant column: avoid 0/0.
      xx[,i]=0
    }else{
      # Also reached when span is NA/NaN, so missing values propagate as before.
      xx[,i]=(col_i-lo)/span
    }
  }
  return(xx)
}

# CHR: between-/within-cluster variance ratio (CHI) for the ordinal vs. nominal designs

In [83]:
# Compare the between/within-cluster variance ratio of the ordinal design
# (3 variables driven by the cluster index) with that of the nominal design
# (one indicator-type variable per cluster, scaled by mu_no).
# The last expression shows sqrt(CHI_no / CHI_or).

# Sum of the outer products of the rows of a centroid matrix, accumulated
# row by row from the first row to the last.
row_outer_sum <- function(centroids) {
    Reduce(`+`, lapply(seq_len(nrow(centroids)), function(r) {
        centroids[r, ] %*% t(centroids[r, ])
    }))
}

# ---- Ordinal design -------------------------------------------------------
clu_kk <- 1:4
mu_or <- 1

x1 <- mu_or*2*clu_kk
x2 <- -mu_or*(1/10)*exp(clu_kk)
x3 <- mu_or*7*log(clu_kk)

mu_mat_or <- cbind(x1, x2, x3)          # one row per cluster centroid
mu_v_or <- apply(mu_mat_or, 2, mean)    # overall mean of the centroids

m_or <- row_outer_sum(mu_mat_or)
# Trace of (mean outer product - outer product of the mean).
bc_or <- sum(diag(m_or/4 - mu_v_or %*% t(mu_v_or)))
# Trace of the 3x3 identity, i.e. unit noise variance on each variable.
wc_or <- sum(diag(4^(-1)*4*diag(3)))

CHI_or <- bc_or/wc_or

# ---- Nominal design -------------------------------------------------------
mu_no <- 6.32
mu_mat_no <- mu_no*diag(4)
mu_v_no <- apply(mu_mat_no, 2, mean)

m_no <- row_outer_sum(mu_mat_no)
bc_no <- sum(diag(m_no/4 - mu_v_no %*% t(mu_v_no)))
wc_no <- sum(diag(4^(-1)*4*diag(4)))

CHI_no <- bc_no/wc_no

# Relative strength of the nominal design versus the ordinal one.
sqrt(CHI_no/CHI_or)

# Simulation setting function

In [84]:
# Simulate one data set with an ordinal and a nominal cluster structure plus
# noise variables, cluster it with okm_basic(), and score the result.
#
# Arguments:
#   mu_or   : multiplier for the ordinal cluster means.
#   n_k_lst : size of each ordinal cluster; K = length(n_k_lst).
#   p       : total number of variables
#             (3 ordinal + K nominal + the remainder as pure noise).
#             Must be at least 3 + K.
#   seed    : seed for data generation; okm_basic() receives seed + 102.
#   mu_no   : mean shift of the nominal variables (0 makes them pure noise).
#
# Returns a list with:
#   okm_clu : okm$cluster, the cluster component returned by okm_basic()
#   cluster : true ordinal cluster label (1..K) of each observation
#   clu_no  : nominal cluster label (1..K) of each observation
#   kendall : Kendall correlation between `cluster` and `okm_clu`
#   rand    : rand.index(cluster, okm_clu)
#
# Side effect: calls set.seed(seed), i.e. resets the global RNG state.
# NOTE(review): okm_basic() is not defined in this notebook (presumably it
# comes from the sourced "Ordered K-means.R") and rand.index() presumably
# comes from the `fossil` package - confirm.
setting_K4<-function(mu_or=1,n_k_lst=c(50,50,50,50),
                                p=20,seed=1234,mu_no=10){
    K=length(n_k_lst)
    n=sum(n_k_lst)

    # 3 ordinal variables + K nominal variables; whatever is left is noise.
    n_ordinal=3
    p_e=p-n_ordinal-K
    if(p_e<0){
        stop("p must be at least ",n_ordinal+K,
             " (3 ordinal + K nominal variables); got ",p)
    }

    set.seed(seed)

    # True ordinal label of every observation: 1 repeated n_k_lst[1] times, etc.
    clu_k=rep(seq_len(K),times=n_k_lst)

    # Cluster means of the three ordinal variables: linear, exponential and
    # logarithmic in the cluster index.
    mu_1=mu_or*(2*clu_k)
    mu_2=-mu_or*(exp(clu_k)/10)
    mu_3=mu_or*(7*log(clu_k))

    # Data generation (the order of the random draws is kept as x1, x2, x3,
    # nominal labels, nominal noise, noise variables, so a given seed
    # reproduces the same data).
    x1=mu_1+rnorm(n,0,1)
    x2=mu_2+rnorm(n,0,1)
    x3=mu_3+rnorm(n,0,1)

    data_x=scale(cbind(x1,x2,x3))

    ############################
    # Data corresponding to nominal clusters
    # Labels are drawn independently of clu_k: label = number of thresholds
    # (i-1)/K, i = 1..K, that the uniform draw reaches.
    p_lst=runif(n,0,1)
    clu_no=rowSums(outer(p_lst,(seq_len(K)-1)/K,">="))

    # Observation in nominal cluster j has mean mu_no on variable j and 0
    # elsewhere; unit-variance Gaussian noise is added to every entry.
    x_no=mu_no*diag(K)[clu_no,,drop=FALSE]+matrix(rnorm(n*K,0,1),n,K)
    colnames(x_no)=paste("x_no",seq_len(K),sep="_")

    ###############################
    # Noise variables (skipped when p leaves no room for them)
    data_all=cbind(data_x,x_no)
    if(p_e>0){
        noise_mat=nor_minmax(matrix(rnorm(n*p_e,0,1),n,p_e))
        colnames(noise_mat)=paste("noise",seq_len(p_e),sep="_")
        data_all=cbind(data_all,noise_mat)
    }

    data=scale(data_all)

    ## clustering
    okm=okm_basic(data=data,k=K,seed=seed+102)

    kendall=cor(clu_k,okm$cluster,method="kendall")
    rand=rand.index(clu_k,okm$cluster)

    return(list(okm_clu=okm$cluster,cluster=clu_k,clu_no=clu_no,kendall=kendall,rand=rand))
}

# Iteration

In [88]:
# Number of simulated data sets per scenario.
# (Use a small value such as 5 for a quick trial run.)
iter_n <- 100

In [89]:
# Run the simulation for 11 scenarios that differ only in the strength of the
# nominal signal (mu_no = 6.32 * multiplier, multiplier = 0, 0.2, ..., 2).
# Each scenario performs iter_n calls to setting_K4(); a timestamp is printed
# before the first scenario and after every scenario.
# The recorded run of this cell (iter_n = 100) took roughly 7 hours.

# Pool of per-run seeds drawn from one master seed.
ss=11021
set.seed(ss)
seed_r=round(runif(20000,1,30000))

# Declared for compatibility with the original cell; not populated here.
models_or=list()
models_11=list()

# One row per scenario. Replication i of a scenario uses the seed
#   seed_r[stride*i + offset] + shift
# and the nominal mean 6.32*mu_mult. The numbers reproduce the original
# per-scenario loops exactly, so results are unchanged.
sim_grid=data.frame(
    stride =c(110, 110,  11,  17,  14,    1,    1,   3,  10,    7,   5),
    offset =c(102,1002,2100,  20, 203, 7000, 8000, 300,  70, 2000, 120),
    shift  =c(  0,   0,  33,   0,   0,    0,    0,   0,   0,    0,   0),
    mu_mult=c(  0, 0.2, 0.4, 0.6, 0.8,  1.0,  1.2, 1.4, 1.6,  1.8,   2)
)

# Every seed index must exist in seed_r; an index past the end would give an
# NA seed and make set.seed() fail in the middle of a long run.
stopifnot(max(sim_grid$stride*iter_n+sim_grid$offset)<=length(seed_r))

# Run all iter_n replications of scenario `s` (a row number of sim_grid) and
# return the list of setting_K4() results.
run_scenario=function(s){
    cfg=sim_grid[s,]
    lapply(seq_len(iter_n),function(i){
        seed_rr=seed_r[cfg$stride*i+cfg$offset]+cfg$shift
        setting_K4(mu_or=1,p=20,seed=seed_rr,mu_no=6.32*cfg$mu_mult)
    })
}

print(Sys.time())
scenario_runs=vector("list",nrow(sim_grid))
for(s in seq_len(nrow(sim_grid))){
    scenario_runs[[s]]=run_scenario(s)
    print(Sys.time())
}

# Expose the results under the names used by the following cells.
models_0=scenario_runs[[1]]
models_1=scenario_runs[[2]]
models_2=scenario_runs[[3]]
models_3=scenario_runs[[4]]
models_4=scenario_runs[[5]]
models_5=scenario_runs[[6]]
models_6=scenario_runs[[7]]
models_7=scenario_runs[[8]]
models_8=scenario_runs[[9]]
models_9=scenario_runs[[10]]
models_10=scenario_runs[[11]]



[1] "2024-08-09 20:10:46 KST"

[1] "2024-08-09 20:50:04 KST"

[1] "2024-08-09 21:27:06 KST"

[1] "2024-08-09 22:05:11 KST"

[1] "2024-08-09 22:42:06 KST"

[1] "2024-08-09 23:21:03 KST"

[1] "2024-08-09 23:57:57 KST"

[1] "2024-08-10 00:36:05 KST"

[1] "2024-08-10 01:14:50 KST"

[1] "2024-08-10 01:56:38 KST"

[1] "2024-08-10 02:32:42 KST"

[1] "2024-08-10 03:10:23 KST"

# Results

In [96]:
# Collect the per-scenario result lists in scenario order
# (element k holds models_{k-1}).
mm_lst <- list(
    models_0, models_1, models_2, models_3, models_4, models_5,
    models_6, models_7, models_8, models_9, models_10
)

# Save the results.
# The file is written with saveRDS(), so it has to be read back with
# readRDS() (not load()) despite the ".RData" extension.
saveRDS(mm_lst, file="OKM_Simulation_CHI_nom_results.RData")

In [97]:
# Rand index of the OKM clustering against the ordinal labels (rand_or_mat)
# and against the nominal labels (rand_no_mat).
# Rows = scenarios, columns = replications; the number of replications is
# taken from the first scenario.
n_scen <- length(mm_lst)
n_rep <- length(mm_lst[[1]])

rand_or_mat <- matrix(0, n_scen, n_rep)
rand_no_mat <- matrix(0, n_scen, n_rep)

for (s in seq_len(n_scen)) {
    runs <- mm_lst[[s]][seq_len(n_rep)]
    rand_or_mat[s, ] <- vapply(runs, function(mm) rand.index(mm$cluster, mm$okm_clu), numeric(1))
    rand_no_mat[s, ] <- vapply(runs, function(mm) rand.index(mm$clu_no, mm$okm_clu), numeric(1))
}


In [98]:
# Average the Rand indices over replications: one value per scenario.
rand_or_lst <- apply(rand_or_mat, 1, mean)
rand_no_lst <- apply(rand_no_mat, 1, mean)

# "or": agreement with the ordinal labels, "no": with the nominal labels.
rand_okm <- list(or = rand_or_lst, no = rand_no_lst)

# Written with saveRDS(); read back with readRDS().
saveRDS(rand_okm, file="OKM_rand_CHI_nom.RData")