In [25]:
source("./Ordered K-means_additional.R")


library(ggplot2)
library(gridExtra)
library(ggpubr)
library(ggthemes)
library(GGally)
library(RColorBrewer)
library(corrplot)
library(dplyr)
library(pdfCluster)
library(fossil)
options(warn=-1)
library(Rtsne)
library(latex2exp)

# Functions

In [26]:
# Rescale every column of a matrix-like object to the [0, 1] range.
#
# Args:
#   x: a numeric matrix (or data frame) whose columns are read as x[, i].
# Returns:
#   A numeric matrix with the same dimensions and column names as `x`, where
#   each column is (value - column min) / (column max - column min).
#   A constant column has zero range and therefore yields NaN.
#   An input with zero columns yields a zero-column matrix.
nor_minmax <- function(x) {
  xx <- matrix(0, nrow(x), ncol(x))
  colnames(xx) <- colnames(x)
  # seq_len() rather than 1:ncol(x): the latter iterates over c(1, 0) when
  # there are no columns and fails with a subscript error.
  for (i in seq_len(ncol(x))) {
    col_min <- min(x[, i])
    col_max <- max(x[, i])
    xx[, i] <- (x[, i] - col_min) / (col_max - col_min)
  }
  xx
}

# Simulate one unbalanced data set with ordered clusters (K = 7 with the
# default sizes) and cluster it with okm_basic().
#
# The data consist of two variables whose means depend on the ordered cluster
# label, K variables driven by an independently drawn nominal label, and pure
# noise variables. All columns are standardised before clustering.
#
# Args:
#   mu_or:   multiplier applied to the ordered-cluster means.
#   n_k_lst: cluster sizes; its length defines the number of clusters K.
#   p:       total number of variables; must be at least 2 + K.
#   seed:    RNG seed for data generation; `seed + 102` is passed to
#            okm_basic().
#   mu_no:   multiplier applied to the nominal-cluster means.
# Returns:
#   A list with
#     okm_clu: cluster labels returned by okm_basic(),
#     cluster: true ordered cluster labels,
#     clu_no:  nominal cluster labels,
#     kendall: Kendall correlation between true and fitted labels,
#     rand:    Rand index between true and fitted labels.
setting_K7_unbalanced <- function(mu_or = 1, n_k_lst = 21 * abs(c(1:7) - 4) + 20,
                                  p = 20, seed = 1234, mu_no = 1) {
  set.seed(seed)
  K <- length(n_k_lst)
  n <- sum(n_k_lst)

  # True ordered labels: label k repeated n_k_lst[k] times.
  clu_k <- rep(seq_len(K), times = n_k_lst)

  mu_1 <- mu_or * (1 / 3) * (clu_k)^2
  mu_2 <- mu_or * (-10) * (clu_k)^(1 / 2)

  # Data generation: the noise sd, 1 + 1 / label, shrinks as the label grows.
  x1 <- mu_1 + rnorm(n, 0, (1 + 1 / clu_k))
  x2 <- mu_2 + rnorm(n, 0, (1 + 1 / clu_k))

  data_x <- scale(cbind(x1, x2))

  ############################
  # Data corresponding to nominal clusters
  mu_no_mat <- diag(K)

  # Nominal cluster generation: the label is the number of thresholds
  # (i - 1) / 8 that the uniform draw reaches. The thresholds are multiples of
  # 1/8 regardless of K, so with K = 7 the last label covers [6/8, 1] and is
  # twice as likely as each of the others.
  # NOTE(review): confirm that the divisor 8 (rather than K) is intentional.
  p_lst <- runif(n, 0, 1)
  clu_no <- p_lst * 0
  for (i in seq_len(K)) {
    clu_no <- clu_no + (p_lst >= ((i - 1) / 8))
  }

  # Each observation gets the mean row of its nominal cluster ...
  x_no <- matrix(0, n, K)
  for (i in seq_len(n)) {
    x_no[i, ] <- mu_no * mu_no_mat[clu_no[i], ]
  }
  # ... plus standard normal noise. The matrix is filled column by column, so
  # the random draws are consumed in the same order as a per-column loop.
  x_no <- x_no + matrix(rnorm(n * K, 0, 1), n, K)
  colnames(x_no) <- paste("x_no", seq_len(K), sep = "_")

  ###############################
  # Noise variables fill the remaining p - 2 - K columns.
  p_e <- p - ncol(data_x) - ncol(x_no)
  if (p_e < 0) {
    stop("`p` must be at least ", ncol(data_x) + ncol(x_no),
         " (2 ordered variables + K nominal variables)", call. = FALSE)
  }
  if (p_e > 0) {
    noise_mat <- nor_minmax(matrix(rnorm(n * p_e, 0, 1), n, p_e))
    colnames(noise_mat) <- paste("noise", seq_len(p_e), sep = "_")
  } else {
    # No room left for noise variables.
    noise_mat <- matrix(0, n, 0)
  }

  data <- scale(cbind(data_x, x_no, noise_mat))

  ## clustering
  okm <- okm_basic(data = data, k = K, seed = seed + 102)

  kendall <- cor(clu_k, okm$cluster, method = "kendall")
  rand <- rand.index(clu_k, okm$cluster)

  list(okm_clu = okm$cluster, cluster = clu_k, clu_no = clu_no,
       kendall = kendall, rand = rand)
}

# Second 50 iterations

In [35]:
# Number of replications run for each simulation setting.
iter_n <- 50

In [36]:
# Simulation driver: for eleven strengths of the ordered-cluster signal, run
# `iter_n` replications of setting_K7_unbalanced() with seeds taken from a
# common pool.
ss <- 612
set.seed(ss)
# Pool of candidate seeds; each setting picks its own indices from it.
seed_r <- round(runif(20000, 1, 30000))
mu_ord <- 0.325

# Run one setting: print a timestamp, then fit one replication per entry of
# `seed_idx`, using mu_or = mu_ord * (1 / divisor) and seed = seed_r[idx].
# Returns the list of replication results in the order of `seed_idx`.
run_k7_setting <- function(divisor, seed_idx) {
  print(Sys.time())
  lapply(seed_idx, function(idx) {
    setting_K7_unbalanced(mu_or = mu_ord * (1 / divisor),
                          seed = seed_r[idx], mu_no = 4)
  })
}

# Replication index 1, ..., iter_n; every setting maps it to seed-pool indices.
rep_id <- seq_len(iter_n)

models_0 <- run_k7_setting(0.1, 1 * rep_id + 2002)
models_1 <- run_k7_setting(0.3, 10 * rep_id + 2)
models_2 <- run_k7_setting(0.5, 11 * rep_id + 21)
models_3 <- run_k7_setting(0.7, 10 * rep_id + 147)
models_4 <- run_k7_setting(0.9, 10 * rep_id + 1002)
models_5 <- run_k7_setting(1.1, 10 * rep_id + 2000)

models_6 <- run_k7_setting(1.3, 10 * rep_id + 232)
models_7 <- run_k7_setting(1.5, 31 * rep_id + 81)
models_8 <- run_k7_setting(1.7, 19 * (rep_id - 12) + 6501)
models_9 <- run_k7_setting(1.9, rep_id + 122)
models_10 <- run_k7_setting(2.1, 1 * rep_id + 4400)

# Timestamp marking the end of the last setting.
print(Sys.time())


[1] "2024-08-10 12:58:08 KST"

[1] "2024-08-10 13:36:19 KST"

[1] "2024-08-10 14:17:19 KST"

[1] "2024-08-10 15:06:42 KST"

[1] "2024-08-10 15:56:46 KST"

[1] "2024-08-10 16:42:19 KST"

[1] "2024-08-10 17:27:12 KST"

[1] "2024-08-10 18:05:48 KST"

[1] "2024-08-10 18:40:09 KST"

[1] "2024-08-10 19:04:41 KST"

[1] "2024-08-10 19:04:41 KST"

[1] "2024-08-10 19:29:52 KST"

[1] "2024-08-10 19:56:35 KST"

In [37]:
# Check how many replications were stored for the last setting.
length(models_10)

# List

In [38]:
# Gather the per-setting result lists, ordered models_0, ..., models_10, so
# that element k holds the replications of the k-th setting.
mm_lst_or_2 <- list(
  models_0, models_1, models_2, models_3, models_4, models_5,
  models_6, models_7, models_8, models_9, models_10
)

# Written with saveRDS(), so the file is in RDS format despite its extension
# and has to be read back with readRDS().
saveRDS(mm_lst_or_2, file = "OKM_add_simul_or_2.RData")

# Results combined with additional_simulation_or_1


## Load additional_simulation_or_1 results

In [46]:
# Read the stored results of the first simulation batch.
mm_lst_or_1 <- readRDS("OKM_add_simul_or_1.RData")

### Combine the results of additional_simulation_or_1 and additional_simulation_or_2

In [47]:
# For every setting, append the replications of the second batch
# (mm_lst_or_2) after those of the first batch (mm_lst_or_1).
#
# The batches are concatenated with c(), so the second batch always starts
# right after the last replication of the first one. Offsetting the second
# batch by its own length would be correct only when both batches happen to
# contain the same number of replications.
mm_lst <- lapply(seq_along(mm_lst_or_1), function(i) {
  c(mm_lst_or_1[[i]], mm_lst_or_2[[i]])
})

saveRDS(mm_lst, file = "OKM_simul_add_K7_CHI_ord.RData")

### Rand index

In [48]:
# Rand index of the fitted labels against the ordered labels ("or") and the
# nominal labels ("no"): one row per setting, one column per replication.
# The number of replications is taken from the first setting.
n_setting <- length(mm_lst)
n_rep <- length(mm_lst[[1]])

rand_or_mat <- matrix(0, n_setting, n_rep)
rand_no_mat <- matrix(0, n_setting, n_rep)

for (s in seq_len(n_setting)) {
  for (r in seq_len(n_rep)) {
    fit <- mm_lst[[s]][[r]]
    rand_or_mat[s, r] <- rand.index(fit$cluster, fit$okm_clu)
    rand_no_mat[s, r] <- rand.index(fit$clu_no, fit$okm_clu)
  }
}

# Average over replications within each setting.
rand_or_lst <- apply(rand_or_mat, 1, mean)
rand_no_lst <- apply(rand_no_mat, 1, mean)

rand_okm <- list(or = rand_or_lst, no = rand_no_lst)

saveRDS(rand_okm, file = "OKM_rand_add_K7_CHI_ord.RData")

In [49]:
# Show the mean Rand index per setting ("or": against the ordered labels,
# "no": against the nominal labels).
rand_okm