In [0]:
# Install the co-clustering library, the Cluster_Ensembles consensus package and
# the METIS graph partitioner (Cluster_Ensembles shells out to gpmetis).
# - coclust is pinned to the version the recorded run installed.
# - GitHub no longer serves the unauthenticated git:// protocol, so clone over https.
# - `-y` keeps apt-get from waiting on an interactive confirmation.
%pip install coclust==0.2.1
%pip install git+https://github.com/GGiecold/Cluster_Ensembles.git
!apt-get install -y metis

Collecting coclust
  Downloading https://files.pythonhosted.org/packages/5d/44/ad5a69c7187c2b7bcf2c45596e9052811a3be52f4fcaa6709937c5146ee2/coclust-0.2.1.tar.gz
Building wheels for collected packages: coclust
  Building wheel for coclust (setup.py) ... [?25l[?25hdone
  Created wheel for coclust: filename=coclust-0.2.1-cp36-none-any.whl size=29872 sha256=76210239a2b17b1214d234752e11745f1cdb7662b21e8fc92542f9553bb83c88
  Stored in directory: /root/.cache/pip/wheels/cd/d7/68/df601d0b5f8b934cf890dc626c2271df381fb0c3e910b0a34e
Successfully built coclust
Installing collected packages: coclust
Successfully installed coclust-0.2.1
Collecting git+git://github.com/GGiecold/Cluster_Ensembles.git
  Cloning git://github.com/GGiecold/Cluster_Ensembles.git to /tmp/pip-req-build-dcwvnsie
  Running command git clone -q git://github.com/GGiecold/Cluster_Ensembles.git /tmp/pip-req-build-dcwvnsie
Building wheels for collected packages: Cluster-Ensembles
  Building wheel for Cluster-Ensembles (setup.py) 

In [0]:
from google.colab import drive
drive.mount('/content/drive')

import os
from coclust.coclustering import CoclustInfo, CoclustMod, CoclustSpecMod
from coclust.clustering import SphericalKmeans
import Cluster_Ensembles as CE
from scipy import io
import sys
import numpy as np
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from random import *

# Short aliases for the two external clustering-quality metrics used for scoring.
nmi = normalized_mutual_info_score
ari = adjusted_rand_score



#################################################
# Parameters
#################################################
# Multiplier applied to the number of classes to obtain the number of column
# clusters (n_col_clusters is only passed to CoclustInfo); also part of the
# result file name.
col_mult = 1

# (estimator class, display name) pairs to evaluate. The name is used in log
# messages, in the result file name and to pick the constructor arguments.
algos_pair = [(CoclustInfo, "CoclustInfo"), (CoclustSpecMod, "CoclustSpecMod"), (CoclustMod, "CoclustMod")]
#algos_pair = [(CoclustInfo, "CoclustInfo")]
#algos_pair = [(CoclustMod, "CoclustMod")]

# Dataset base names; each is expanded into two .mat paths inside run().
#datasets = ["classic4", "cstr", "ng5", "rcv4", "ng20"]
#datasets = ["classic4", "cstr", "ng5", "rcv4"]
datasets = ["ng20"]
#################################################



#################################################
# Functions
#################################################
def make_co_assoc(row_labels):
    """Build the co-association matrix of an ensemble of clusterings.

    ``row_labels`` is a 2-D integer array holding one clustering per row
    (n_runs x n_samples). Entry (a, b) of the returned
    n_samples x n_samples float array counts the runs in which samples
    a and b were given the same label.
    """
    n_runs, n_samples = row_labels.shape[0], row_labels.shape[1]
    sample_idx = np.arange(n_samples)
    total = np.zeros((n_samples, n_samples))
    for run_idx in range(n_runs):
        assignment = row_labels[run_idx]
        # One-hot membership: indicator[s, c] == 1 iff sample s is in cluster c.
        indicator = np.zeros((n_samples, np.max(assignment) + 1))
        indicator[sample_idx, assignment] = 1.0
        # The Gram matrix of the one-hot rows is 1 where two samples share a cluster.
        total += indicator.dot(indicator.T)
    return total


def run_cluster_ensembles(row_labels, number_of_classes, y):
    """Return the consensus labelling computed by Cluster_Ensembles.

    ``row_labels`` is forwarded as the cluster runs and ``number_of_classes``
    as the maximum number of consensus clusters. ``y`` is not used here; the
    caller scores the returned labels itself.
    """
    consensus_labels = CE.cluster_ensembles(
        cluster_runs=row_labels,
        N_clusters_max=number_of_classes,
    )
    return consensus_labels

    
def run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y, algo, algo_name):
    """Cluster the samples from the co-association matrix of an ensemble.

    The co-association matrix of ``row_labels`` is built with
    ``make_co_assoc`` and fitted with a fresh ``algo`` instance
    (n_init=20, max_iter=100). When ``algo_name`` is "CoclustInfo" the
    estimator receives separate row/column cluster counts; any other name
    gets a single ``n_clusters``. ``y`` is not used here.

    Returns the fitted estimator's ``row_labels_``.
    """
    affinity = make_co_assoc(row_labels)

    fit_options = dict(n_init=20, max_iter=100)
    if algo_name != "CoclustInfo":
        estimator = algo(n_clusters=number_of_classes, **fit_options)
    else:
        estimator = algo(
            n_row_clusters=number_of_classes,
            n_col_clusters=n_col_clusters,
            **fit_options
        )

    estimator.fit(affinity)
    return estimator.row_labels_
#################################################



#################################################
# RUN
#################################################
def run(dataset_name, algo_pair):
    """Evaluate consensus clusterings built from one co-clustering algorithm.

    Steps, in order:
      1. Fit the algorithm repeatedly on the 'dtm' matrix and keep the row
         labellings of the 10 runs with the highest score.
      2. Build a consensus with Cluster_Ensembles and score it.
      3. Fit the algorithm on the co-association matrix and score it.
      4. Add one spherical k-means labelling to the ensemble and repeat 2 and 3.

    Parameters
    ----------
    dataset_name : str
        Base name of the .mat files: the matrix is read from
        Dataset2/<name>_ti_n.mat (variable 'dtm'), the ground truth from
        Dataset3/<name>.mat (variable 'gnd' or 'labels').
    algo_pair : tuple
        (estimator class, display name).

    Returns
    -------
    (nmis, aris) : tuple of two lists
        Four NMI and four ARI scores against the ground truth, in the order
        [consensus, co-association, consensus + k-means, co-association + k-means].

    Raises
    ------
    KeyError
        If the label file contains neither 'gnd' nor 'labels'.
    """
    algo = algo_pair[0]
    algo_name = algo_pair[1]
    nmis = []
    aris = []

    ##############
    # Load Data
    ##############
    mat_file = "/content/drive/My Drive/M2/Mixture/Dataset2/"+dataset_name+"_ti_n.mat"
    mat2_file = "/content/drive/My Drive/M2/Mixture/Dataset3/"+dataset_name+".mat"

    # Load dataset
    mat = io.loadmat(mat_file)
    X = mat['dtm']
    print(X.shape)

    # Load label
    mat2 = io.loadmat(mat2_file)
    if 'gnd' in mat2:
        y = mat2['gnd']
    elif 'labels' in mat2:
        y = mat2['labels']
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise KeyError("neither 'gnd' nor 'labels' found in " + mat2_file)
    number_of_classes = len(np.unique(y))
    print(y.shape)
    print(number_of_classes)

    # Labels are stored as (1, n) or (n, 1); the metrics need a flat vector.
    y_true = y.ravel()

    n_col_clusters = number_of_classes * col_mult

    def record_scores(labels):
        # Append the NMI / ARI of `labels` against the ground truth.
        nmis.append(nmi(labels, y_true))
        aris.append(ari(labels, y_true))

    ##############
    # 1
    ##############
    print("#################################################")
    print("# 1 - Run "+algo_name+"...")
    print("#################################################")

    criterions = []
    t_row_labels = []

    # CoclustSpecMod: few runs with many restarts each; others: many single-start runs.
    epochs = 10 if algo == CoclustSpecMod else 200
    n_init = 20 if algo == CoclustSpecMod else 1

    # Print progress roughly ten times; never let the step reach zero.
    progress_step = max(1, epochs // 10)

    for i in range(epochs):
        if i % progress_step == 0:
            print(i)
        if algo == CoclustInfo:
            model = algo(n_row_clusters=number_of_classes, n_col_clusters=n_col_clusters, max_iter=100, n_init=n_init)
        else:
            model = algo(n_clusters=number_of_classes, max_iter=100, n_init=n_init)
        model.fit(X)
        # Use whichever quality score the fitted model exposes; when it has
        # neither attribute, a random score makes the selection arbitrary.
        if "criterion" in model.__dict__:
            criterions.append(model.criterion)
        elif "modularity" in model.__dict__:
            criterions.append(model.modularity)
        else:
            criterions.append(random())
        t_row_labels.append(model.row_labels_)

    # Keep the 10 runs with the highest score.
    row_labels = np.array(t_row_labels)
    order = list(np.flip(np.argsort(criterions))[:10])

    row_labels = row_labels[order]
    print(row_labels.shape)

    ##############
    # 2
    ##############
    print("#################################################")
    print("# 2 - Run cluster ensembles...")
    print("#################################################")

    res = run_cluster_ensembles(row_labels, number_of_classes, y)
    record_scores(res)

    ##############
    # 3
    ##############
    print("#################################################")
    print("# 3 - Run "+algo_name+" on co-association matrix...")
    print("#################################################")

    res = run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y, algo, algo_name)
    record_scores(res)

    ##############
    # 4
    ##############
    print("#################################################")
    print("# 4 - Run spherical k-means...")
    print("#################################################")
    model = SphericalKmeans(n_clusters=number_of_classes, n_init=20, max_iter=100)
    model.fit(X)

    # Extend the ensemble with the k-means labelling as one more run.
    temp = list(row_labels)
    temp.append(model.row_labels_)
    row_labels = np.array(temp)

    res = run_cluster_ensembles(row_labels, number_of_classes, y)
    record_scores(res)

    res = run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y, algo, algo_name)
    record_scores(res)

    ##############
    # Results
    ##############
    print("#################################################")
    print("# Results")
    print("#################################################")

    print(nmis)
    print(aris)
    return nmis, aris
#################################################



#datasets = ["cstr"]

# Run every (dataset, algorithm) combination and append its scores to a result
# file on Drive. A combination whose result file already exists is skipped, so
# re-running the cell only computes what is missing.
for dataset in datasets:
    print("#################################################")
    print("# DATASET: "+dataset)
    print("#################################################")

    for algo_pair in algos_pair:
        file_name = "/content/drive/My Drive/M2/Mixture/"+dataset+"_"+algo_pair[1]+"_"+str(col_mult)
        if os.path.exists(file_name):
            continue

        print("#################################################")
        print("# ALGO: "+algo_pair[1])
        print("#################################################")

        # Best run so far, judged by the mean of mean-NMI and mean-ARI.
        best_nmis, best_aris = [0], [0]
        for _ in range(1):
            nmis, aris = run(dataset, algo_pair)
            if np.mean([np.mean(nmis), np.mean(aris)]) > np.mean([np.mean(best_nmis), np.mean(best_aris)]):
                best_nmis, best_aris = nmis, aris

            # Log this repetition; `with` closes the file even if the write fails.
            content = ', '.join(str(t) for t in nmis) + "\n" + ', '.join(str(t) for t in aris) + "\n\n"
            with open(file_name, "a") as myfile:
                myfile.write(content)

        # Final entry: the best repetition.
        content = ', '.join(str(t) for t in best_nmis) + "\n" + ', '.join(str(t) for t in best_aris) + "\n"
        with open(file_name, "a") as myfile:
            myfile.write(content)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
#################################################
# DATASET: ng20
#################################################
#################################################
# ALGO: CoclustSpecMod
#################################################
(18846, 14390)
(1, 18846)
20
#################################################
# 1 - Run CoclustSpecMod...
#################################################
0
1
2
3
4
5
6
7
8
9
(10, 18846)
#################################################
# 2 - Run cluster ensembles...
#################################################

INFO: Cluster_Ensembles: cluster_ensembles: due to a rather large number of cells in your data-set, using only 'HyperGraph Partitioning Algorithm' (HGPA) and 'Meta-CLustering Algorithm' (MCLA) as ensemble consensus functions.


*****
INFO: Cluster_Ensembles: HGPA: consensus clustering using HGPA.

#
INFO: Clus




INFO: Cluster_Ensembles: cluster_ensembles: HGPA at 0.44498611677022065.
*****

*****
INFO: Cluster_Ensembles: MCLA: consensus clustering using MCLA.
INFO: Cluster_Ensembles: MCLA: preparing graph for meta-clustering.
INFO: Cluster_Ensembles: MCLA: done filling hypergraph adjacency matrix. Starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: done computing the matrix of pairwise Jaccard similarity scores.

#
INFO: Cluster_Ensembles: wgraph: writing wgraph_MCLA.
#

#
INFO: Cluster_Ensembles: sgraph: calling gpmetis for graph partitioning.
INFO: Cluster_Ensembles: sgraph: (hyper)-graph partitioning completed; loading wgraph_MCLA.part.20
#
INFO: Cluster_Ensembles: MCLA: delivering 20 clusters.
INFO: Cluster_Ensembles: MCLA: average posterior probability is 0.0935207038773779





INFO: Cluster_Ensembles: cluster_ensembles: MCLA at 0.6446055941381209.
*****
#################################################
# 3 - Run CoclustSpecMod on co-association matrix...
#################################################


In [0]:
# Mount Google Drive at /content/drive so files stored there are reachable
# through the local file system.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
##################################################################################################
# Consensus on Coclust Info + SpecMod + Mod + SkMeans
##################################################################################################

from google.colab import drive
drive.mount('/content/drive')

import os
from coclust.coclustering import CoclustInfo, CoclustMod, CoclustSpecMod
from coclust.clustering import SphericalKmeans
import Cluster_Ensembles as CE
from scipy import io
import sys
import numpy as np
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from random import *

# Short aliases for the two external clustering-quality metrics used for scoring.
nmi = normalized_mutual_info_score
ari = adjusted_rand_score



#################################################
# Parameters
#################################################
# Multiplier applied to the number of classes to obtain the number of column
# clusters (n_col_clusters is only passed to CoclustInfo); also part of the
# result file name.
col_mult = 2

# (estimator class, display name) pairs; every pair contributes runs to the
# shared ensemble and is fitted again on the co-association matrix.
algos_pair = [(CoclustInfo, "CoclustInfo"), (CoclustSpecMod, "CoclustSpecMod"), (CoclustMod, "CoclustMod")]
#algos_pair = [(CoclustInfo, "CoclustInfo")]
#algos_pair = [(CoclustInfo, "CoclustInfo"), (CoclustMod, "CoclustMod")]

# Dataset base names; each is expanded into two .mat paths inside run().
#datasets = ["classic4", "cstr", "ng5", "rcv4", "ng20"]
datasets = ["classic4", "cstr", "ng5", "rcv4"]
#datasets = ["ng20"]
#################################################



#################################################
# Functions
#################################################
def make_co_assoc(row_labels):
    """Build the co-association matrix of an ensemble of clusterings.

    ``row_labels`` is a 2-D integer array holding one clustering per row
    (n_runs x n_samples). Entry (a, b) of the returned
    n_samples x n_samples float array counts the runs in which samples
    a and b were given the same label.
    """
    n_runs, n_samples = row_labels.shape[0], row_labels.shape[1]
    sample_idx = np.arange(n_samples)
    total = np.zeros((n_samples, n_samples))
    for run_idx in range(n_runs):
        assignment = row_labels[run_idx]
        # One-hot membership: indicator[s, c] == 1 iff sample s is in cluster c.
        indicator = np.zeros((n_samples, np.max(assignment) + 1))
        indicator[sample_idx, assignment] = 1.0
        # The Gram matrix of the one-hot rows is 1 where two samples share a cluster.
        total += indicator.dot(indicator.T)
    return total


def run_cluster_ensembles(row_labels, number_of_classes, y):
    """Return the consensus labelling computed by Cluster_Ensembles.

    ``row_labels`` is forwarded as the cluster runs and ``number_of_classes``
    as the maximum number of consensus clusters. ``y`` is not used here; the
    caller scores the returned labels itself.
    """
    consensus_labels = CE.cluster_ensembles(
        cluster_runs=row_labels,
        N_clusters_max=number_of_classes,
    )
    return consensus_labels

    
def run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y, algo, algo_name):
    """Cluster the samples from the co-association matrix of an ensemble.

    The co-association matrix of ``row_labels`` is built with
    ``make_co_assoc`` and fitted with a fresh ``algo`` instance
    (n_init=20, max_iter=100). When ``algo_name`` is "CoclustInfo" the
    estimator receives separate row/column cluster counts; any other name
    gets a single ``n_clusters``. ``y`` is not used here.

    Returns the fitted estimator's ``row_labels_``.
    """
    affinity = make_co_assoc(row_labels)

    fit_options = dict(n_init=20, max_iter=100)
    if algo_name != "CoclustInfo":
        estimator = algo(n_clusters=number_of_classes, **fit_options)
    else:
        estimator = algo(
            n_row_clusters=number_of_classes,
            n_col_clusters=n_col_clusters,
            **fit_options
        )

    estimator.fit(affinity)
    return estimator.row_labels_
#################################################



#################################################
# RUN
#################################################
def run(dataset_name, algos_pair):
    """Evaluate consensus clusterings built from several algorithms at once.

    Steps, in order:
      1. For every (estimator class, name) pair, fit repeatedly on the 'dtm'
         matrix and keep the row labellings of the 3 highest-scoring runs;
         then add 3 spherical k-means labellings to the same ensemble.
      2. Build a consensus with Cluster_Ensembles and score it.
      3. For every pair, fit the estimator on the co-association matrix of
         the ensemble and score it.

    Parameters
    ----------
    dataset_name : str
        Base name of the .mat files: the matrix is read from
        Dataset2/<name>_ti_n.mat (variable 'dtm'), the ground truth from
        Dataset3/<name>.mat (variable 'gnd' or 'labels').
    algos_pair : iterable of tuple
        (estimator class, display name) pairs.

    Returns
    -------
    (nmis, aris) : tuple of two lists
        NMI and ARI scores against the ground truth: first the consensus,
        then one entry per pair in ``algos_pair``.

    Raises
    ------
    KeyError
        If the label file contains neither 'gnd' nor 'labels'.
    """
    nmis = []
    aris = []

    ##############
    # Load Data
    ##############
    mat_file = "/content/drive/My Drive/M2/Mixture/Dataset2/"+dataset_name+"_ti_n.mat"
    mat2_file = "/content/drive/My Drive/M2/Mixture/Dataset3/"+dataset_name+".mat"

    # Load dataset
    mat = io.loadmat(mat_file)
    X = mat['dtm']
    print(X.shape)

    # Load label
    mat2 = io.loadmat(mat2_file)
    if 'gnd' in mat2:
        y = mat2['gnd']
    elif 'labels' in mat2:
        y = mat2['labels']
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise KeyError("neither 'gnd' nor 'labels' found in " + mat2_file)
    number_of_classes = len(np.unique(y))
    print(y.shape)
    print(number_of_classes)

    # Labels are stored as (1, n) or (n, 1); the metrics need a flat vector.
    y_true = y.ravel()

    n_col_clusters = number_of_classes * col_mult

    ##############
    # 1
    ##############
    # Labellings collected from every algorithm; converted to an array once
    # per stage instead of after every single addition.
    ensemble = []
    for algo, algo_name in algos_pair:
        print("#################################################")
        print("# 1 - Run "+algo_name+"...")
        print("#################################################")

        criterions = []
        t_row_labels = []
        # CoclustSpecMod: few runs with many restarts each; others: many single-start runs.
        epochs = 3 if algo == CoclustSpecMod else 3 * 20
        n_init = 20 if algo == CoclustSpecMod else 1

        # Print progress roughly ten times; never let the step reach zero.
        progress_step = max(1, int(epochs/10))

        for i in range(epochs):
            if i % progress_step == 0:
                print(i)
            if algo == CoclustInfo:
                model = algo(n_row_clusters=number_of_classes, n_col_clusters=n_col_clusters, max_iter=100, n_init=n_init)
            else:
                model = algo(n_clusters=number_of_classes, max_iter=100, n_init=n_init)
            model.fit(X)
            # Use whichever quality score the fitted model exposes; when it has
            # neither attribute, a random score makes the selection arbitrary.
            if "criterion" in model.__dict__:
                criterions.append(model.criterion)
            elif "modularity" in model.__dict__:
                criterions.append(model.modularity)
            else:
                criterions.append(random())
            t_row_labels.append(model.row_labels_)

        # Keep the 3 runs with the highest score.
        row_labels = np.array(t_row_labels)
        order = list(np.flip(np.argsort(criterions))[:3])
        ensemble.extend(row_labels[order])

    all_row_labels = np.array(ensemble)
    print(all_row_labels.shape)

    print("#################################################")
    print("# 1 - Run spherical k-means...")
    print("#################################################")
    for i in range(3):
        model = SphericalKmeans(n_clusters=number_of_classes, n_init=20, max_iter=100)
        model.fit(X)
        ensemble.append(model.row_labels_)

    all_row_labels = np.array(ensemble)
    print(all_row_labels.shape)

    ##############
    # 2
    ##############
    print("#################################################")
    print("# 2 - Run cluster ensembles...")
    print("#################################################")

    res = run_cluster_ensembles(all_row_labels, number_of_classes, y)
    nmis.append(nmi(res, y_true))
    aris.append(ari(res, y_true))

    ##############
    # 3
    ##############
    for algo, algo_name in algos_pair:
        print("#################################################")
        print("# 3 - Run "+algo_name+" on co-association matrix...")
        print("#################################################")
        res = run_coclust_co_assoc(all_row_labels, number_of_classes, n_col_clusters, y, algo, algo_name)
        nmis.append(nmi(res, y_true))
        aris.append(ari(res, y_true))

    ##############
    # Results
    ##############
    print("#################################################")
    print("# Results")
    print("#################################################")

    print(nmis)
    print(aris)
    return nmis, aris
#################################################



#datasets = ["cstr"]

# Run the all-models consensus once per dataset and append the scores to a
# result file on Drive. A dataset whose result file already exists is skipped.
for dataset in datasets:
    print("#################################################")
    print("# DATASET: "+dataset)
    print("#################################################")

    file_name = "/content/drive/My Drive/M2/Mixture/"+dataset+"_all_models_"+str(col_mult)
    if os.path.exists(file_name):
        continue

    for _ in range(1):
        nmis, aris = run(dataset, algos_pair)

        # One line of NMIs, one line of ARIs; `with` closes the file even if
        # the write fails.
        content = ', '.join(str(t) for t in nmis) + "\n" + ', '.join(str(t) for t in aris) + "\n\n"
        with open(file_name, "a") as myfile:
            myfile.write(content)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
#################################################
# DATASET: classic4
#################################################
(7094, 5896)
(1, 7094)
4
#################################################
# 1 - Run CoclustInfo...
#################################################
0
6
12
18
24
30
36
42
48
54
#################################################
# 1 - Run CoclustSpecMod...
#################################################
0
1
2
#################################################
# 1 - Run CoclustMod...
#################################################
0
6
12
18
24
30
36
42
48
54
(9, 7094)
#################################################
# 1 - Run spherical k-means...
#################################################
 == New init == 
iteration: 0
772.8315925037664
iteration: 1
912.6335734586612
iteration: 2
980.9680205037344
iteration: 3
1013.703305781434
itera




INFO: Cluster_Ensembles: cluster_ensembles: CSPA at 0.480093122368056.
*****

*****
INFO: Cluster_Ensembles: HGPA: consensus clustering using HGPA.

#
INFO: Cluster_Ensembles: wgraph: writing wgraph_HGPA.
INFO: Cluster_Ensembles: wgraph: 7094 vertices and 48 non-zero hyper-edges.
#

#
INFO: Cluster_Ensembles: sgraph: calling shmetis for hypergraph partitioning.
INFO: Cluster_Ensembles: sgraph: (hyper)-graph partitioning completed; loading wgraph_HGPA.part.4
#





INFO: Cluster_Ensembles: cluster_ensembles: HGPA at 0.10681624266842096.
*****

*****
INFO: Cluster_Ensembles: MCLA: consensus clustering using MCLA.
INFO: Cluster_Ensembles: MCLA: preparing graph for meta-clustering.
INFO: Cluster_Ensembles: MCLA: done filling hypergraph adjacency matrix. Starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: done computing the matrix of pairwise Jaccard similarity scores.

#
INFO: Cluster_Ensembles: wgraph: writing wgraph_MCLA.
#

#
INFO: Cluster_Ensembles: sgraph: calling gpmetis for graph partitioning.
INFO: Cluster_Ensembles: sgraph: (hyper)-graph partitioning completed; loading wgraph_MCLA.part.4
#
INFO: Cluster_Ensembles: MCLA: delivering 4 clusters.
INFO: Cluster_Ensembles: MCLA: average posterior probability is 0.44488345811126445





INFO: Cluster_Ensembles: cluster_ensembles: MCLA at 0.5814695909496972.
*****
#################################################
# 3 - Run CoclustMod on co-association matrix...
#################################################




#################################################
# Results
#################################################
[0.7585136329262427, 0.7698345602749281, 0.7715481684989948, 0.753683235365091]
[0.6782326299638243, 0.7171508510026902, 0.7176532340780195, 0.7010967281852717]
#################################################
# DATASET: cstr
#################################################
(475, 1000)
(475, 1)
4
#################################################
# 1 - Run CoclustInfo...
#################################################
0
6
12
18
24
30
36
42
48
54
#################################################
# 1 - Run CoclustSpecMod...
#################################################
0
1
2
#################################################
# 1 - Run CoclustMod...
#################################################
0
6
12
18
24
30
36
42
48
54
(9, 475)
#################################################
# 1 - Run spherical k-means...
#################################################
 == New ini




INFO: Cluster_Ensembles: cluster_ensembles: CSPA at 0.7378227555176213.
*****

*****
INFO: Cluster_Ensembles: HGPA: consensus clustering using HGPA.

#
INFO: Cluster_Ensembles: wgraph: writing wgraph_HGPA.
INFO: Cluster_Ensembles: wgraph: 475 vertices and 48 non-zero hyper-edges.
#

#
INFO: Cluster_Ensembles: sgraph: calling shmetis for hypergraph partitioning.
INFO: Cluster_Ensembles: sgraph: (hyper)-graph partitioning completed; loading wgraph_HGPA.part.4
#





INFO: Cluster_Ensembles: cluster_ensembles: HGPA at 0.0037683543605464675.
*****

*****
INFO: Cluster_Ensembles: MCLA: consensus clustering using MCLA.
INFO: Cluster_Ensembles: MCLA: preparing graph for meta-clustering.
INFO: Cluster_Ensembles: MCLA: done filling hypergraph adjacency matrix. Starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: starting computation of Jaccard similarity matrix.
INFO: Cluster_Ensembles: MCLA: done computing the matrix of pairwise Jaccard similarity scores.

#
INFO: Cluster_Ensembles: wgraph: writing wgraph_MCLA.
#

#
INFO: Cluster_Ensembles: sgraph: calling gpmetis for graph partitioning.
INFO: Cluster_Ensembles: sgraph: (hyper)-graph partitioning completed; loading wgraph_MCLA.part.4
#
INFO: Cluster_Ensembles: MCLA: delivering 4 clusters.
INFO: Cluster_Ensembles: MCLA: average posterior probability is 0.5133271747090881

INFO: Cluster_Ensembles: cluster_ensembles: MCLA at 0.8518280459046195.
*****
##########################



#################################################
# Results
#################################################
[0.7297478031873091, 0.7639518703385052, 0.7842936566257, 0.7620926477024674]
[0.7647036826065475, 0.8063447079261147, 0.8190460015788382, 0.802673616737156]
#################################################
# DATASET: ng5
#################################################
(4905, 10167)
(1, 4905)
5
#################################################
# 1 - Run CoclustInfo...
#################################################
0
6
12
18
24
30
36
42
48
54
#################################################
# 1 - Run CoclustSpecMod...
#################################################
0
1
2
#################################################
# 1 - Run CoclustMod...
#################################################
0
6
12


In [0]:
from google.colab import drive
drive.mount('/content/drive')

#import sys
#sys.path.append('/content/drive/My Drive/M2/Mixture/Cluster_Ensembles_Custom')
#import Cluster_Ensembles_Custom as CE

from coclust.coclustering import CoclustInfo, CoclustMod, CoclustSpecMod
from coclust.clustering import SphericalKmeans
import Cluster_Ensembles as CE
from scipy import io
import sys
import numpy as np
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

# Base name of the dataset; expanded below into Dataset2/<dataset>_ti_n.mat
# and Dataset3/<dataset>.mat.
dataset = "cstr"

# Estimator class used for the repeated fits and for the co-association step.
# NOTE(review): the code below always constructs `algo` with
# n_row_clusters / n_col_clusters and reads `model.criterion`. The other cells
# construct the two commented-out classes with `n_clusters=` instead, so
# switching here presumably needs those call sites adapted - confirm.
#algo = CoclustMod
#algo = CoclustSpecMod
algo = CoclustInfo


def make_co_assoc(row_labels):
    """Build the co-association matrix of an ensemble of clusterings.

    ``row_labels`` is a 2-D integer array holding one clustering per row
    (n_runs x n_samples). Entry (a, b) of the returned
    n_samples x n_samples float array counts the runs in which samples
    a and b were given the same label.
    """
    n_runs, n_samples = row_labels.shape[0], row_labels.shape[1]
    sample_idx = np.arange(n_samples)
    total = np.zeros((n_samples, n_samples))
    for run_idx in range(n_runs):
        assignment = row_labels[run_idx]
        # One-hot membership: indicator[s, c] == 1 iff sample s is in cluster c.
        indicator = np.zeros((n_samples, np.max(assignment) + 1))
        indicator[sample_idx, assignment] = 1.0
        # The Gram matrix of the one-hot rows is 1 where two samples share a cluster.
        total += indicator.dot(indicator.T)
    return total


# Scores accumulated by run_cluster_ensembles / run_coclust_co_assoc below.
nmis = []
aris = []


# Number of column clusters passed to the co-clustering estimator.
n_col_clusters = 10

nmi = normalized_mutual_info_score
ari = adjusted_rand_score

##############
# Load Data
##############

mat_file = "/content/drive/My Drive/M2/Mixture/Dataset2/"+dataset+"_ti_n.mat"
mat2_file = "/content/drive/My Drive/M2/Mixture/Dataset3/"+dataset+".mat"

# Load dataset
mat = io.loadmat(mat_file)

X = mat['dtm']

print(X.shape)

# Load label
mat2 = io.loadmat(mat2_file)
if 'gnd' in mat2:
    y = mat2['gnd']
elif 'labels' in mat2:
    y = mat2['labels']
else:
    # Without this, `y` would silently keep a value left over from a
    # previously executed cell (or raise a NameError on a fresh kernel).
    raise KeyError("neither 'gnd' nor 'labels' found in " + mat2_file)

number_of_classes = len(np.unique(y))

print(y.shape)
print(number_of_classes)



##############
# 1
##############
print("#################################################")
print("# 1 - Run coclust...")
print("#################################################")

criterions = []
t_row_labels = []

# Fit 200 independent models, remembering each criterion and row labelling.
for i in range(200):
    if i % 10 == 0:
        print(i)
    model = algo(n_row_clusters=number_of_classes, n_col_clusters=n_col_clusters, max_iter=100)
    model.fit(X)
    criterions.append(model.criterion)
    t_row_labels.append(model.row_labels_)

# Keep the 10 runs with the highest criterion.
row_labels = np.array(t_row_labels)
order = list(np.flip(np.argsort(criterions))[:10])

row_labels = row_labels[order]
print(row_labels.shape)



##############
# 2
##############
print("#################################################")
print("# 2 - Run cluster ensembles...")
print("#################################################")


def run_cluster_ensembles(row_labels, number_of_classes, y):
    """Compute the Cluster_Ensembles consensus and record its scores.

    The NMI and ARI of the consensus against ``y`` are appended to the
    module-level ``nmis`` and ``aris`` lists; the consensus labels are returned.
    """
    consensus = CE.cluster_ensembles(
        cluster_runs=row_labels,
        N_clusters_max=number_of_classes,
    )

    for scores, metric in ((nmis, nmi), (aris, ari)):
        scores.append(metric(consensus, y.ravel()))

    return consensus


run_cluster_ensembles(row_labels, number_of_classes, y)



##############
# 3
##############
print("#################################################")
print("# 3 - Run coclust on co-association matrix...")
print("#################################################")


def run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y):
    """Fit the module-level ``algo`` on the co-association matrix and record scores.

    The NMI and ARI of the resulting row labels against ``y`` are appended to
    the module-level ``nmis`` and ``aris`` lists; the row labels are returned.
    """
    affinity = make_co_assoc(row_labels)

    estimator = algo(
        n_row_clusters=number_of_classes,
        n_col_clusters=n_col_clusters,
        n_init=20,
        max_iter=100,
    )
    estimator.fit(affinity)

    nmis.append(nmi(estimator.row_labels_, y.ravel()))
    aris.append(ari(estimator.row_labels_, y.ravel()))

    return estimator.row_labels_


run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y)



##############
# 4
##############
print("#################################################")
print("# 4 - Run spherical k-means...")
print("#################################################")
model = SphericalKmeans(n_clusters=number_of_classes, n_init=20, max_iter=100)
model.fit(X)

# Add the k-means labelling to the ensemble as one more run.
temp = [*row_labels, model.row_labels_]
row_labels = np.array(temp)

# Repeat both consensus strategies on the enlarged ensemble; each call appends
# its scores to nmis / aris.
run_cluster_ensembles(row_labels, number_of_classes, y)
run_coclust_co_assoc(row_labels, number_of_classes, n_col_clusters, y)



##############
# Results
##############
print("#################################################")
print("# Results")
print("#################################################")

print(nmis)

print(aris)