In [37]:
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
import numpy as np

def alignement_clustering(labels1, labels2):
    cm = confusion_matrix(labels1, labels1)
    # on trouve l'allocation idéale en resolvant le problème de maximisation transformé en problème de minimisation
    rowCorrect, ColCorrect = linear_sum_assignment(-np.array(cm))
    cm = cm[:,ColCorrect]
    ACC = np.trace(cm) / np.sum(cm)
    
    mapfunc = {}
    for i in range(len(ColCorrect)):
        mapfunc[rowCorrect[i]] = ColCorrect[i]

    return {"ACC" : ACC, "map" : mapfunc}

In [49]:
class SetLearningWeakClusterning():

    def __init__(self) -> None:
        self.labels_ = None

    def fit(self, X, k):
        pass


In [82]:
class SetLearningClustering():

    def __init__(self, models) -> None:
        self.models = models
        self.labels_ = []
        self.mapfuncs = []

    def fit(self, X, k):
        self.labels_ = []
        self.mapfuncs = []

        nmodels = len(self.models)
        # > calcul des labels pour chacun des modèles
        for i in range(nmodels):
            self.models[i].fit(X, k)

        # > alignement des labels de clusters
        # calcul des function de correspondance entre les identifiants des weaklearners
        # et ceux de modèles principale

        # on va considéré que le premier model à les identifiants du model

        for i in range(nmodels):
            if i == 0:
                self.mapfuncs.append({_: _ for _ in range(k)})
            else:
                self.mapfuncs.append(alignement_clustering(
                    self.models[i].labels_, self.models[0].labels_)["map"])

        # > calcul des labels finaux
        N = self.models[0].labels_.shape[0]
        for i in range(N):

            nb = [0 for _ in range(k)]
            for j in range(nmodels):
                id = self.mapfuncs[j][self.models[j].labels_[i]]
                nb[id] = nb[id] + 1
            # calcul de l'indice maximum
            # choix aléatoire de l'identifiant maximum
            id = np.argmax(nb)
            try:
                id = id[np.random.randint(id.shape[0])]
            except IndexError:
                pass
                
            self.labels_.append(id)


In [83]:
from sklearn.datasets import load_iris

In [105]:
X = load_iris()['data']
y = load_iris()["target"]

In [106]:
from sklearn.cluster import KMeans
k = 3
kmeans = KMeans(n_clusters=k, n_init="auto", max_iter=1)
kmeans.fit(X)

In [107]:
alignement_clustering(y, kmeans.labels_)

{'ACC': 1.0, 'map': {0: 0, 1: 1, 2: 2}}

In [99]:
class KmeansSet(SetLearningWeakClusterning):    
    def fit(self, X, k):
        kmeans = KMeans(n_clusters=k, n_init="auto", max_iter=1)
        kmeans.fit(X)
        self.labels_ = kmeans.labels_

In [100]:

setModel = SetLearningClustering([KmeansSet() for _ in range(10)])

In [101]:
setModel.fit(X, k)

In [102]:
alignement_clustering(y, setModel.labels_)

{'ACC': 1.0, 'map': {0: 0, 1: 1, 2: 2}}