In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import arff
from scipy.spatial.distance import cdist, pdist
from sklearn import cluster
from sklearn import metrics

In [3]:
def get_data(path, name):
    """Load the first two attributes of every record of an ARFF file.

    Args:
        path: Prefix of the file location. It is concatenated with ``name``
            as-is, so it must already end with a path separator.
        name: File name; converted with ``str`` before being appended to
            ``path``.

    Returns:
        A NumPy array with one row per ARFF record and two columns holding
        the record's first and second attribute values.
    """
    # The context manager closes the file handle as soon as parsing is done
    # instead of leaving it open until garbage collection.
    with open(path + str(name), 'r') as arff_file:
        databrut = arff.loadarff(arff_file)
    # loadarff returns a (records, metadata) pair; only the records are used.
    return np.array([[x[0], x[1]] for x in databrut[0]])

### Score de regroupement

In [None]:
def min_distance_to_center(model, datant):
    """Average, over clusters, of the smallest point-to-center distance.

    For every cluster index in ``range(model.n_clusters)``, the Euclidean
    distance between each point carrying that label and the matching row of
    ``model.cluster_centers_`` is computed and the smallest one is kept.
    The result is the mean of those per-cluster minima.

    Args:
        model: Fitted clustering model exposing ``labels_``,
            ``cluster_centers_`` and ``n_clusters``.
        datant: Data points, one row per sample, in the same order as
            ``model.labels_``.

    Returns:
        Mean of the per-cluster minimum distances. Clusters without any
        assigned point are skipped; ``nan`` is returned when no cluster has
        points.
    """
    points = np.asarray(datant)
    labels = np.asarray(model.labels_)
    centers = np.asarray(model.cluster_centers_)

    min_distances = []
    for i in range(model.n_clusters):
        members = points[labels == i]
        if members.shape[0] == 0:
            # An empty cluster has no distance to contribute, and reducing
            # a zero-size array with min() would raise ValueError.
            continue
        # Row-wise norm of the offsets = distance of each member to center i.
        distances = np.linalg.norm(members - centers[i], axis=1)
        min_distances.append(distances.min())

    if not min_distances:
        return np.nan
    return np.mean(min_distances)

In [None]:
def max_distance_to_center(model, datant):
    """Average, over clusters, of the largest point-to-center distance.

    For every cluster index in ``range(model.n_clusters)``, the Euclidean
    distance between each point carrying that label and the matching row of
    ``model.cluster_centers_`` is computed and the largest one is kept.
    The result is the mean of those per-cluster maxima.

    Args:
        model: Fitted clustering model exposing ``labels_``,
            ``cluster_centers_`` and ``n_clusters``.
        datant: Data points, one row per sample, in the same order as
            ``model.labels_``.

    Returns:
        Mean of the per-cluster maximum distances. Clusters without any
        assigned point are skipped; ``nan`` is returned when no cluster has
        points.
    """
    points = np.asarray(datant)
    labels = np.asarray(model.labels_)
    centers = np.asarray(model.cluster_centers_)

    max_distances = []
    for i in range(model.n_clusters):
        members = points[labels == i]
        if members.shape[0] == 0:
            # An empty cluster has no distance to contribute, and reducing
            # a zero-size array with max() would raise ValueError.
            continue
        # Row-wise norm of the offsets = distance of each member to center i.
        distances = np.linalg.norm(members - centers[i], axis=1)
        max_distances.append(distances.max())

    if not max_distances:
        return np.nan
    return np.mean(max_distances)

In [None]:
def mean_distance_to_center(model, datant):
    """Average, over clusters, of the mean point-to-center distance.

    For every cluster index in ``range(model.n_clusters)``, the Euclidean
    distance between each point carrying that label and the matching row of
    ``model.cluster_centers_`` is computed and averaged. The result is the
    mean of those per-cluster averages, so every non-empty cluster weighs
    the same regardless of its size.

    Args:
        model: Fitted clustering model exposing ``labels_``,
            ``cluster_centers_`` and ``n_clusters``.
        datant: Data points, one row per sample, in the same order as
            ``model.labels_``.

    Returns:
        Mean of the per-cluster mean distances. Clusters without any
        assigned point are skipped; ``nan`` is returned when no cluster has
        points.
    """
    points = np.asarray(datant)
    labels = np.asarray(model.labels_)
    centers = np.asarray(model.cluster_centers_)

    mean_distances = []
    for i in range(model.n_clusters):
        members = points[labels == i]
        if members.shape[0] == 0:
            # Averaging over an empty cluster would inject NaN into the
            # final score, so such clusters are left out.
            continue
        # Row-wise norm of the offsets = distance of each member to center i.
        distances = np.linalg.norm(members - centers[i], axis=1)
        mean_distances.append(distances.mean())

    if not mean_distances:
        return np.nan
    return np.mean(mean_distances)

In [None]:
def calculer_score_regroupement(model, path, name):
    """Print the three cohesion scores of a fitted clustering model.

    The dataset is read with ``get_data(path, name)`` and passed to
    ``min_distance_to_center``, ``max_distance_to_center`` and
    ``mean_distance_to_center``; the three resulting values are printed.

    Args:
        model: Fitted clustering model forwarded to the three score helpers.
        path: Location prefix forwarded to ``get_data``.
        name: File name forwarded to ``get_data``.

    Returns:
        None. The scores are only printed.
    """
    # Parse the file once and share the array between the three scores.
    data = get_data(path, name)

    # Local names avoid shadowing the builtins min() and max().
    min_score = min_distance_to_center(model, data)
    max_score = max_distance_to_center(model, data)
    mean_score = mean_distance_to_center(model, data)

    # Display the results
    print(f"Distance moyenne entre les points d'un cluster et son centre pour chaque cluster: {mean_score}")
    print(f"Distance minimale pour chaque cluster: {min_score}")
    print(f"Distance maximale pour chaque cluster: {max_score}")

### Score de séparation

In [None]:
def single_linkage(model, datanp):
    """Average single-linkage distance over all pairs of clusters.

    For every unordered pair of cluster indices taken from
    ``range(model.n_clusters)``, the smallest Euclidean distance between a
    point of the first cluster and a point of the second is computed. The
    result is the mean of those pairwise minima.

    Args:
        model: Fitted clustering model exposing ``labels_`` and
            ``n_clusters``.
        datanp: Data points as a 2-D array, one row per sample, in the same
            order as ``model.labels_``.

    Returns:
        Mean of the closest-pair distances. Pairs involving a cluster with
        no assigned point are skipped; ``nan`` is returned when no valid
        pair exists (for instance with fewer than two clusters).
    """
    points = np.asarray(datanp)
    labels = np.asarray(model.labels_)
    n_clusters = model.n_clusters

    # Membership does not depend on the pair being examined, so each
    # cluster's points are gathered once up front.
    members = [points[labels == k] for k in range(n_clusters)]

    single_linkage_distances = []
    for i in range(n_clusters):
        if members[i].shape[0] == 0:
            continue
        for j in range(i + 1, n_clusters):
            if members[j].shape[0] == 0:
                # No point to compare against; min() of an empty matrix
                # would raise ValueError.
                continue
            distances = cdist(members[i], members[j], metric='euclidean')
            single_linkage_distances.append(distances.min())

    if not single_linkage_distances:
        return np.nan
    return np.mean(single_linkage_distances)

In [None]:
def complete_linkage(model, datanp):
    """Average complete-linkage distance over all pairs of clusters.

    For every unordered pair of cluster indices taken from
    ``range(model.n_clusters)``, the largest Euclidean distance between a
    point of the first cluster and a point of the second is computed. The
    result is the mean of those pairwise maxima.

    Args:
        model: Fitted clustering model exposing ``labels_`` and
            ``n_clusters``.
        datanp: Data points as a 2-D array, one row per sample, in the same
            order as ``model.labels_``.

    Returns:
        Mean of the farthest-pair distances. Pairs involving a cluster with
        no assigned point are skipped; ``nan`` is returned when no valid
        pair exists (for instance with fewer than two clusters).
    """
    points = np.asarray(datanp)

    # Cluster label assigned to each point
    labels = np.asarray(model.labels_)

    # Number of clusters
    n_clusters = model.n_clusters

    # Membership does not depend on the pair being examined, so each
    # cluster's points are gathered once up front.
    members = [points[labels == k] for k in range(n_clusters)]

    # One entry per cluster pair: distance between their two farthest points
    complete_linkage_distances = []

    for i in range(n_clusters):
        if members[i].shape[0] == 0:
            continue
        for j in range(i + 1, n_clusters):
            if members[j].shape[0] == 0:
                # No point to compare against; max() of an empty matrix
                # would raise ValueError.
                continue
            distances = cdist(members[i], members[j], metric='euclidean')

            # Keep the largest distance of the pair
            complete_linkage_distances.append(distances.max())

    if not complete_linkage_distances:
        return np.nan
    return np.mean(complete_linkage_distances)

In [None]:
def centroid_linkage(model):
    """Mean Euclidean distance between every pair of distinct cluster centers.

    Args:
        model: Fitted clustering model exposing ``cluster_centers_``.

    Returns:
        Mean of the center-to-center distances, each unordered pair counted
        once; ``nan`` when the model has fewer than two centers.
    """
    centers = np.asarray(model.cluster_centers_)
    if centers.shape[0] < 2:
        # No pair of centers exists, so there is nothing to average.
        return np.nan
    # pdist yields each unordered pair exactly once, i.e. the strict upper
    # triangle of the full symmetric distance matrix.
    return np.mean(pdist(centers, metric='euclidean'))

In [None]:
def calculer_score_separation(model, path, name):
    """Print the three separation scores of a fitted clustering model.

    The dataset is read with ``get_data(path, name)`` and passed to
    ``single_linkage`` and ``complete_linkage``; ``centroid_linkage`` is
    computed from the model alone. The three values are printed.

    Args:
        model: Fitted clustering model forwarded to the three score helpers.
        path: Location prefix forwarded to ``get_data``.
        name: File name forwarded to ``get_data``.

    Returns:
        None. The scores are only printed.
    """
    # Parse the file once and share the array between both linkage scores.
    data = get_data(path, name)

    simple = single_linkage(model, data)
    complete = complete_linkage(model, data)
    centroid = centroid_linkage(model)

    # Display the results
    print(f"Distance moyenne entre les centres de cluster : {centroid}")
    print(f"Distance moyenne minimale : {simple}")
    print(f"Distance moyenne maximale : {complete}")
