# In [None]:
import scipy.io
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

class KMeansClusterer:
    """K-means clustering of per-dyad, per-time-step data loaded from ``.mat`` files.

    Typical workflow: ``load_all_data`` -> ``extract_diagonals`` -> ``fit`` ->
    ``assign_clusters_to_dyads`` -> plotting / export helpers.
    """

    def __init__(self, n_clusters: int):
        """Create an unfitted clusterer that will look for ``n_clusters`` clusters."""
        self.n_clusters = n_clusters
        self.kmeans = None          # fitted sklearn KMeans estimator (set by fit)
        self.cluster_labels = None  # one label per sample (set by fit)
        self.centroids = None       # cluster centers (set by fit)
        self.inertia = None         # inertia of the fitted model (set by fit)
        self.matrices = None        # flattened, max-normalized training data (set by fit)
        self.dyad_clusters = None   # {dyad name: [labels]} (set by assign_clusters_to_dyads)
        self.dyad_names = []        # file names of loaded dyads (set by load_all_data)

    def load_all_data(self, directory: str):
        """Load the ``WTC3DmatrixDyad`` variable from every ``.mat`` file in ``directory``.

        Files are visited in sorted name order; files without that variable
        are skipped. The names of the files actually used are stored in
        ``self.dyad_names`` (any previous list is discarded).

        Returns:
            A tuple ``(data, dyad_names)`` where ``data`` is ``np.array`` of
            the loaded variables, stacked in file order.
        """
        all_data = []
        self.dyad_names = []  # Track dyad names
        for file in sorted(os.listdir(directory)):
            if not file.endswith(".mat"):
                continue
            mat = scipy.io.loadmat(os.path.join(directory, file))
            if 'WTC3DmatrixDyad' in mat:
                all_data.append(mat['WTC3DmatrixDyad'])
                self.dyad_names.append(file)
        return np.array(all_data), self.dyad_names

    def extract_diagonals(self, data: np.ndarray) -> np.ndarray:
        """Return the main diagonal of ``data[i, :, :, j]`` for every dyad ``i`` and time step ``j``.

        Args:
            data: 4-D array indexed as (dyad, row, column, time step).

        Returns:
            A 2-D array with one row per (dyad, time step) pair, ordered
            dyad-major (all time steps of dyad 0, then dyad 1, ...). An empty
            1-D array is returned when there are no dyads.

        Raises:
            ValueError: if ``data`` has dyads but is not 4-dimensional.
        """
        if data.shape[0] == 0:
            return np.array([])
        if data.ndim != 4:
            raise ValueError(
                f"Expected 4-D data (dyad, row, column, time), got {data.ndim}-D."
            )
        n_dyads, n_steps = data.shape[0], data.shape[3]
        # np.diagonal drops axes 1 and 2 and appends the diagonal as the last
        # axis, giving (dyad, time step, diagonal length).
        diag = np.diagonal(data, axis1=1, axis2=2)
        # np.diagonal returns a read-only view; np.array makes a writable copy.
        return np.array(diag.reshape(n_dyads * n_steps, diag.shape[-1]))

    def flatten_matrices(self, matrices: np.ndarray) -> np.ndarray:
        """Reshape ``matrices`` to 2-D, keeping axis 0 and flattening the rest."""
        return matrices.reshape(matrices.shape[0], -1)

    @staticmethod
    def _scale_by_max(flat: np.ndarray) -> np.ndarray:
        """Divide ``flat`` by its global maximum.

        Raises:
            ValueError: if the maximum is zero, since dividing would yield
                NaN/inf values that only fail later inside KMeans.
        """
        peak = flat.max()
        if peak == 0:
            raise ValueError("Cannot normalize data whose maximum value is zero.")
        return flat / peak

    def _prepare(self, matrices: np.ndarray) -> np.ndarray:
        """Flatten ``matrices`` to 2-D and normalize by the global maximum."""
        return self._scale_by_max(self.flatten_matrices(matrices))

    @staticmethod
    def _save_curve(x_values, y_values, y_label, title, file_path):
        """Plot ``y_values`` against cluster counts ``x_values`` and save to ``file_path``."""
        plt.figure()
        plt.plot(x_values, y_values, marker='o', linestyle='--')
        plt.xlabel('Clusters')
        plt.ylabel(y_label)
        plt.title(title)
        plt.savefig(file_path)
        plt.close()

    def fit(self, matrices: np.ndarray) -> None:
        """Fit K-means (``random_state=0``) on the flattened, max-normalized ``matrices``.

        Populates ``matrices`` (the normalized training data), ``kmeans``,
        ``cluster_labels``, ``centroids`` and ``inertia``.
        """
        self.matrices = self._prepare(matrices)
        self.kmeans = KMeans(n_clusters=self.n_clusters, random_state=0)
        self.cluster_labels = self.kmeans.fit_predict(self.matrices)
        self.centroids = self.kmeans.cluster_centers_
        self.inertia = self.kmeans.inertia_

    # Elbow method applied to the matrices.
    def elbow_method(self, matrices, range_clusters, save_path):
        """Save an inertia-vs-k plot for k in ``1 .. range_clusters - 1``.

        The plot is written to ``<save_path>/elbow_method.png``. This does not
        use or modify the model fitted by ``fit``.
        """
        samples = self._prepare(matrices)
        candidate_ks = range(1, range_clusters)
        inertia_values = [
            KMeans(n_clusters=k, random_state=0).fit(samples).inertia_
            for k in candidate_ks
        ]
        self._save_curve(
            candidate_ks, inertia_values, 'Inertia', 'Elbow Method',
            os.path.join(save_path, "elbow_method.png"),
        )

    # Silhouette method applied to the matrices.
    def silhouette_score_method(self, matrices, range_clusters, save_path):
        """Save a silhouette-score-vs-k plot for k in ``2 .. range_clusters - 1``.

        The plot is written to ``<save_path>/silhouette_method.png``. This does
        not use or modify the model fitted by ``fit``.
        """
        samples = self._prepare(matrices)
        candidate_ks = range(2, range_clusters)
        silhouette_scores = []
        for k in candidate_ks:
            labels = KMeans(n_clusters=k, random_state=0).fit(samples).labels_
            silhouette_scores.append(silhouette_score(samples, labels))
        self._save_curve(
            candidate_ks, silhouette_scores, 'Silhouette Score', 'Silhouette Score',
            os.path.join(save_path, "silhouette_method.png"),
        )

    # Once the clusters are defined, assign each sample of each dyad to its cluster.
    def assign_clusters_to_dyads(self):
        """Split ``cluster_labels`` into equal consecutive chunks, one per dyad.

        Labels are assumed to be ordered dyad-major, in the same order as
        ``dyad_names`` (this is the order ``extract_diagonals`` produces).

        Returns:
            ``{dyad name: [label per time step]}``; also stored in
            ``self.dyad_clusters``.

        Raises:
            ValueError: if the model is not fitted, no dyads are loaded, or
                the number of labels is not a positive multiple of the number
                of dyads.
        """
        if self.cluster_labels is None:
            raise ValueError("Train the model before assigning clusters.")
        # getattr keeps this safe for instances created without __init__.
        if not getattr(self, 'dyad_names', None):
            raise ValueError("Load dyad data before assigning clusters.")

        num_dyads = len(self.dyad_names)
        total_labels = len(self.cluster_labels)
        steps_per_dyad, leftover = divmod(total_labels, num_dyads)
        if steps_per_dyad == 0 or leftover:
            # Without this check, integer division would either index past
            # the end of dyad_names or silently yield an empty mapping.
            raise ValueError(
                f"Cannot split {total_labels} labels evenly across {num_dyads} dyads."
            )

        dyad_clusters = {}
        for position, dyad_name in enumerate(self.dyad_names):
            start = position * steps_per_dyad
            chunk = self.cluster_labels[start:start + steps_per_dyad]
            dyad_clusters.setdefault(dyad_name, []).extend(chunk)

        self.dyad_clusters = dyad_clusters
        return dyad_clusters

    # Plot the centroid of each cluster.
    def plot_centroids(self, matrices, save_path):
        """Save one line plot per centroid to ``<save_path>/centroids.png``.

        Args:
            matrices: unused; kept so existing callers keep working. The
                centroid width is taken from the fitted model itself, which
                stays correct even when ``fit`` had to flatten N-D input.
            save_path: directory that receives ``centroids.png``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.kmeans is None:
            raise ValueError("Train the model before plotting centroids.")
        n_centroids = self.centroids.shape[0]
        self.centroids = self.centroids.reshape(n_centroids, -1)
        plt.figure(figsize=(10, 5))
        # NOTE(review): the grid has two rows but only the first is filled;
        # left as-is so the saved figure keeps its current layout.
        for i in range(n_centroids):
            plt.subplot(2, n_centroids, i + 1)
            plt.plot(self.centroids[i])
            plt.title(f'Centroid {i+1}')
        plt.tight_layout()
        plt.savefig(os.path.join(save_path, "centroids.png"))
        plt.close()

    # Distribution of all dyads across clusters at each time step.
    def save_cluster_distribution_csv(self, save_path):
        """Write per-time-step cluster counts to ``<save_path>/cluster_distribution.csv``.

        The CSV has columns ``Time Step``, ``Cluster``, ``Dyad Count`` and one
        row per (time step, cluster) pair, including clusters with zero dyads.
        The number of time steps is taken from the first dyad.

        Raises:
            ValueError: if clusters have not been assigned to dyads yet.
        """
        if not self.dyad_clusters:
            raise ValueError("Assign clusters before saving distribution.")
        first_dyad = next(iter(self.dyad_clusters.values()))
        rows = []
        for t in range(len(first_dyad)):
            cluster_counts = dict.fromkeys(range(self.n_clusters), 0)
            for clusters in self.dyad_clusters.values():
                cluster_counts[clusters[t]] += 1
            rows.extend([t, cluster, count] for cluster, count in cluster_counts.items())
        df = pd.DataFrame(rows, columns=['Time Step', 'Cluster', 'Dyad Count'])
        df.to_csv(os.path.join(save_path, "cluster_distribution.csv"), index=False)

    # Plot, for each dyad, which cluster it belongs to over time.
    def plot_dyad_clusters(self, save_path):
        """Save one step plot per dyad under ``<save_path>/dyad_plots/``.

        Raises:
            ValueError: if clusters have not been assigned to dyads yet.
        """
        if self.dyad_clusters is None:
            raise ValueError("Assign clusters before plotting dyad clusters.")
        dyad_plots_dir = os.path.join(save_path, "dyad_plots")
        os.makedirs(dyad_plots_dir, exist_ok=True)
        for dyad, values in self.dyad_clusters.items():
            time = np.arange(len(values))
            plt.figure(figsize=(8, 4))
            plt.step(time, values, where='post', marker="o", linestyle="-")
            plt.xlabel("Time (s)")
            plt.ylabel("Cluster")
            plt.title(f"Dyad {dyad} Cluster Evolution")
            plt.grid(True, linestyle="--", alpha=0.7)
            plt.savefig(os.path.join(dyad_plots_dir, f"dyad_{dyad}.png"))
            plt.close()

    # Write the silhouette score of the fitted model.
    def print_silhouette_score(self, save_path):
        """Write the fitted model's silhouette score to ``<save_path>/silhouette_score.txt``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.kmeans is None or self.cluster_labels is None:
            raise ValueError("Train the model before computing silhouette score.")
        # self.matrices is already the flattened, normalized data the model
        # was fitted on, so it is scored directly without re-normalizing.
        score = silhouette_score(self.matrices, self.cluster_labels)
        with open(os.path.join(save_path, "silhouette_score.txt"), "w") as f:
            f.write(f"Silhouette Score: {score:.4f}\n")

# Main execution loop.
# To change which cluster counts are studied, adjust the bounds of the
# range() in the for loop below.

# The raw data does not depend on n_clusters, so it is loaded and its
# diagonals extracted once instead of once per iteration.
loader = KMeansClusterer(n_clusters=2)  # n_clusters is not used for loading
# Change this path to point at another raw-data directory.
data, dyad_names = loader.load_all_data("Win15sec-Overlap1sec")
if len(data) == 0 or len(dyad_names) == 0:
    raise ValueError("No valid dyad data found in directory.")
diagonals = loader.extract_diagonals(data)

for n_clusters in range(2, 11):
    # Change this prefix to save the results elsewhere.
    save_directory = f"results_diagonal_{n_clusters}"
    os.makedirs(save_directory, exist_ok=True)
    clusterer = KMeansClusterer(n_clusters=n_clusters)
    # assign_clusters_to_dyads() reads dyad_names, which load_all_data()
    # would otherwise have set on this instance.
    clusterer.dyad_names = list(dyad_names)

    clusterer.fit(diagonals)
    # NOTE: the two sweeps below iterate over k themselves and do not depend
    # on n_clusters; they are re-run so every results directory still gets
    # its own copy of the plots.
    clusterer.elbow_method(diagonals, range_clusters=10, save_path=save_directory)
    clusterer.silhouette_score_method(diagonals, range_clusters=10, save_path=save_directory)
    clusterer.plot_centroids(diagonals, save_path=save_directory)
    # One assignment is enough: both exports below read the same mapping.
    clusterer.assign_clusters_to_dyads()
    clusterer.save_cluster_distribution_csv(save_path=save_directory)
    clusterer.plot_dyad_clusters(save_path=save_directory)
    clusterer.print_silhouette_score(save_path=save_directory)
    print(f"Results saved in {save_directory}")
