In [1]:
import os
import numpy as np
from scipy.optimize import linear_sum_assignment

path = "."

In [2]:
def load_all_cluster_series_from_folder(folder_path):
    """
    Load every ``.npy`` file found directly inside ``folder_path``.

    Parameters
    ----------
    folder_path : str
        Directory that is scanned (non-recursively) for ``.npy`` files.

    Returns
    -------
    list of (str, numpy.ndarray)
        One ``(file_name, array)`` tuple per ``.npy`` file, ordered by file
        name.
    """
    time_series_list = []

    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and everything derived from it) reproducible across runs and machines.
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.endswith(".npy"):
            full_path = os.path.join(folder_path, file_name)
            arr = np.load(full_path)
            time_series_list.append((file_name, arr))

    return time_series_list

# Hungarian algorithm: optimal one-to-one label matching

In [3]:
def find_best_permutation(reference, target, n_clusters):
    """
    Relabel ``target`` so that its cluster labels agree as well as possible
    with ``reference``.

    Labels are expected to be the integers ``1..n_clusters``. The overlap
    between every reference cluster and every target cluster is counted, and
    the Hungarian algorithm picks the one-to-one label assignment with the
    largest total overlap.

    Parameters
    ----------
    reference : array-like
        Reference labels.
    target : array-like
        Labels to be relabelled; compared element-wise with ``reference``.
    n_clusters : int
        Number of clusters; labels run from 1 to ``n_clusters``.

    Returns
    -------
    target_aligned : numpy.ndarray
        Integer array with the shape of ``target`` holding the new labels.
    permutation : dict
        Maps each original target label to its new label (plain ``int``s).

    Raises
    ------
    KeyError
        If ``target`` contains a value outside ``1..n_clusters``.
    """
    # Accept plain lists as well: without this, ``reference == label`` on a
    # list evaluates to a single False and every overlap silently becomes 0.
    reference = np.asarray(reference)
    target = np.asarray(target)

    # One boolean mask per label, computed once instead of inside the
    # double loop below.
    labels = range(1, n_clusters + 1)
    ref_masks = [reference == label for label in labels]
    target_masks = [target == label for label in labels]

    # Cost matrix: negated overlap counts, because linear_sum_assignment
    # minimises the total cost.
    cost_matrix = np.zeros((n_clusters, n_clusters))
    for i, ref_mask in enumerate(ref_masks):
        for j, target_mask in enumerate(target_masks):
            cost_matrix[i, j] = -np.count_nonzero(ref_mask & target_mask)

    # Hungarian algorithm
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # Permutation table: target label -> reference label, stored as plain
    # Python ints so the dict prints cleanly.
    permutation = {int(col) + 1: int(row) + 1 for row, col in zip(row_ind, col_ind)}

    # Apply the permutation mask by mask; this works for any array shape.
    target_aligned = np.zeros(target.shape, dtype=row_ind.dtype)
    relabelled = np.zeros(target.shape, dtype=bool)
    for old_label, new_label in permutation.items():
        mask = target_masks[old_label - 1]
        target_aligned[mask] = new_label
        relabelled |= mask

    if not relabelled.all():
        unknown = np.unique(target[~relabelled]).tolist()
        raise KeyError(
            f"target contains labels outside 1..{n_clusters}: {unknown}"
        )

    return target_aligned, permutation


In [4]:
time_series_list = load_all_cluster_series_from_folder(path)

# Use the k-means labelling as the reference that all other labellings are
# aligned to.
ref_name, ref_series = next(
    ((name, series) for name, series in time_series_list if name == "kmeans.npy"),
    (None, None),
)

# Fail here with a clear message instead of hitting a NameError (or silently
# reusing a stale value from an earlier kernel run) in a later cell.
if ref_name is None:
    raise FileNotFoundError(f"Reference file 'kmeans.npy' not found in {path!r}")

In [5]:
# Number of clusters = largest label found in any series (labels start at 1),
# so the alignment is not tied to a hard-coded cluster count.
n_clusters = int(max(np.max(series) for _, series in time_series_list))

aligned_series = []

for name, series in time_series_list:
    if name == ref_name:
        # The reference keeps its own labels.
        aligned_series.append((name, series))
        continue

    aligned, perm = find_best_permutation(ref_series, series, n_clusters=n_clusters)
    aligned_series.append((name, aligned))
    print(f"{name} permuted with: {perm}")

ae.npy permuted with: {5: 1, 2: 2, 4: 3, 7: 4, 3: 5, 9: 6, 8: 7, 1: 8, 6: 9}
vae.npy permuted with: {3: 1, 9: 2, 5: 3, 2: 4, 6: 5, 4: 6, 8: 7, 7: 8, 1: 9}
convae.npy permuted with: {5: 1, 4: 2, 8: 3, 9: 4, 2: 5, 3: 6, 7: 7, 1: 8, 6: 9}


# Greedy matching: repeatedly pair the clusters with the largest overlap

In [6]:
def compute_contingency_matrix(ref, target, K):
    """
    Count how often each label of ``ref`` coincides with each label of ``target``.

    Labels are taken to be ``1..K``. Entry ``[i, j]`` of the returned
    ``(K, K)`` integer matrix is the number of positions where ``ref`` equals
    ``i + 1`` and ``target`` equals ``j + 1``.
    """
    labels = range(1, K + 1)
    counts = np.zeros((K, K), dtype=int)

    for row, ref_label in enumerate(labels):
        in_ref_cluster = ref == ref_label
        for col, target_label in enumerate(labels):
            in_target_cluster = target == target_label
            counts[row, col] = np.sum(in_ref_cluster & in_target_cluster)

    return counts

def greedy_match(contingency):
    """
    Greedily pair reference and target clusters by descending overlap.

    In every round the largest entry among the still-unmatched rows
    (reference clusters) and columns (target clusters) is selected; ties go
    to the first such entry in row-major order. The chosen row and column are
    then removed from further consideration.

    Returns a dict mapping 1-based target label -> 1-based reference label.
    """
    size = contingency.shape[0]
    free_rows = list(range(size))
    free_cols = list(range(size))
    assignment = {}

    for _ in range(size):
        best_count, best_cell = -1, None

        # Scan the remaining cells in row-major order; the strict comparison
        # keeps the first of several equally large entries.
        for row, col in ((r, c) for r in free_rows for c in free_cols):
            count = contingency[row, col]
            if count > best_count:
                best_count, best_cell = count, (row, col)

        row, col = best_cell
        assignment[col + 1] = row + 1  # +1 because labels start at 1
        free_rows.remove(row)
        free_cols.remove(col)

    return assignment

def apply_mapping(target, mapping):
    """
    Translate every label in ``target`` through ``mapping``.

    ``mapping`` is indexed with each element of ``target`` in turn; the
    looked-up values are returned as a new numpy array.
    """
    relabelled = []
    for label in target:
        relabelled.append(mapping[label])
    return np.array(relabelled)

In [7]:
time_series_list = load_all_cluster_series_from_folder(path)

# Largest label over all series = number of clusters (labels start at 1).
# Cast to a plain int so it can be used for range() and array shapes even if
# the label arrays are stored with a float dtype.
K = int(max(np.max(series) for _, series in time_series_list))

# The reference keeps its own labels and goes first.
aligned_series = [(ref_name, ref_series)]

for name, series in time_series_list:
    # Skip the reference itself; compare against ref_name so this stays in
    # sync with whichever file was selected as the reference.
    if name == ref_name:
        continue
    contingency = compute_contingency_matrix(ref_series, series, K)
    mapping = greedy_match(contingency)
    aligned = apply_mapping(series, mapping)
    aligned_series.append((name, aligned))
    print(f"{name} mapping:", mapping)

ae.npy mapping: {7: 4, 5: 1, 9: 6, 2: 2, 8: 7, 1: 8, 4: 9, 3: 3, 6: 5}
vae.npy mapping: {2: 4, 4: 6, 3: 1, 7: 8, 6: 3, 8: 7, 9: 2, 1: 5, 5: 9}
convae.npy mapping: {3: 6, 2: 4, 5: 1, 1: 8, 7: 7, 8: 2, 6: 9, 9: 3, 4: 5}
