In [2]:
from matplotlib import pyplot as plt
from utils.utils import *
import utils.promethee_functions as pf
import utils.clustering_functions as cf

data = read_data()
group0 = ["PAK", "SDN", "BDI", "HTI"]
group1 = ["EST", "CZE", "MLT", "SGP", "IRL"]
group2 = ["CHE", "ISL", "NZL", "SWE"]

all_groups = group0 + group1 + group2

data = data.loc[all_groups]

# Nb of criteria
K = data.columns.shape[0] # Nb of criteria
L = data.iloc[0]["co2prod"].shape[0] # Length of the time series

W = [1/K for i in range(K)] # Weights of the criteria (equal weights)
Q = [10, 0.3, 40,  0.4, 4, 2] # Indifference thresholds
P = [28, 0.9, 80, 1, 18, 12] # Preference thresholds

utils.py Loading
Reading HDI dataset
co2prod: min=0.0, max=33.3863
hdi: min=0.257, max=0.967
le: min=37.105, max=85.473
gdi: min=0.383, max=1.041
eys: min=3.5751, max=23.2477
mys: min=1.4606, max=14.2559


In [149]:
def K_Medoid_Eta(alternatives, distance_matrix, k=3, prototype_method="random", print_results=True, iter_max=100):
    """ 
    K-Medoid clustering algorithm using the Aggregated Eta matrix
        - alternatives: np.array of the alternatives names only
        - distance_matrix: pd.DataFrame of the distance matrix with index and columns as the alternatives names
        - k: the number of clusters

    Returns:
        - the medoids of the clusters
        - the clusters
    """

    # Initialize medoids
    if prototype_method == "random":
        medoids = np.random.choice(alternatives, k, replace=False)
    
    elif prototype_method == "farthest":
        # Select the farthest alternatives from each other
        medoids = [alternatives[0]]
        for _ in range(k-1):
            distances = [np.min([distance_matrix.loc[alternative, medoid] for medoid in medoids]) for alternative in alternatives]
            new_medoid = alternatives[np.argmax(distances)]
            medoids.append(new_medoid)
        medoids = np.array(medoids)

    if print_results:
        print("Initial medoids:", medoids)

    # Initialize clusters
    clusters = {medoid: [] for medoid in medoids}

    # Initialize assignment check
    assigned = {alternative: False for alternative in alternatives}


    # When entering the loop, we just have the medoids and no assigned alternatives
    iter = 0
    converged = False
    while not converged and iter < iter_max:

        # Assign medoid to its cluster
        for medoid in medoids:
            clusters[medoid].append(medoid)
            assigned[medoid] = True


        # Assign each alternative to the closest medoid
        for alternative in alternatives:
            if not assigned[alternative]: # If not yet assigned, assign it to the closest medoid
                distances = [distance_matrix.loc[alternative, medoid] for medoid in medoids] # Take the distances to each medoid
                closest_medoid = medoids[np.argmin(distances)] # Take the medoid with the smallest distance
                clusters[closest_medoid].append(alternative) # Assign the alternative to the cluster of the closest medoid
                assigned[alternative] = True

        if print_results:
            print("Iteration", iter)
            print("Clusters:", clusters)
            print("Assigned:", all(assigned.values()))

        # Update medoids
        converged = True
        for medoid in medoids:
            cluster = clusters[medoid]

            # Compute the sum of the distance for each alternative in the cluster towards the other alternatives in the cluster
            distances = [np.sum([distance_matrix.loc[alternative, alternative2] for alternative2 in cluster]) for alternative in cluster]
            if len(distances) > 1:
                new_medoid = cluster[np.argmin(distances)] # Take the alternative with the smallest sum of distances
            else:
                new_medoid = medoid
            if new_medoid != medoid:
                # print("Medoid", medoid, "changed to", new_medoid, "in array", medoids)
                index = np.where(medoids == medoid)
                # print("Index:", index[0])
                medoids[index[0][0]] = new_medoid # Update the medoid in the list
                converged = False # If at least one medoid has changed, we have not converged
                clusters = {medoid: [] for medoid in medoids} # Reinitialize the cluster
                assigned = {alternative: False for alternative in alternatives} # Reinitialize the cluster assignment check
        iter += 1
    
    if iter_max == iter:
        if print_results:
            print("Max iterations reached, no convergence but assigning the alternatives to the closest last medoid computed:")
        for alternative in alternatives:
            if not assigned[alternative]:
                distances = [distance_matrix.loc[alternative, medoid] for medoid in medoids]
                closest_medoid = medoids[np.argmin(distances)]
                clusters[closest_medoid].append(alternative)

    return medoids, clusters, iter

In [143]:
array = np.array(['PAK', 'NZL', 'CZE'])
index = np.where(array == "NZL")[0][0]
array[index] = "CHE"
print(array)


['PAK' 'CHE' 'CZE']


In [146]:
t = 0


alternatives = data.index
phi_c_all = pf.get_all_Phi_c(data, P, Q)
temporal_eta = pf.get_eta_matrix(data, phi_c_all, W)

eta_matrix = temporal_eta[:,:,t]
eta_matrix = pd.DataFrame(eta_matrix, index=alternatives, columns=alternatives)

medoids, clusters, iter = K_Medoid_Eta(alternatives, eta_matrix, k=3, prototype_method="farthest", print_results=True)

print("\nMedoids:", medoids)
for medoid in medoids:
    print("Cluster", medoid, ":", clusters[medoid])

  d = a_i[c] - a_j[c]


Initial medoids: ['PAK' 'NZL' 'CZE']
Iteration 0
Clusters: {np.str_('PAK'): [np.str_('PAK'), 'SDN', 'BDI', 'EST'], np.str_('NZL'): [np.str_('NZL'), 'CHE', 'ISL', 'SWE'], np.str_('CZE'): [np.str_('CZE'), 'HTI', 'MLT', 'SGP', 'IRL']}
Assigned: True
Medoid NZL changed to CHE in array ['PAK' 'NZL' 'CZE']
Index: [1]
Iteration 1
Clusters: {np.str_('PAK'): [np.str_('PAK'), 'SDN', 'BDI', 'EST'], np.str_('CHE'): [np.str_('CHE'), 'ISL', 'NZL', 'SWE'], np.str_('CZE'): [np.str_('CZE'), 'HTI', 'MLT', 'SGP', 'IRL']}
Assigned: True

Medoids: ['PAK' 'CHE' 'CZE']
Cluster PAK : [np.str_('PAK'), 'SDN', 'BDI', 'EST']
Cluster CHE : [np.str_('CHE'), 'ISL', 'NZL', 'SWE']
Cluster CZE : [np.str_('CZE'), 'HTI', 'MLT', 'SGP', 'IRL']


In [150]:
def get_clusters_evolution(data, P, Q, W, K, L, k=3):
    """ 
    Get the evolution of the clusters over time using the K-Medoids with Eta distances
    - data: Dataframe with the alternatives
    - P: Preference thresholds
    - Q: Indifference thresholds
    - W: Weights of the criteria
    - K: Number of criteria
    - L: Length of the time series
    - k: Number of clusters to form (for the K-Medoids algorithm)
    """

    alternatives = data.index
    # Get the temporal_eta matrix
    phi_c_all = pf.get_all_Phi_c(data, P, Q)
    temporal_eta = pf.get_eta_matrix(data, phi_c_all, W)

    # Get the clusters evolution
    clusters_evolution = []
    medoids_evolution = []
    iterations = []

    for t in tqdm(range(L)):
        eta_matrix = temporal_eta[:,:,t]
        eta_matrix = pd.DataFrame(eta_matrix, index=alternatives, columns=alternatives)
        medoids, clusters, _ = K_Medoid_Eta(alternatives, eta_matrix, k, prototype_method="farthest", print_results=False)
        clusters_evolution.append(clusters)
        medoids_evolution.append(medoids)
        iterations.append(t)

    return clusters_evolution, medoids_evolution, iterations

clusters_evolution, medoids_evolution, iterations = get_clusters_evolution(data, P, Q, W, K, L, k=3)

  d = a_i[c] - a_j[c]
100%|██████████| 33/33 [00:00<00:00, 175.98it/s]


In [151]:
9*1.25

11.25