In [6]:
import numpy as np
import pandas as pd

def _element_label(position):
    """Return the lowercase, spreadsheet-style label for a 0-based position.

    Positions 0..25 map to 'a'..'z'; later positions continue with
    'aa', 'ab', ... so labels stay alphabetic and unique for any count.
    """
    label = ""
    position += 1  # bijective base-26 works on 1-based numbers
    while position > 0:
        position, remainder = divmod(position - 1, 26)
        label = chr(97 + remainder) + label
    return label


def generate_synthetic_data(num_elements):
    """Generate a random symmetric dissimilarity matrix and matching labels.

    Args:
        num_elements: Number of elements (rows/columns) to generate.

    Returns:
        A tuple ``(dissimilarity_matrix, element_labels)`` where
        ``dissimilarity_matrix`` is a ``(num_elements, num_elements)`` integer
        array with zeros on the diagonal and symmetric off-diagonal values
        drawn from 1..9, and ``element_labels`` is the list of labels
        'a', 'b', 'c', ... (continuing 'aa', 'ab', ... after 'z').
    """
    raw_values = np.random.randint(1, 10, size=(num_elements, num_elements))
    # Keep only the strict upper triangle and mirror it: a dissimilarity
    # must satisfy d(i, j) == d(j, i) and d(i, i) == 0.
    upper_triangle = np.triu(raw_values, k=1)
    dissimilarity_matrix = upper_triangle + upper_triangle.T
    element_labels = [_element_label(i) for i in range(num_elements)]

    return dissimilarity_matrix, element_labels

def hierarchical_clustering(dissimilarity_matrix, element_labels):
    """Print a divisive hierarchical clustering, one line per level.

    Starting from one cluster holding every element, the cluster with the
    largest diameter (largest pairwise dissimilarity among its members) is
    repeatedly split in two using a splinter-group procedure (in the style
    of divisive analysis / DIANA) until no cluster has a positive diameter.
    The partition at each level is printed as ``Level <n>: <clusters>``.

    Args:
        dissimilarity_matrix: Label-indexed table supporting scalar
            ``.loc[row_label, column_label]`` lookups (e.g. a pandas
            DataFrame whose index and columns are the element labels).
        element_labels: Labels of the elements to cluster. The sequence is
            copied and never modified.

    Returns:
        None. The hierarchy is reported on standard output only.
    """

    def avg_dissim_within_group_element(ele, element_list):
        # Average dissimilarity from `ele` to the other members of its own
        # group. The sum includes d(ele, ele), which is expected to be 0.
        if len(element_list) <= 1:
            return 0
        total = sum(dissimilarity_matrix.loc[ele, other] for other in element_list)
        return total / (len(element_list) - 1)

    def avg_dissim_across_group_element(ele, splinter_list):
        # Average dissimilarity from `ele` to the members of the splinter group.
        if not splinter_list:
            return 0
        total = sum(dissimilarity_matrix.loc[ele, other] for other in splinter_list)
        return total / len(splinter_list)

    def splinter(main_list, splinter_group):
        # Pick the element of `main_list` that is, on average, further from
        # its own group than from the splinter group by the largest margin.
        # Returns (element, True) when that margin is positive, else
        # (None, False). Ties keep the earliest element.
        best_gain = -np.inf
        best_element = None
        for ele in main_list:
            gain = (avg_dissim_within_group_element(ele, main_list)
                    - avg_dissim_across_group_element(ele, splinter_group))
            if gain > best_gain:
                best_gain = gain
                best_element = ele
        if best_gain > 0:
            return best_element, True
        return None, False

    def split(element_list):
        # Work on a copy so neither the caller's labels nor the cluster
        # stored in `current_clusters` is mutated while splitting.
        main_list = list(element_list)
        splinter_group = []
        candidate, found = splinter(main_list, splinter_group)
        while found:
            main_list.remove(candidate)
            splinter_group.append(candidate)
            candidate, found = splinter(main_list, splinter_group)
        return main_list, splinter_group

    def max_diameter(cluster_list):
        # Index of the cluster with the largest pairwise dissimilarity, or
        # -1 when no cluster has a positive diameter (nothing left to split).
        widest_index = None
        widest_value = -np.inf
        for position, cluster in enumerate(cluster_list):
            for i in cluster:
                for j in cluster:
                    dissimilarity = dissimilarity_matrix.loc[i, j]
                    if dissimilarity > widest_value:
                        widest_value = dissimilarity
                        widest_index = position
        if widest_value <= 0:
            return -1
        return widest_index

    current_clusters = [list(element_labels)]
    level = 1
    print(f"Level {level}: {current_clusters}")

    # Only split clusters that actually have a positive diameter; this also
    # covers the initial cluster (single element / all-zero dissimilarities).
    index = max_diameter(current_clusters)
    while index != -1:
        a_clstr, b_clstr = split(current_clusters[index])
        if not b_clstr:
            # The split moved nothing, so repeating it can never make
            # progress; stop instead of looping forever.
            break
        del current_clusters[index]
        current_clusters.append(a_clstr)
        current_clusters.append(b_clstr)
        level += 1
        print(f"Level {level}: {current_clusters}")
        index = max_diameter(current_clusters)

# Demo run: build a random dissimilarity table and print its divisive hierarchy.
num_elements = 9  # number of synthetic elements; change to taste
(synthetic_dissimilarity_matrix,
 synthetic_element_labels) = generate_synthetic_data(num_elements)

# Wrap the raw array in a DataFrame so cells can be looked up by label.
dissimilarity_df = pd.DataFrame(
    data=synthetic_dissimilarity_matrix,
    index=synthetic_element_labels,
    columns=synthetic_element_labels,
)

hierarchical_clustering(
    dissimilarity_matrix=dissimilarity_df,
    element_labels=synthetic_element_labels,
)


Level 1: [['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']]
Level 2: [['a', 'e'], ['g', 'd', 'f', 'i', 'h', 'b', 'c']]
Level 3: [['a', 'e'], ['c'], ['g', 'f', 'i', 'd', 'h', 'b']]
Level 4: [['a', 'e'], ['c'], ['h', 'b'], ['g', 'd', 'i', 'f']]
Level 5: [['a', 'e'], ['c'], ['h', 'b'], ['d', 'i'], ['g', 'f']]
Level 6: [['a', 'e'], ['c'], ['h', 'b'], ['d', 'i'], ['f'], ['g']]
Level 7: [['c'], ['h', 'b'], ['d', 'i'], ['f'], ['g'], ['a'], ['e']]
Level 8: [['c'], ['h', 'b'], ['f'], ['g'], ['a'], ['e'], ['i'], ['d']]
Level 9: [['c'], ['f'], ['g'], ['a'], ['e'], ['i'], ['d'], ['b'], ['h']]
