In [None]:
def print_tree(Tree : ClusterNode):
    """Print the attribute dictionary of every node in the tree.

    Nodes are visited in pre-order: the node itself, then its left subtree,
    then its right subtree. A ``None`` subtree prints nothing.
    """
    if Tree is not None:
        print(Tree.__dict__)
        for subtree in (Tree.left, Tree.right):
            print_tree(subtree)

In [None]:
def insert_id(Tree : ClusterNode, id_members : list):
    """Append the id of every leaf below ``Tree`` to ``id_members``.

    Leaves are appended in left-to-right order. A node counts as a leaf when
    both its ``left`` and ``right`` attributes are ``None``.

    The traversal uses an explicit stack rather than recursion, so very deep
    (chain-like) trees do not exceed Python's recursion limit.

    Parameters
    ----------
    Tree : ClusterNode
        Root of the (sub)tree to scan. ``None`` subtrees are skipped.
    id_members : list
        Output list, extended in place.

    Returns
    -------
    None
    """
    pending = [Tree]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        if node.left is None and node.right is None:
            id_members.append(node.id)
            continue
        # Push right first so that the left subtree is popped (visited) first.
        pending.append(node.right)
        pending.append(node.left)

def find_id_members(Node : ClusterNode) -> list[int]:
    """Return the ids of all leaves below ``Node``, in left-to-right order.

    Relies on ``ClusterNode.pre_order``, which walks the tree without
    recursive calls and collects one value per leaf, so deep trees are safe.

    Parameters
    ----------
    Node : ClusterNode
        Root of the (sub)tree whose leaves are wanted. A leaf node yields a
        one-element list holding its own id.

    Returns
    -------
    list[int]
        A new Python list of leaf ids (not a NumPy array).
    """
    return Node.pre_order(lambda leaf: leaf.id)

def insert_node(Tree : ClusterNode, list_of_nodes : list[ClusterNode],  to_depth : int, current_depth : int):
    """Collect the nodes obtained by cutting the tree at ``to_depth``.

    ``Tree`` is appended to ``list_of_nodes`` when ``current_depth`` equals
    ``to_depth`` or when it has no children; otherwise both children are
    visited, left first, one level deeper. Always returns ``None``.
    """
    if current_depth == to_depth or (Tree.left is None and Tree.right is None):
        # Either the requested level was reached or this branch ends early.
        list_of_nodes.append(Tree)
        return None
    child_depth = current_depth + 1
    for child in (Tree.left, Tree.right):
        insert_node(child, list_of_nodes, to_depth, child_depth)
    
def find_nodes(Tree : ClusterNode, to_depth : int) -> list[ClusterNode]:
    """Return the nodes found when descending ``to_depth`` levels from ``Tree``.

    The root counts as depth 0. The collection itself is delegated to
    ``insert_node``, which fills a fresh list that is returned here.
    """
    collected = []
    insert_node(Tree, collected, to_depth, current_depth=0)
    return collected

In [None]:
def plot_dendrogram(model, **kwargs):
    """Draw the dendrogram of ``model`` and return what ``dendrogram`` returns.

    ``model`` is handed to ``build_linkage_matrix`` (defined elsewhere in this
    notebook; presumably it expects a fitted hierarchical clustering
    estimator) and the resulting linkage matrix is forwarded to
    ``dendrogram`` together with any extra keyword arguments.
    """
    return dendrogram(build_linkage_matrix(model), **kwargs)

In [None]:
def labels_from_tree(points : np.ndarray, tree : ClusterNode, n_clusters : int = 2):
    """Assign a cluster label to every point by cutting ``tree`` at a fixed depth.

    The tree is cut at depth ``int(log2(n_clusters))`` (root is depth 0). The
    nodes found at the cut are numbered in the order ``find_nodes`` returns
    them, and every leaf id below a node gets that node's number as label.

    Parameters
    ----------
    points : np.ndarray
        Only ``points.shape[0]`` is used: labels are looked up for the ids
        ``0 .. points.shape[0] - 1``.
    tree : ClusterNode
        Root of the hierarchical clustering tree.
    n_clusters : int
        Requested number of clusters; it only enters through the cut depth.

    Returns
    -------
    list
        ``labels[i]`` is the label of the leaf whose id is ``i``. A
        ``KeyError`` is raised if some id in that range is not a leaf id.
    """
    # Maximum tree depth that has to be reached (root is at level zero).
    cut_depth = int(np.log2(n_clusters))

    label_of_point = {}
    for label, node in enumerate(find_nodes(tree, cut_depth)):
        for member in find_id_members(node):
            label_of_point[member] = label

    return [label_of_point[index] for index in range(points.shape[0])]

In [None]:
def make_cluster_label(ids : np.ndarray, labels : list, complete_column : np.ndarray) -> list[object]:
    """Translate a column of ids into the labels paired with those ids.

    ``ids`` and ``labels`` are paired positionally (extra entries of the
    longer one are ignored; for a repeated id the last pairing wins). The
    result holds, for each entry of ``complete_column``, the label of that
    id; an id that was never paired raises ``KeyError``.
    """
    label_by_id = dict(zip(ids, labels))
    return [label_by_id[key] for key in complete_column]

In [None]:
# Ward-linkage agglomerative clustering of `points`. No cluster count is
# fixed (n_clusters=None); a distance threshold of 0.5 is passed instead.
clustering = AgglomerativeClustering(
    n_clusters=None,
    distance_threshold=0.5,
    linkage="ward",
).fit(points)

# Linkage matrix of the fitted model, converted to a tree of nodes.
link_matrix = build_linkage_matrix(clustering)
Tree = to_tree(link_matrix)

# Mean longitude/latitude per cluster label, one row per label.
# NOTE(review): assumes the rows left after dropna() line up one-to-one with
# the rows of `points` used for the fit - confirm.
stations_grouped = (
    df_lat_long_stations.dropna()
    .loc[:, ["Longitude", "Latitude"]]
    .groupby(by=clustering.labels_)
    .mean()
    .to_numpy()
)

In [None]:
# Map of all stations; colours are derived from the cluster labels.
fig, ax = plt.subplots(figsize=(9, 6))
colors = make_colors_from_labels(clustering.labels_)
plot_italian_coast(
    ax,
    (x_pen, x_sic, x_sard),
    (y_pen, y_sic, y_sard),
    alpha=0.5,
)
ax.scatter(*points.T, color=colors, alpha=1.0)
ax.set(xlabel="Longitude", ylabel="Latitude")
ax.set_title("ARPA Stations")

In [None]:
# Map of the per-cluster mean positions; one colour per row of
# `stations_grouped`.
fig, ax = plt.subplots(figsize=(9, 6))
colors = make_colors_from_labels(range(stations_grouped.shape[0]))
plot_italian_coast(
    ax,
    (x_pen, x_sic, x_sard),
    (y_pen, y_sic, y_sard),
    alpha=0.5,
)
ax.scatter(*stations_grouped.T, color=colors, alpha=1.0)
ax.set(xlabel="Longitude", ylabel="Latitude")
ax.set_title("ARPA Stations (grouped)")

In [None]:
#cluster_label = make_cluster_label(
#    df_lat_long_stations.dropna().loc[:,"id"].to_numpy(), 
#    clustering.labels_, 
#    phyto_abundances_simplified.loc[:,"id"].to_numpy()
#    )
#taxon_num_grouped = phyto_abundances_simplified.loc[: , ["Taxon", "Num_cell_l"]].groupby(by = cluster_label)