Meta-Group
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 30 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/dpg/sklearn_dpg.py b/dpg/sklearn_dpg.py
Lines changed: 4 additions & 5 deletions
diff --git a/dpg/utils.py b/dpg/utils.py
Lines changed: 0 additions & 189 deletions
diff --git a/dpg/visualizer.py b/dpg/visualizer.py
Lines changed: 28 additions & 10 deletions
@@ -0,0 +1,30 @@
+name: CI  # Continuous-integration workflow: installs dependencies and runs pytest
+
+on:  # Events that trigger this workflow
+  push:  # no branch/path filter given, so any push triggers a run
+  pull_request:  # no filter given; default pull-request activity types apply
+
+jobs:
+  test:  # single job, expanded once per entry of the matrix below
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]  # quoted so YAML keeps "3.10" a string instead of the float 3.1
+
+    steps:
+      - uses: actions/checkout@v4  # check out the repository into the runner workspace
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}  # interpreter version for this matrix entry
+
+      - name: Install dependencies  # upgrades pip, installs requirements.txt, then pytest as a separate step
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest
+
+      - name: Run tests  # -q selects pytest's quiet output; a non-zero exit code fails the job
+        run: |
+          pytest -q
@@ -108,7 +108,7 @@ from dpg.core import DecisionPredicateGraph
 from dpg.visualizer import plot_dpg
 from metrics.nodes import NodeMetrics
 from metrics.graph import GraphMetrics
-from dpg.utils import get_dpg_edge_metrics
+from metrics.edges import EdgeMetrics
 
 # Load dataset (last column assumed to be target)
 df = pd.read_csv("datasets/custom.csv", index_col=0)
@@ -131,7 +131,7 @@ dot = dpg.fit(features.values)
 dpg_model, nodes_list = dpg.to_networkx(dot)
 
 # Extract metrics for visualization
-df_edges = get_dpg_edge_metrics(dpg_model, nodes_list)
+df_edges = EdgeMetrics.extract_edge_metrics(dpg_model, nodes_list)
 df_nodes = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
 GraphMetrics.extract_graph_metrics(
     dpg_model,
 
@@ -15,9 +15,9 @@
 
 from .core import DecisionPredicateGraph
 from .visualizer import plot_dpg, plot_dpg_communities
-from .utils import get_dpg_edge_metrics, clustering
 from metrics.nodes import NodeMetrics
 from metrics.graph import GraphMetrics
+from metrics.edges import EdgeMetrics
 
 
 def select_dataset(source: str, target_column: Optional[str] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
@@ -176,14 +176,13 @@ def test_dpg(datasets: str,
     class_nodes = {i[0] : i[1] for i in nodes_list if 'Class' in i[1]}
 
     if clusters_flag:
-        clusters, node_prob, confidence = clustering(dpg_model, class_nodes, threshold_clusters)
+        clusters, node_prob, confidence = GraphMetrics.clustering(dpg_model, class_nodes, threshold_clusters)
     else:
         clusters = node_prob = confidence = None
 
     df = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
-    df_edges = get_dpg_edge_metrics(dpg_model, nodes_list)
-    # df_edges = 0
-    df_dpg = GraphMetrics.extract_graph_metrics(dpg_model, nodes_list,target_names=np.unique(y_train).astype(str).tolist())
+    df_edges = EdgeMetrics.extract_edge_metrics(dpg_model, nodes_list)
+    df_dpg = GraphMetrics.extract_graph_metrics_lpa(dpg_model, nodes_list,target_names=np.unique(y_train).astype(str).tolist())
     # df_dpg = {}
 
     # Plot if requested
 
@@ -122,192 +122,3 @@ def delete_folder_contents(folder_path):
             print(f'Failed to delete {item_path}. Reason: {e}')
 
 
-
-def get_dpg_edge_metrics(dpg_model, nodes_list):
-    """
-    Extracts metrics from the edges of a DPG model, including:
-    - Edge Load Centrality
-    - Trophic Differences
-    
-    Args:
-    dpg_model: A NetworkX graph representing the DPG.
-    nodes_list: A list of nodes where each node is a tuple. The first element is the node identifier and the second is the node label.
-
-    Returns:
-    df: A pandas DataFrame containing the metrics for each edge in the DPG.
-    """
-    
-
-    # Calculate edge weights (assuming edges have 'weight' attribute)
-    edge_weights = nx.get_edge_attributes(dpg_model, 'weight')
-    
-    # Aggiungi le etichette dei nodi
-    edge_data_with_labels = []
-    for u, v in dpg_model.edges():
-        # Ottieni le etichette per i nodi coinvolti nell'arco
-        u_label = next((label for node, label in nodes_list if node == u), None)
-        v_label = next((label for node, label in nodes_list if node == v), None)
-        
-        # Ottieni gli identificativi (ID) per i nodi coinvolti nell'arco
-        u_id = next((node for node, label in nodes_list if node == u), None)
-        v_id = next((node for node, label in nodes_list if node == v), None)
-        
-        # Aggiungi i dati per l'arco con le etichette e gli ID
-        edge_data_with_labels.append([f"{u}-{v}",  
-                                     edge_weights.get((u, v), 0),
-                                     u_label, v_label, u_id, v_id])
-    
-    # Crea un DataFrame con gli archi, le etichette e gli ID
-    df_edges_with_labels = pd.DataFrame(edge_data_with_labels, columns=["Edge", "Weight", 
-                                                                        "Node_u_label", "Node_v_label", "Source_id", "Target_id"])
-    
-
-    # Restituisci il DataFrame risultante
-    return df_edges_with_labels
-
-
-def clustering(dpg_model, class_nodes, threshold = None):
-    
-    classes = sorted(set(class_nodes.values()))
-    class_by_node = dict(class_nodes)
-    class_set = set(class_by_node.keys())
-
-    nodes = list(dpg_model.nodes())
-    n = len(nodes)
-    
-    idx = {idx_node : node for node, idx_node in enumerate(nodes)}
-    
-    # P
-    P = np.zeros((n, n), dtype = float)
-    for node in nodes:
-        i = idx[node]
-        if node in class_set:
-            P[i, i] = 1.0
-            continue
-
-        out_edges = list(dpg_model.out_edges(node, data=True))
-        
-        weight_sum = 0
-
-        for out_node, in_node, weight in out_edges:
-            weight_sum += weight.get('weight', 1)
-
-        if weight_sum > 0:
-            for out_node, in_node, weight in out_edges:
-                j = idx[in_node]
-                P[i, j] = weight.get('weight', 1) / weight_sum
-        else:
-            P[i, i] = 1.0
-    
-    # Order to obtain Q and R
-    transient = []
-    absorbing = []
-    for node in nodes:
-        if node not in class_set:
-            transient.append(node)
-        elif node in class_set:
-            absorbing.append(node)
-
-    t = len(transient)
-
-    perm = transient + absorbing
-    
-    perm_idx = [idx[node] for node in perm]
-    
-    Pp = P[perm_idx][:, perm_idx]
-
-    Q = Pp[:t, :t]
-    R = Pp[:t, t:]
-
-    # N
-    I = np.eye(t)
-    N = np.linalg.solve(I - Q, I)
-
-    # Absorbing probability for each node
-    B = N @ R
-
-    # ----- #
-    class_labels = [class_by_node[node] for node in absorbing]
-
-    class_to_cols = {}
-    for class_index in range(len(absorbing)):
-        label = class_labels[class_index]
-        if label not in class_to_cols:
-            class_to_cols[label] = []
-        class_to_cols[label].append(class_index)
-    
-    # Distribution for transient nodes
-    node_probs = {}
-
-    for index_row in range(len(transient)):
-        node = transient[index_row]
-
-        probs = {}
-        for label in classes:
-            probs[label] = 0.0
-        
-        # sum columns for class
-        for label in classes:
-            cols = class_to_cols.get(label, [])
-            total = 0.0
-            for index_col in cols:
-                total += B[index_row, index_col]
-            probs[label] = total
-
-        node_probs[node] = probs
-    
-    # Distribution for absorbing nodes
-    for node in absorbing:
-        probs = {}
-        for label in classes:
-            probs[label] = 0.0
-        probs[class_nodes[node]] = 1.0
-        
-        node_probs[node] = probs
-
-    # Clusters
-    clusters = {}
-    for label in classes:
-        clusters[label] = []
-    
-    if threshold is not None:
-        clusters['Ambiguous'] = []
-
-    confidence = {}
-
-    for node in nodes:
-        probs = node_probs[node]
-
-        top_label = None
-        top_prob = -1.0
-        second_top_prob = -1.0
-
-        # Top probability and cluster identification
-        for label in classes:
-            prob = probs[label]
-            if prob > top_prob:
-                top_prob = prob
-                top_label = label
-
-        # Second top probability
-        for label in classes:
-            prob = probs[label]
-            if label != top_label and prob > second_top_prob:
-                second_top_prob = prob
-
-        margin = top_prob - (second_top_prob if second_top_prob >= 0.0 else 0.0)
-
-        confidence[node] = margin
-
-        
-        if threshold is None:
-            clusters[top_label].append(node)
-
-        else:
-            if top_prob > threshold:       
-                clusters[top_label].append(node)     
-            else:
-                clusters['Ambiguous'].append(node)
-
-
-    return clusters, node_probs, confidence
@@ -1,6 +1,7 @@
 import os
 import re
 import numpy as np
+import pandas as pd
 from io import BytesIO
 from typing import Dict, List, Optional, TYPE_CHECKING
 from graphviz import Source
@@ -238,7 +239,8 @@ def plot_dpg_communities(
     plot_name: Output base name for saved files (no extension).
     dot: Graphviz Digraph instance representing the DPG structure.
     df: DataFrame with node metrics; must include 'Node' and 'Label' columns.
-    dpg_metrics: Dict containing 'Communities' (list of sets/lists of node labels).
+    dpg_metrics: Dict containing either 'Communities' (list of sets/lists of node labels)
+                 or 'Clusters' (mapping cluster_label -> list of node labels).
     save_dir: Directory where output images are saved. Default is "results/".
     class_flag: If True, class nodes are highlighted in yellow before other coloring.
     df_edges: Optional DataFrame with edge metrics to color edges by weight.
@@ -250,8 +252,8 @@ def plot_dpg_communities(
     """
     print("Plotting DPG (communities)...")
 
-    if dpg_metrics is None or "Communities" not in dpg_metrics:
-        raise AttributeError("dpg_metrics with 'Communities' is required to plot communities.")
+    if dpg_metrics is None:
+        raise AttributeError("dpg_metrics is required to plot communities.")
 
     colormap = cm.YlOrRd  # Choose a colormap
 
@@ -263,24 +265,40 @@ def plot_dpg_communities(
         df = df[~df.Label.str.contains('Class')].reset_index(drop=True)  # Exclude class nodes from further processing
 
     # Map labels to community indices
-    communities = dpg_metrics.get("Communities", [])
+    if "Communities" in dpg_metrics:
+        communities = dpg_metrics.get("Communities", [])
+    elif "Clusters" in dpg_metrics:
+        clusters = dpg_metrics.get("Clusters", {})
+        communities = list(clusters.values())
+    else:
+        raise AttributeError("dpg_metrics must include 'Communities' or 'Clusters' to plot communities.")
+
     label_to_community = {}
     for idx, community in enumerate(communities):
         for label in community:
             label_to_community[label] = idx
     df['Community'] = df['Label'].map(label_to_community)
 
+    if df['Community'].isna().all():
+        raise AttributeError("No nodes matched communities/clusters labels.")
+
     max_score = df['Community'].max()
-    norm = mcolors.Normalize(0, max_score)  # Normalize the community indices
+    if max_score <= 0:
+        norm = mcolors.Normalize(0, 1)
+    else:
+        norm = mcolors.Normalize(0, max_score)  # Normalize the community indices
 
     colors = colormap(norm(df['Community']))  # Assign colors based on normalized community indices
 
     for index, row in df.iterrows():
-        color = "#{:02x}{:02x}{:02x}".format(
-            int(colors[index][0] * 255),
-            int(colors[index][1] * 255),
-            int(colors[index][2] * 255),
-        )
+        if pd.isna(row['Community']):
+            color = "#bdbdbd"
+        else:
+            color = "#{:02x}{:02x}{:02x}".format(
+                int(colors[index][0] * 255),
+                int(colors[index][1] * 255),
+                int(colors[index][2] * 255),
+            )
         change_node_color(dot, row['Node'], color)
 
     plot_name = plot_name + "_communities"