Meta-Group
diff --git a/README.md b/README.md
Lines changed: 10 additions & 7 deletions
diff --git a/dpg/sklearn_dpg.py b/dpg/sklearn_dpg.py
Lines changed: 25 additions & 16 deletions
diff --git a/dpg/utils.py b/dpg/utils.py
Lines changed: 12 additions & 14 deletions
@@ -46,7 +46,7 @@ The graph-based nature of DPG provides significant enhancements in the direction
 To install DPG locally, first clone the repository:
 
 ```bash
-git clone https://github.com/LeonardoArrighi/DPG.git
+git clone https://github.com/Meta-Group/DPG.git
 cd DPG
 ```
 
@@ -57,7 +57,7 @@ pip install -e .
 
 Alternatively, if using `pip` directly:
 ```bash
-pip install git+https://github.com/LeonardoArrighi/DPG.git
+pip install git+https://github.com/Meta-Group/DPG.git
 ```
 **Troubleshooting:** If you encounter dependency conflicts, we recommend using a virtual environment:
 
@@ -132,11 +132,11 @@ dpg_model, nodes_list = dpg.to_networkx(dot)
 # Extract and visualize
 dpg_metrics = GraphMetrics.extract_graph_metrics(dpg_model, nodes_list,target_names=np.unique(y_train).astype(str).tolist())
 df = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
-plot_dpg("dpg_output.png", dot, df_nodes, dpg_metrics, save_dir="datasets", communities=True, class_flag=True)
+plot_dpg_communities("dpg_output", dot, df, dpg_metrics, save_dir="datasets", class_flag=True, export_pdf=True)
 ```
 #### Output:
 <p align="center">
-  <img src="https://github.com/LeonardoArrighi/DPG/blob/main/dpg_image_examples/dpg_output.png_communities.png?raw=true" width="600" />
+  <img src="https://github.com/Meta-Group/DPG/blob/main/dpg_image_examples/dpg_output_communities.png?raw=true" width="600" />
 </p>
 
 #### CLI scripts
@@ -157,7 +157,7 @@ The DPG output, through `run_dpg_standard.py` or `run_dpg_custom.py`, produces s
 - a `.txt` file containing the Random Forest statistics (accuracy, confusion matrix, classification report)
 
 ## Easy usage
-Usage: `python run_dpg_standard.py --dataset <dataset_name> --n_learners <integer_number> --pv <threshold_value> --t <integer_number> --model_name <str_model_name> --dir <save_dir_path> --plot --save_plot_dir <save_plot_dir_path> --attribute <attribute> --communities --class_flag`
+Usage: `python run_dpg_standard.py --dataset <dataset_name> --n_learners <integer_number> --pv <threshold_value> --t <integer_number> --model_name <str_model_name> --dir <save_dir_path> --plot --save_plot_dir <save_plot_dir_path> --attribute <attribute> --communities --clusters --threshold_clusters <float> --class_flag --seed <int>`
 Where:
 - `dataset` is the name of the standard classification `sklearn` dataset to be analyzed;
 - `n_learners` is the number of base learners for the Random Forest;
@@ -169,9 +169,12 @@ Where:
 - `save_plot_dir` is the path of the directory to save the plot image;
 - `attribute` is the specific node metric which can be visualized on the DPG;
 - `communities` is a store_true variable which can be added to visualize communities on the DPG;
-- `class_flag` is a store_true variable which can be added to highlight class nodes.
+- `clusters` is a store_true variable which can be added to visualize clusters on the DPG;
+- `threshold_clusters` is the threshold used to detect ambiguous nodes in clusters;
+- `class_flag` is a store_true variable which can be added to highlight class nodes;
+- `seed` controls the random split.
 
-Disclaimer: `attribute` and `communities` can not be added together, since DPG supports just one of the two visualizations.
+Disclaimer: `attribute`, `communities`, and `clusters` are mutually exclusive: DPG supports just one visualization mode at a time.
 
 The usage of `run_dpg_custom.py` is similar, but it requires another parameter:
 - `target_column`, which is the name of the column to be used as the target variable;
 
@@ -16,7 +16,7 @@
 from ucimlrepo import fetch_ucirepo
 
 from .core import DecisionPredicateGraph
-from .visualizer import plot_dpg
+from .visualizer import plot_dpg, plot_dpg_communities
 from .utils import get_dpg_edge_metrics, clustering
 from metrics.nodes import NodeMetrics
 from metrics.graph import GraphMetrics
@@ -208,18 +208,27 @@ def test_dpg(datasets: str,
         )
         plot_name +=  f"_{model_name}_l{n_learners}_pv{perc_var}_t{decimal_threshold}_{seed}"
 
-        plot_dpg(
-            plot_name,
-            dot,
-            df,
-            df_edges,
-            df_dpg,
-            save_dir=save_plot_dir,
-            attribute=attribute,
-            communities=communities,
-            clusters=clusters,
-            threshold_clusters=threshold_clusters,
-            class_flag=class_flag
-        )
-    
-    return df, df_edges, df_dpg, clusters, node_prob, confidence
+        if communities:
+            plot_dpg_communities(
+                plot_name,
+                dot,
+                df,
+                df_dpg,
+                save_dir=save_plot_dir,
+                class_flag=class_flag,
+                df_edges=df_edges,
+            )
+        else:
+            plot_dpg(
+                plot_name,
+                dot,
+                df,
+                df_edges,
+                save_dir=save_plot_dir,
+                attribute=attribute,
+                clusters=clusters,
+                threshold_clusters=threshold_clusters,
+                class_flag=class_flag,
+            )
+    
+    return df, df_edges, df_dpg, clusters, node_prob, confidence
@@ -1,4 +1,5 @@
 import os
+import re
 import shutil
 import yaml
 from graphviz import Digraph
@@ -45,22 +46,19 @@ def highlight_class_node(dot, dpg_config=None):
 
     # Iterate over each line in the dot body
     for i, line in enumerate(dot.body):
-        # Extract the node identifier from the line
-        line_id = line.split(' ')[1].replace("\t", "")
-        # Check if the node identifier contains "Class"
-        if "Class" in line_id:
+        # Check for class labels in the node attributes
+        if 'label="Class' in line:
             new_attrs = f'fillcolor="{fillcolor}" shape={shape} style="{style}"'
-            # If node already has attributes, modify them
             if '[' in line:
-                parts = line.split('[')
-                attrs = parts[1].rstrip(']')
-                # Remove existing attributes we're replacing
-                for attr in ['fillcolor', 'shape', 'style']:
-                    attrs = ' '.join([a for a in attrs.split() if not a.startswith(attr)])
-                # Add new attributes
-                dot.body[i] = f"{parts[0]}[{attrs} {new_attrs}]"
+                pre, rest = line.split('[', 1)
+                attrs = rest.rsplit(']', 1)[0]
+                # Remove existing attributes we're replacing (quoted or unquoted)
+                attrs = re.sub(r'\b(fillcolor|shape|style)=(".*?"|[^ \]]+)', '', attrs)
+                attrs = re.sub(r'\s+', ' ', attrs).strip()
+                if attrs:
+                    attrs = attrs + ' '
+                dot.body[i] = f"{pre}[{attrs}{new_attrs}]"
             else:
-                # Node has no attributes yet
                 node_id = line.split(' ')[0]
                 dot.body[i] = f'{node_id} [{new_attrs}]'
 
@@ -312,4 +310,4 @@ def clustering(dpg_model, class_nodes, threshold = None):
                 clusters['Ambiguous'].append(node)
 
 
-    return clusters, node_probs, confidence
+    return clusters, node_probs, confidence