Skip to content

Commit fea1df7

Browse files
committed
Add edge metrics extraction and CI workflow; update imports and README
1 parent ba8ea27 commit fea1df7

16 files changed

Lines changed: 700 additions & 1433 deletions

.github/workflows/ci.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
pull_request:
6+
7+
jobs:
8+
test:
9+
runs-on: ubuntu-latest
10+
strategy:
11+
matrix:
12+
python-version: ["3.10", "3.11", "3.12"]
13+
14+
steps:
15+
- uses: actions/checkout@v4
16+
17+
- name: Set up Python
18+
uses: actions/setup-python@v5
19+
with:
20+
python-version: ${{ matrix.python-version }}
21+
22+
- name: Install dependencies
23+
run: |
24+
python -m pip install --upgrade pip
25+
pip install -r requirements.txt
26+
pip install pytest
27+
28+
- name: Run tests
29+
run: |
30+
pytest -q

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ from dpg.core import DecisionPredicateGraph
108108
from dpg.visualizer import plot_dpg
109109
from metrics.nodes import NodeMetrics
110110
from metrics.graph import GraphMetrics
111-
from dpg.utils import get_dpg_edge_metrics
111+
from metrics.edges import EdgeMetrics
112112

113113
# Load dataset (last column assumed to be target)
114114
df = pd.read_csv("datasets/custom.csv", index_col=0)
@@ -131,7 +131,7 @@ dot = dpg.fit(features.values)
131131
dpg_model, nodes_list = dpg.to_networkx(dot)
132132

133133
# Extract metrics for visualization
134-
df_edges = get_dpg_edge_metrics(dpg_model, nodes_list)
134+
df_edges = EdgeMetrics.extract_edge_metrics(dpg_model, nodes_list)
135135
df_nodes = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
136136
GraphMetrics.extract_graph_metrics(
137137
dpg_model,

dpg/sklearn_dpg.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515

1616
from .core import DecisionPredicateGraph
1717
from .visualizer import plot_dpg, plot_dpg_communities
18-
from .utils import get_dpg_edge_metrics, clustering
1918
from metrics.nodes import NodeMetrics
2019
from metrics.graph import GraphMetrics
20+
from metrics.edges import EdgeMetrics
2121

2222

2323
def select_dataset(source: str, target_column: Optional[str] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
@@ -176,14 +176,13 @@ def test_dpg(datasets: str,
176176
class_nodes = {i[0] : i[1] for i in nodes_list if 'Class' in i[1]}
177177

178178
if clusters_flag:
179-
clusters, node_prob, confidence = clustering(dpg_model, class_nodes, threshold_clusters)
179+
clusters, node_prob, confidence = GraphMetrics.clustering(dpg_model, class_nodes, threshold_clusters)
180180
else:
181181
clusters = node_prob = confidence = None
182182

183183
df = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
184-
df_edges = get_dpg_edge_metrics(dpg_model, nodes_list)
185-
# df_edges = 0
186-
df_dpg = GraphMetrics.extract_graph_metrics(dpg_model, nodes_list,target_names=np.unique(y_train).astype(str).tolist())
184+
df_edges = EdgeMetrics.extract_edge_metrics(dpg_model, nodes_list)
185+
df_dpg = GraphMetrics.extract_graph_metrics_lpa(dpg_model, nodes_list,target_names=np.unique(y_train).astype(str).tolist())
187186
# df_dpg = {}
188187

189188
# Plot if requested

dpg/utils.py

Lines changed: 0 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -122,192 +122,3 @@ def delete_folder_contents(folder_path):
122122
print(f'Failed to delete {item_path}. Reason: {e}')
123123

124124

125-
126-
def get_dpg_edge_metrics(dpg_model, nodes_list):
    """
    Build a per-edge table (weight, endpoint labels, endpoint ids) for a DPG.

    Only the columns listed under Returns are produced; no centrality or
    trophic measure is computed here.

    Args:
        dpg_model: Graph exposing a NetworkX-style ``edges(data=..., default=...)``
            view (presumably the ``nx.DiGraph`` built from the DPG -- confirm
            against the caller).
        nodes_list: Iterable of ``(node_id, node_label)`` pairs. When an id
            appears more than once, the first label wins.

    Returns:
        A pandas DataFrame with one row per edge and the columns
        ``Edge`` (``"<u>-<v>"``), ``Weight`` (the edge's ``'weight'`` attribute,
        0 when absent), ``Node_u_label``, ``Node_v_label``, ``Source_id`` and
        ``Target_id``. Labels and ids are ``None`` for endpoints that do not
        appear in ``nodes_list``.
    """
    # Index the labels once instead of rescanning nodes_list for every edge.
    # setdefault keeps the first occurrence, like a front-to-back search would.
    label_by_node = {}
    for node, label in nodes_list:
        label_by_node.setdefault(node, label)

    edge_rows = []
    # Asking the edge view for the weight directly also yields the right value
    # for parallel edges of a multigraph, where a (u, v) key lookup cannot match.
    for u, v, weight in dpg_model.edges(data="weight", default=0):
        edge_rows.append([
            f"{u}-{v}",
            weight,
            label_by_node.get(u),
            label_by_node.get(v),
            # Ids are only reported for endpoints known to nodes_list.
            u if u in label_by_node else None,
            v if v in label_by_node else None,
        ])

    return pd.DataFrame(
        edge_rows,
        columns=["Edge", "Weight",
                 "Node_u_label", "Node_v_label", "Source_id", "Target_id"],
    )
167-
168-
169-
def clustering(dpg_model, class_nodes, threshold = None):
    """
    Cluster DPG nodes by the class node a weighted random walk ends in.

    The graph is treated as an absorbing Markov chain: nodes listed in
    ``class_nodes`` are absorbing, every other node is transient and moves
    along its outgoing edges with probability proportional to the edge
    ``'weight'`` attribute (1 when the attribute is missing).

    Args:
        dpg_model: Graph exposing NetworkX-style ``nodes()`` and
            ``out_edges(node, data=True)``.
        class_nodes: Mapping ``node_id -> class label`` for the class nodes.
        threshold: Optional probability cut-off. When given, a node joins its
            best class only if that probability is strictly greater than
            ``threshold``; otherwise it is placed in ``'Ambiguous'``.

    Returns:
        Tuple ``(clusters, node_probs, confidence)``:
        ``clusters`` maps each class label (plus ``'Ambiguous'`` when a
        threshold is set) to the list of nodes assigned to it;
        ``node_probs`` maps each node to ``{class label: absorption probability}``;
        ``confidence`` maps each node to the gap between its best and
        second-best class probability.

    Raises:
        numpy.linalg.LinAlgError: if a group of transient nodes forms a closed
            cycle with no path to any class node (the linear system is then
            singular).
    """
    classes = sorted(set(class_nodes.values()))
    class_set = set(class_nodes)

    nodes = list(dpg_model.nodes())
    n = len(nodes)
    position = {node: i for i, node in enumerate(nodes)}

    # Row-stochastic transition matrix P.
    P = np.zeros((n, n), dtype=float)
    for node in nodes:
        i = position[node]
        if node in class_set:
            P[i, i] = 1.0  # class nodes absorb the walk
            continue

        out_edges = list(dpg_model.out_edges(node, data=True))
        weight_sum = sum(attrs.get('weight', 1) for _, _, attrs in out_edges)

        if weight_sum > 0:
            for _, target, attrs in out_edges:
                # += so that parallel edges accumulate instead of overwriting.
                P[i, position[target]] += attrs.get('weight', 1) / weight_sum
        # A non-class dead end keeps an all-zero row: it reaches no class, so
        # its absorption probabilities are 0. Giving it a self-loop would put
        # a zero row into (I - Q) and make the solve below fail.

    # Reorder as [transient | absorbing] to read off Q and R.
    transient = [node for node in nodes if node not in class_set]
    absorbing = [node for node in nodes if node in class_set]
    t = len(transient)

    perm_idx = [position[node] for node in transient + absorbing]
    Pp = P[perm_idx][:, perm_idx]
    Q = Pp[:t, :t]
    R = Pp[:t, t:]

    # Absorption probabilities B solve (I - Q) B = R; solving against R
    # directly avoids forming the fundamental matrix explicitly.
    if t and absorbing:
        B = np.linalg.solve(np.eye(t) - Q, R)
    else:
        B = np.zeros((t, len(absorbing)))

    # Several class nodes may share one label: group their columns.
    class_to_cols = {}
    for col, node in enumerate(absorbing):
        class_to_cols.setdefault(class_nodes[node], []).append(col)

    # Distribution for transient nodes
    node_probs = {}
    for row, node in enumerate(transient):
        node_probs[node] = {
            label: float(sum(B[row, col] for col in class_to_cols.get(label, [])))
            for label in classes
        }

    # Distribution for absorbing nodes
    for node in absorbing:
        probs = {label: 0.0 for label in classes}
        probs[class_nodes[node]] = 1.0
        node_probs[node] = probs

    # Clusters
    clusters = {label: [] for label in classes}
    if threshold is not None:
        clusters['Ambiguous'] = []

    confidence = {}
    for node in nodes:
        probs = node_probs[node]

        # max() returns the first maximal label, so ties resolve to the
        # alphabetically first class.
        top_label = max(classes, key=probs.__getitem__, default=None)
        top_prob = probs[top_label] if top_label is not None else -1.0
        second_top_prob = max(
            (probs[label] for label in classes if label != top_label),
            default=-1.0,
        )

        confidence[node] = top_prob - (second_top_prob if second_top_prob >= 0.0 else 0.0)

        if threshold is None or top_prob > threshold:
            clusters[top_label].append(node)
        else:
            clusters['Ambiguous'].append(node)

    return clusters, node_probs, confidence

dpg/visualizer.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import re
33
import numpy as np
4+
import pandas as pd
45
from io import BytesIO
56
from typing import Dict, List, Optional, TYPE_CHECKING
67
from graphviz import Source
@@ -238,7 +239,8 @@ def plot_dpg_communities(
238239
plot_name: Output base name for saved files (no extension).
239240
dot: Graphviz Digraph instance representing the DPG structure.
240241
df: DataFrame with node metrics; must include 'Node' and 'Label' columns.
241-
dpg_metrics: Dict containing 'Communities' (list of sets/lists of node labels).
242+
dpg_metrics: Dict containing either 'Communities' (list of sets/lists of node labels)
243+
or 'Clusters' (mapping cluster_label -> list of node labels).
242244
save_dir: Directory where output images are saved. Default is "results/".
243245
class_flag: If True, class nodes are highlighted in yellow before other coloring.
244246
df_edges: Optional DataFrame with edge metrics to color edges by weight.
@@ -250,8 +252,8 @@ def plot_dpg_communities(
250252
"""
251253
print("Plotting DPG (communities)...")
252254

253-
if dpg_metrics is None or "Communities" not in dpg_metrics:
254-
raise AttributeError("dpg_metrics with 'Communities' is required to plot communities.")
255+
if dpg_metrics is None:
256+
raise AttributeError("dpg_metrics is required to plot communities.")
255257

256258
colormap = cm.YlOrRd # Choose a colormap
257259

@@ -263,24 +265,40 @@ def plot_dpg_communities(
263265
df = df[~df.Label.str.contains('Class')].reset_index(drop=True) # Exclude class nodes from further processing
264266

265267
# Map labels to community indices
266-
communities = dpg_metrics.get("Communities", [])
268+
if "Communities" in dpg_metrics:
269+
communities = dpg_metrics.get("Communities", [])
270+
elif "Clusters" in dpg_metrics:
271+
clusters = dpg_metrics.get("Clusters", {})
272+
communities = list(clusters.values())
273+
else:
274+
raise AttributeError("dpg_metrics must include 'Communities' or 'Clusters' to plot communities.")
275+
267276
label_to_community = {}
268277
for idx, community in enumerate(communities):
269278
for label in community:
270279
label_to_community[label] = idx
271280
df['Community'] = df['Label'].map(label_to_community)
272281

282+
if df['Community'].isna().all():
283+
raise AttributeError("No nodes matched communities/clusters labels.")
284+
273285
max_score = df['Community'].max()
274-
norm = mcolors.Normalize(0, max_score) # Normalize the community indices
286+
if max_score <= 0:
287+
norm = mcolors.Normalize(0, 1)
288+
else:
289+
norm = mcolors.Normalize(0, max_score) # Normalize the community indices
275290

276291
colors = colormap(norm(df['Community'])) # Assign colors based on normalized community indices
277292

278293
for index, row in df.iterrows():
279-
color = "#{:02x}{:02x}{:02x}".format(
280-
int(colors[index][0] * 255),
281-
int(colors[index][1] * 255),
282-
int(colors[index][2] * 255),
283-
)
294+
if pd.isna(row['Community']):
295+
color = "#bdbdbd"
296+
else:
297+
color = "#{:02x}{:02x}{:02x}".format(
298+
int(colors[index][0] * 255),
299+
int(colors[index][1] * 255),
300+
int(colors[index][2] * 255),
301+
)
284302
change_node_color(dot, row['Node'], color)
285303

286304
plot_name = plot_name + "_communities"

0 commit comments

Comments
 (0)