# 🌳 Decision Predicate Graph (DPG)
This notebook shows a minimal working example of using DPG on a classification dataset.


In [3]:
%pip install git+https://github.com/Meta-Group/DPG.git

Collecting git+https://github.com/Meta-Group/DPG.git
  Cloning https://github.com/Meta-Group/DPG.git to /tmp/pip-req-build-kvme16o6
  Running command git clone --filter=blob:none --quiet https://github.com/Meta-Group/DPG.git /tmp/pip-req-build-kvme16o6
  Resolved https://github.com/Meta-Group/DPG.git to commit e968bd6354fd3bc4517c5ec1bcd6eea64d3868ad
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Note: you may need to restart the kernel to use updated packages.


In [17]:
from urllib.request import urlretrieve

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from dpg.core import DecisionPredicateGraph
from dpg.utils import get_dpg_edge_metrics
from dpg.visualizer import plot_dpg, plot_dpg_communities
from metrics.graph import GraphMetrics
from metrics.nodes import NodeMetrics

## 📥 Load your dataset

In [18]:
# Download the example dataset with the standard library rather than shelling out
# to wget: this works on any OS and raises (HTTPError / URLError) when the download
# fails, instead of silently leaving an empty file for read_csv to choke on.
DATA_URL = "https://raw.githubusercontent.com/Meta-Group/DPG/main/datasets/custom.csv"
DATA_PATH = "custom.csv"
urlretrieve(DATA_URL, DATA_PATH)

# The first CSV column is used as the row index; the last remaining column is the
# target and every column before it is a feature.
df = pd.read_csv(DATA_PATH, index_col=0)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

## 🧠 Train a Random Forest

In [19]:
# A small forest with a fixed seed, so repeated runs grow the same trees.
forest_params = {"n_estimators": 10, "random_state": 27}
model = RandomForestClassifier(**forest_params)
model.fit(X, y)

## 📊 Generate and visualize DPG

In [21]:
# Names handed to DPG: the feature columns, and the distinct target values as strings.
feature_names = X.columns.tolist()
class_names = np.unique(y).astype(str).tolist()

# Build the graph from the fitted forest; the samples are passed as a plain array.
dpg = DecisionPredicateGraph(model=model, feature_names=feature_names, target_names=class_names)
dot = dpg.fit(X.values)
dpg_model, nodes_list = dpg.to_networkx(dot)

# Graph-level, node-level and edge-level metrics, all derived from the same graph.
dpg_metrics = GraphMetrics.extract_graph_metrics(dpg_model, nodes_list, target_names=class_names)
df_nodes = NodeMetrics.extract_node_metrics(dpg_model, nodes_list)
df_edges = get_dpg_edge_metrics(dpg_model, nodes_list)

# Alternative plain-graph rendering, kept disabled:
#   plot_dpg("dpg_output", dot, df_nodes, df_edges, save_dir="../results", class_flag=False)

# Community view of the graph; output goes under ../results with base name "dpg_output".
plot_dpg_communities(
    "dpg_output", dot, df_nodes, dpg_metrics,
    save_dir="../results", class_flag=True, df_edges=df_edges,
)


Config file not found at 'config.yaml'. Using built-in defaults.
DPG initialized with perc_var=1e-09, decimal_threshold=6, n_jobs=-1

Starting DPG extraction *****************************************
Model Class: RandomForestClassifier
Model Class Module: sklearn.ensemble._forest
Model Estimators:  10
Model Params:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 10, 'n_jobs': None, 'oob_score': False, 'random_state': 27, 'verbose': 0, 'warm_start': False}
*****************************************************************


100%|██████████| 177/177 [00:00<00:00, 967.65it/s]


Total of paths: 1770
Building DPG...


Processing cases: 100%|██████████| 1770/1770 [00:00<00:00, 4677.64it/s]


Extracting graph...


NameError: name 'plot_dpg_communities' is not defined

## Communities

In [None]:
# Summarise each community in dpg_metrics["Communities"] as one table row per
# class: node labels starting with "Class" are treated as class nodes, every
# other member of the community as a predicate.
NO_CLASS_LABEL = "(no class)"

rows = []
for group in dpg_metrics["Communities"]:
    members = set(group)
    # Sorting makes the chosen label independent of set iteration order. A
    # community with several class nodes is keyed by all of them; one with none
    # is filed under a placeholder instead of failing on an empty lookup.
    class_labels = sorted(s for s in members if s.startswith("Class"))
    class_name = ", ".join(class_labels) if class_labels else NO_CLASS_LABEL
    for pred in sorted(members.difference(class_labels)):
        rows.append({"Class": class_name, "Predicate": pred})

# Explicit columns keep the groupby below valid even when no rows were collected.
# The frame gets its own name so the dataset frame `df` loaded earlier is not overwritten.
community_rows = pd.DataFrame(rows, columns=["Class", "Predicate"])

# Group predicates by class
grouped_df = (
    community_rows.groupby("Class")["Predicate"]
    .apply(lambda preds: ", ".join(sorted(preds)))
    .reset_index()
)
grouped_df

## Class Bounds

In [None]:
# Tabulate the per-class constraints stored under "Class Bounds" in dpg_metrics,
# one row per class with its constraints sorted and comma-joined.
# A missing key falls back to an empty mapping.
class_bounds = dpg_metrics.get("Class Bounds", {})

bound_records = [
    {"Class": class_name, "Constraints": ", ".join(sorted(bounds))}
    for class_name, bounds in class_bounds.items()
]

# Naming the columns up front keeps the frame well-formed (and sortable by
# "Class") even when there are no bounds to show.
df_bounds = pd.DataFrame(bound_records, columns=["Class", "Constraints"]).sort_values("Class")
df_bounds


## Predicates (nodes) explanation

In [None]:
import matplotlib.pyplot as plt

# Keep the ten nodes with the highest Local Reaching Centrality (LRC).
df_sorted = df_nodes.sort_values("Local reaching centrality", ascending=False).head(10)

# Horizontal bar chart drawn on an explicit Axes, one bar per node label.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(df_sorted["Label"], df_sorted["Local reaching centrality"], color="skyblue")
ax.set_xlabel("Local Reaching Centrality")
ax.set_ylabel("Label")
ax.set_title("Ranking of Local Reaching Centrality (LRC)")
ax.invert_yaxis()  # put the highest-ranked node at the top
fig.tight_layout()
plt.show()
