# 🌳 Decision Predicate Graph (DPG)
This notebook shows a minimal working example of using DPG on a classification dataset.


In [9]:
%pip install git+https://github.com/Meta-Group/DPG.git

Collecting git+https://github.com/Meta-Group/DPG.git
  Cloning https://github.com/Meta-Group/DPG.git to /tmp/pip-req-build-8cng9mbc
  Running command git clone --filter=blob:none --quiet https://github.com/Meta-Group/DPG.git /tmp/pip-req-build-8cng9mbc
  Resolved https://github.com/Meta-Group/DPG.git to commit a906954df2e4361f48bd10ceec3c49dec1a44b81
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Note: you may need to restart the kernel to use updated packages.


In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from dpg.core import DecisionPredicateGraph
from dpg.visualizer import plot_dpg

## 📥 Load your dataset

In [11]:
# Expected input: ../datasets/custom.csv, read with its first column as the row index.
# Layout convention used here: every column but the last is a feature,
# and the last column holds the target.
df = pd.read_csv("../datasets/custom.csv", index_col=0)
X, y = df.iloc[:, :-1], df.iloc[:, -1]

## 🧠 Train a Random Forest

In [12]:
# Small forest (10 trees) with a fixed seed so repeated runs build the same model.
# `fit` returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=10, random_state=27).fit(X, y)
# Bare expression keeps the fitted estimator as the cell's displayed output.
model

## 📊 Generate and visualize DPG

In [13]:
# Build the Decision Predicate Graph from the trained forest, derive graph- and
# node-level metrics from it, and render the result with plot_dpg.

# Feature labels are taken from the column names of X.
feature_names = X.columns.tolist()
# Class labels are the distinct target values (np.unique returns them sorted),
# converted to strings.
class_names = np.unique(y).astype(str).tolist()

# NOTE(review): decimal_threshold=2 presumably controls rounding of split
# thresholds and n_jobs=1 presumably means a single worker -- confirm against
# the DPG documentation for the installed version.
dpg = DecisionPredicateGraph(
    model=model,
    feature_names=feature_names,
    target_names=class_names,
    decimal_threshold=2,
    n_jobs=1
)
# fit receives the raw feature array (no column labels); its return value is
# kept as `dot` and reused by to_networkx and plot_dpg below.
dot = dpg.fit(X.values)
dpg_model, nodes_list = dpg.to_networkx(dot)

# dpg_metrics is later indexed with "Communities" and "Class Bounds";
# df_nodes is later used as a DataFrame with "Label" and
# "Local reaching centrality" columns.
dpg_metrics = dpg.extract_graph_metrics(dpg_model, nodes_list)
df_nodes = dpg.extract_node_metrics(dpg_model, nodes_list)

# NOTE(review): presumably writes dpg_output.png under ../datasets, with
# community colouring on and class colouring off -- confirm with plot_dpg's docs.
plot_dpg("dpg_output.png", dot, df_nodes, dpg_metrics, save_dir="../datasets", communities=True, class_flag=False)

TypeError: DecisionPredicateGraph.__init__() got an unexpected keyword argument 'perc_var'

## Communities

In [None]:
# Build a table mapping each class to the predicates that share its community.
# Reads dpg_metrics["Communities"]: an iterable of collections of node-label strings.
rows = []
for group in dpg_metrics["Communities"]:
    members = set(group)
    # Sort so the chosen class label does not depend on set iteration order.
    class_labels = sorted(s for s in members if s.startswith("Class"))
    if not class_labels:
        # No "Class..." node in this community: there is no class to key the
        # predicates on, so skip it instead of failing on an empty lookup.
        continue
    class_name = class_labels[0]
    rows.extend(
        {"Class": class_name, "Predicate": pred}
        for pred in members - {class_name}
    )

# Stored under its own name so the dataset frame `df` loaded earlier is not
# overwritten; explicit columns keep the "Class"/"Predicate" lookups below
# valid even when no rows were collected.
communities_df = pd.DataFrame(rows, columns=["Class", "Predicate"])

# One row per class, with its predicates sorted and comma-joined.
grouped_df = (
    communities_df.groupby("Class")["Predicate"]
    .apply(lambda preds: ", ".join(sorted(preds)))
    .reset_index()
)
grouped_df

## Class Bounds

In [None]:
# Tabulate the per-class constraints stored under dpg_metrics["Class Bounds"].
# A missing key falls back to an empty mapping, which yields an empty table.
class_bounds = dpg_metrics.get("Class Bounds", {})

# One row per class, with its constraints sorted and comma-joined.
bounds_records = [
    {"Class": class_name, "Constraints": ", ".join(sorted(bounds))}
    for class_name, bounds in class_bounds.items()
]

# Explicit columns guarantee that "Class" exists even when there are no
# records; without them, sorting an empty frame by "Class" raises KeyError.
df_bounds = pd.DataFrame(bounds_records, columns=["Class", "Constraints"]).sort_values("Class")
df_bounds


## Predicates (nodes) explanation

In [None]:
import matplotlib.pyplot as plt

# Keep the ten nodes with the highest Local Reaching Centrality.
df_sorted = df_nodes.sort_values("Local reaching centrality", ascending=False)
df_sorted = df_sorted.head(10)

# Horizontal bar chart drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(df_sorted["Label"], df_sorted["Local reaching centrality"], color="skyblue")
ax.set_xlabel("Local Reaching Centrality")
ax.set_ylabel("Label")
ax.set_title("Ranking of Local Reaching Centrality (LRC)")
ax.invert_yaxis()  # put the highest-ranked node at the top
fig.tight_layout()
plt.show()
