In [1]:
import numpy as np
import pandas as pd

n = 2500
random_ng = np.random.default_rng(42)

device = random_ng.choice(["mobile", "desktop"], size=n, p=[0.65, 0.35])
traffic_source = random_ng.choice(["search", "ads", "direct", "social"],
                                 size=2500, p=[0.45, 0.25, 0.20, 0.10])
search_use = (traffic_source == "search").astype(int)
filtering_actions = (search_use * (random_ng.random(2500) < 0.6)).astype(int)
basket_activity = ((filtering_actions | (random_ng.random(n) < 0.15)) * (random_ng.random(n) < 0.5)).astype(int)
checkout_behaviour = (basket_activity * (random_ng.random(n) < 0.55)).astype(int)
shipping_research = ((checkout_behaviour | (random_ng.random(n) < 0.1)) * (random_ng.random(n) < 0.7)).astype(int)
purchase_history = random_ng.choice([0, 1], size=n, p=[0.80, 0.20])

# Intent: mostly driven by checkout + some prior + tiny device bump
base = 0.05 + 0.40 * checkout_behaviour + 0.15 * purchase_history + 0.05 * (device == "desktop")
intent = (random_ng.random(n) < np.clip(base, 0, 0.95)).astype(int)



demo_data = pd.DataFrame(
        {
            "traffic_source": traffic_source,
            "search_use": search_use,
            "device": device,
            "purchase_history": purchase_history,
            "filtering_actions": filtering_actions,
            "basket_activity": basket_activity,
            "checkout_behaviour": checkout_behaviour,
            "shipping_research": shipping_research,
            "intent": intent,
        })

demo_data.head(5)

Unnamed: 0,traffic_source,search_use,device,purchase_history,filtering_actions,basket_activity,checkout_behaviour,shipping_research,intent
0,search,1,desktop,0,1,0,0,0,0
1,ads,0,mobile,1,0,0,0,0,0
2,social,0,desktop,0,0,0,0,0,1
3,direct,0,desktop,0,0,0,0,0,0
4,search,1,mobile,0,1,1,0,0,0


In [2]:
from sklearn.model_selection import train_test_split

# Features target separation
features = [
        "search_use", "filtering_actions",
        "basket_activity", "checkout_behaviour"
    ]
target = "intent"

# Standard split
X = demo_data[features].copy()
y = demo_data[target].astype(int)
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42, stratify=y)

In [3]:
# Combine features and target back into DataFrames
train_data = X_train.copy()
train_data['intent'] = y_train

In [4]:
from pgmpy.models.BayesianNetwork import BayesianNetwork
from pgmpy.estimators import BayesianEstimator

model = BayesianNetwork([
    ('filtering_actions', 'search_use'),
    ('filtering_actions', 'basket_activity'),
    ('search_use', 'basket_activity'),
    ('checkout_behaviour', 'basket_activity'),
    ('intent', 'basket_activity'),
])
model.fit(train_data, estimator=BayesianEstimator, prior_type='BDeu', equivalent_sample_size=10)

In [5]:
# Print one of the learned CPD
print(model.cpds[0])

+----------------------+----------+
| filtering_actions(0) | 0.727363 |
+----------------------+----------+
| filtering_actions(1) | 0.272637 |
+----------------------+----------+


In [6]:
print(set(features) - set(model.nodes()))

set()


In [7]:
from pgmpy.inference import VariableElimination

inference = VariableElimination(model)

rows = []
for col in features:
    # states for this variable
    states = model.get_cpds(col).state_names[col]
    for val in states:
        q = inference.query(variables=["intent"], evidence={col: val}, show_progress=False)
        intent_states = list(q.state_names["intent"])

        p_high = float(q.values[intent_states.index(1)])
        p_low  = float(q.values[intent_states.index(0)])

        rows.append({
            "Attribute": col,
            "Value": {0: "No", 1: "Yes"}.get(val, val), # yes and no as in the documnt
            "Intent = High": round(p_high, 2),
            "Intent = Low": round(p_low, 2),
        })

cpt_df = pd.DataFrame(rows, columns=["Attribute", "Value", "Intent = High", "Intent = Low"])
print(cpt_df.to_string(index=False))

         Attribute Value  Intent = High  Intent = Low
        search_use    No           0.14          0.86
        search_use   Yes           0.14          0.86
 filtering_actions    No           0.14          0.86
 filtering_actions   Yes           0.14          0.86
   basket_activity    No           0.14          0.86
   basket_activity   Yes           0.17          0.83
checkout_behaviour    No           0.14          0.86
checkout_behaviour   Yes           0.14          0.86


In [8]:
from pgmpy.inference import VariableElimination
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, roc_auc_score,
    average_precision_score, log_loss, brier_score_loss, confusion_matrix
)

# Inference
infer = VariableElimination(model)

# only chosen columns
evidence_cols = [c for c in X_test.columns if c in model.nodes() and c != "intent"]

y_prob = []
y_pred = []

for _, row in X_test[evidence_cols].iterrows():
    q = infer.query(["intent"], evidence=row.to_dict(), show_progress=False)
    states = list(q.state_names["intent"])          # usually [0, 1]
    p1 = float(q.values[states.index(1)])           # P(intent=1 | evidence)
    y_prob.append(p1)
    y_pred.append(1 if p1 >= 0.5 else 0)

y_prob = np.array(y_prob)
y_pred = np.array(y_pred)

# Metrics
acc = accuracy_score(y_test, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average="binary", zero_division=0)
roc = roc_auc_score(y_test, y_prob)
pr_auc = average_precision_score(y_test, y_prob)
ll = log_loss(y_test, np.c_[1 - y_prob, y_prob])
brier = brier_score_loss(y_test, y_prob)
cm = confusion_matrix(y_test, y_pred)

summary = pd.DataFrame([{
    "Model": "BayesNet",
    "Accuracy": round(acc, 4),
    "Precision": round(prec, 4),
    "Recall": round(rec, 4),
    "F1": round(f1, 4),
    "ROC AUC": round(roc, 4),
    "PR AUC": round(pr_auc, 4),
    "Log Loss": round(ll, 4),
    "Brier": round(brier, 4)
}])

print(summary.to_string(index=False))
print("\nConfusion matrix [ [TN FP]\n                     [FN TP] ]")
print(cm)

   Model  Accuracy  Precision  Recall  F1  ROC AUC  PR AUC  Log Loss  Brier
BayesNet     0.858        0.0     0.0 0.0   0.5773   0.185    0.4091  0.122

Confusion matrix [ [TN FP]
                     [FN TP] ]
[[429   0]
 [ 71   0]]
