In [1]:
import numpy as np
import pandas as pd

# --- Synthetic e-commerce session data --------------------------------------
# Every array below has length `n`; the sample size is defined once here so
# that changing it cannot leave some columns at a stale hard-coded length.
n = 2500
random_ng = np.random.default_rng(42)  # seeded so the notebook is reproducible

# Session context, drawn independently.
device = random_ng.choice(["mobile", "desktop"], size=n, p=[0.65, 0.35])
traffic_source = random_ng.choice(
    ["search", "ads", "direct", "social"],
    size=n,
    p=[0.45, 0.25, 0.20, 0.10],
)

# Behavioural funnel: each 0/1 flag is derived from the previous stage.
# NOTE: the order of the random_ng calls fixes the generated data set, so the
# statements below must not be reordered.
search_use = (traffic_source == "search").astype(int)
# Filtering can only happen in search sessions (60% of them).
filtering_actions = (search_use * (random_ng.random(n) < 0.6)).astype(int)
# Basket: (filtered OR 15% spontaneous) AND a 50% gate.
basket_activity = ((filtering_actions | (random_ng.random(n) < 0.15)) * (random_ng.random(n) < 0.5)).astype(int)
# Checkout requires basket activity (55% of those sessions).
checkout_behaviour = (basket_activity * (random_ng.random(n) < 0.55)).astype(int)
# Shipping research: (checkout OR 10% spontaneous) AND a 70% gate.
shipping_research = ((checkout_behaviour | (random_ng.random(n) < 0.1)) * (random_ng.random(n) < 0.7)).astype(int)
purchase_history = random_ng.choice([0, 1], size=n, p=[0.80, 0.20])

# Intent: mostly driven by checkout + some prior + tiny device bump.
# `base` is the per-session probability of intent == 1, capped at 0.95.
base = 0.05 + 0.40 * checkout_behaviour + 0.15 * purchase_history + 0.05 * (device == "desktop")
intent = (random_ng.random(n) < np.clip(base, 0, 0.95)).astype(int)

demo_data = pd.DataFrame(
    {
        "traffic_source": traffic_source,
        "device": device,
        "purchase_history": purchase_history,
        "filtering_actions": filtering_actions,
        "basket_activity": basket_activity,
        "checkout_behaviour": checkout_behaviour,
        "shipping_research": shipping_research,
        "intent": intent,
    }
)

demo_data.head(5)

Unnamed: 0,traffic_source,device,purchase_history,filtering_actions,basket_activity,checkout_behaviour,shipping_research,intent
0,search,desktop,0,1,0,0,0,0
1,ads,mobile,1,0,0,0,0,0
2,social,desktop,0,0,0,0,0,1
3,direct,desktop,0,0,0,0,0,0
4,search,mobile,0,1,1,0,0,0


In [2]:
from sklearn.model_selection import train_test_split

# Label column and the predictor columns (every other column of demo_data)
target = "intent"
features = [
    "traffic_source",
    "device",
    "purchase_history",
    "filtering_actions",
    "basket_activity",
    "checkout_behaviour",
    "shipping_research",
]

# 80/20 hold-out split, stratified on the label and seeded for reproducibility
X = demo_data.loc[:, features].copy()
y = demo_data[target].astype(int)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [3]:
# Re-attach the label to the training features so the network can be fitted on
# one DataFrame. The column name comes from `target` instead of a repeated
# literal, and `assign` returns a new frame, leaving X_train unmodified.
train_data = X_train.assign(**{target: y_train})

In [4]:
from pgmpy.models.BayesianNetwork import BayesianNetwork
from pgmpy.estimators import BayesianEstimator

# Hand-specified DAG as (parent, child) pairs: a chain from traffic_source
# through filtering, basket and checkout, with checkout_behaviour,
# purchase_history and device as the three parents of intent.
network_edges = [
    ("traffic_source", "filtering_actions"),
    ("filtering_actions", "basket_activity"),
    ("basket_activity", "checkout_behaviour"),
    ("checkout_behaviour", "shipping_research"),
    ("checkout_behaviour", "intent"),
    ("purchase_history", "intent"),
    ("device", "intent"),
]
model = BayesianNetwork(network_edges)

# Estimate the CPDs from the training split using a BDeu prior
# (equivalent sample size 10).
model.fit(
    train_data,
    estimator=BayesianEstimator,
    prior_type="BDeu",
    equivalent_sample_size=10,
)

In [5]:
# Print one of the learned CPDs. It is selected by node name rather than by its
# position in model.cpds, so the cell does not depend on the list's ordering.
print(model.get_cpds("traffic_source"))

+------------------------+-----------+
| traffic_source(ads)    | 0.258955  |
+------------------------+-----------+
| traffic_source(direct) | 0.190796  |
+------------------------+-----------+
| traffic_source(search) | 0.45398   |
+------------------------+-----------+
| traffic_source(social) | 0.0962687 |
+------------------------+-----------+


In [6]:
from pgmpy.inference import VariableElimination

# Inference engine over the fitted network
inference = VariableElimination(model)

# Binary states are shown as Yes/No (as in the document); any other state
# (e.g. "ads", "mobile") is displayed unchanged.
yes_no_labels = {0: "No", 1: "Yes"}
table_columns = ["Attribute", "Value", "Intent = High", "Intent = Low"]

# One row per (attribute, state): P(intent | attribute = state), with that
# single attribute as the only evidence.
rows = []
for col in features:
    for val in model.get_cpds(col).state_names[col]:
        posterior = inference.query(
            variables=["intent"],
            evidence={col: val},
            show_progress=False,
        )
        # Look probabilities up by state label instead of assuming an order.
        intent_order = list(posterior.state_names["intent"])
        p_high = float(posterior.values[intent_order.index(1)])
        p_low = float(posterior.values[intent_order.index(0)])

        record = (col, yes_no_labels.get(val, val), round(p_high, 2), round(p_low, 2))
        rows.append(dict(zip(table_columns, record)))

cpt_df = pd.DataFrame(rows, columns=table_columns)
print(cpt_df.to_string(index=False))

         Attribute   Value  Intent = High  Intent = Low
    traffic_source     ads           0.12          0.88
    traffic_source  direct           0.12          0.88
    traffic_source  search           0.17          0.83
    traffic_source  social           0.12          0.88
            device desktop           0.17          0.83
            device  mobile           0.13          0.87
  purchase_history      No           0.11          0.89
  purchase_history     Yes           0.25          0.75
 filtering_actions      No           0.12          0.88
 filtering_actions     Yes           0.21          0.79
   basket_activity      No           0.10          0.90
   basket_activity     Yes           0.32          0.68
checkout_behaviour      No           0.10          0.90
checkout_behaviour     Yes           0.50          0.50
 shipping_research      No           0.11          0.89
 shipping_research     Yes           0.32          0.68
