In [72]:
import typing as typ

import numpy as np

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Causal structure: X is a common cause of the action A and the outcome O,
# and A additionally affects O directly (A <- X -> O, A -> O).
model = DiscreteBayesianNetwork(
    [
        ("X", "A"),  # X influences action
        ("X", "O"),  # X influences outcome
        ("A", "O"),  # action influences outcome
    ]
)

# P(X): both states of X are equally likely.
cpd_X = TabularCPD(variable="X", variable_card=2, values=[[0.5], [0.5]])

# P(A | X). Each row below is the distribution over A for one state of X;
# the table is transposed because TabularCPD wants one row per state of A
# and one column per state of X.
cpd_A = TabularCPD(
    variable="A",
    variable_card=3,
    evidence=["X"],
    evidence_card=[2],
    values=np.array(
        [
            [1/3, 1/3, 1/3],  # X=0: uniform over the three actions
            [0.1, 0.3, 0.6],  # X=1
        ]
    ).T,
)

# P(O | X, A). Each row below is the distribution over O for one (X, A) pair,
# listed with A varying fastest (the column order TabularCPD uses for
# evidence=["X", "A"]); transposed for the same reason as above.
cpd_O = TabularCPD(
    variable="O",
    variable_card=3,
    evidence=["X", "A"],
    evidence_card=[2, 3],
    values=np.array(
        [
            [1/3, 1/3, 1/3],     # X=0, A=0
            [1/6, 1/4, 7/12],    # X=0, A=1
            [1/4, 1/2, 1/4],     # X=0, A=2
            [0.1, 0.1, 0.8],     # X=1, A=0
            [0.05, 0.05, 0.90],  # X=1, A=1
            [0.90, 0.05, 0.05],  # X=1, A=2
        ]
    ).T,
)


model.add_cpds(cpd_X, cpd_A, cpd_O)
assert model.check_model()

# Utility assigned to each of the three states of O (indexed by state number).
O_utility = np.array([10, 30, 20])

In [73]:
class DecisionTheoryResult(typ.NamedTuple):
    """Result of scoring every action of a model under one decision theory."""
    # Tag naming the decision theory that produced this result ("edt", "cdt", ...).
    dt: str
    # One row per action: the distribution over the states of O used for that action.
    A_to_O_probs: np.ndarray
    # One entry per action: A_to_O_probs @ O_utility, i.e. the expected utility.
    A_to_EU: np.ndarray
    # Index of the action with the largest expected utility.
    best_action: int
    # Expected utility of best_action.
    best_action_EU: float

def model_to_var_cards(model: DiscreteBayesianNetwork) -> dict[str, int]:
    """Map every variable that has a CPD attached to ``model`` to its cardinality.

    Args:
        model: Network whose attached CPDs are inspected.

    Returns:
        Dict from variable name to the ``variable_card`` of that variable's CPD.
    """
    attached_cpds = typ.cast(list[TabularCPD], model.get_cpds())
    cardinalities: dict[str, int] = {}
    for cpd in attached_cpds:
        cardinalities[cpd.variable] = typ.cast(int, cpd.variable_card)
    return cardinalities

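## EDT (evidential decision theory)

Score each action by conditioning on it as evidence: $EU(a) = \sum_o P(O=o \mid A=a)\,U(o)$.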

In [74]:
def edt(model: DiscreteBayesianNetwork, O_utility: np.ndarray) -> DecisionTheoryResult:
    """Choose the action with the highest expected utility when conditioning on A.

    For every action ``a`` the distribution P(O | A=a) is obtained by querying
    the unmodified network with ``A=a`` as evidence; the expected utility is
    that distribution dotted with ``O_utility``.

    Args:
        model: Network containing the action node "A" and the outcome node "O".
        O_utility: Utility of each state of "O"; its length must equal the
            cardinality of "O".

    Returns:
        A ``DecisionTheoryResult`` tagged "edt".

    Raises:
        AssertionError: If ``O_utility`` does not have one entry per state of "O".
    """
    cards = model_to_var_cards(model)
    assert len(O_utility) == cards["O"]

    engine = VariableElimination(model)
    rows = []
    for action in range(cards["A"]):
        posterior = engine.query(variables=["O"], evidence={"A": action})
        rows.append(posterior.values)  # type: ignore
    A_to_O_probs = np.array(rows)

    # One expected utility per action (rows of A_to_O_probs are per-action).
    A_to_EU = A_to_O_probs @ O_utility
    return DecisionTheoryResult(
        dt="edt",
        A_to_O_probs=A_to_O_probs,
        A_to_EU=A_to_EU,
        best_action=A_to_EU.argmax().item(),
        best_action_EU=A_to_EU.max().item(),
    )

# Score the example network under EDT and report the chosen action and its expected utility.
edt_result = edt(model, O_utility)
print(f"{edt_result.best_action = }")
print(f"{edt_result.best_action_EU = }")

edt_result.best_action = 1
edt_result.best_action_EU = 20.438596491228072


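## CDT (causal decision theory)

Score each action by intervening on it: $EU(a) = \sum_o P(O=o \mid do(A=a))\,U(o)$.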

In [75]:
def cdt(model: DiscreteBayesianNetwork, O_utility: np.ndarray) -> DecisionTheoryResult:
    """Choose the action with the highest expected utility under intervention on A.

    For every action ``a`` the intervention do(A=a) is modelled by graph
    surgery on a fresh network: all edges pointing into "A" are removed, the
    CPD of "A" is replaced by a point mass on ``a``, and the CPDs of all other
    variables are copied from ``model``. P(O | do(A=a)) is then queried from
    that network.

    Args:
        model: Network containing the action node "A" and the outcome node "O",
            with CPDs attached. Only this model's CPDs are used; it is not
            modified.
        O_utility: Utility of each state of "O"; its length must equal the
            cardinality of "O".

    Returns:
        A ``DecisionTheoryResult`` tagged "cdt".

    Raises:
        AssertionError: If ``O_utility`` does not have one entry per state of
            "O", or if an intervened network fails ``check_model``.
    """
    var_cards = model_to_var_cards(model)
    assert len(O_utility) == var_cards["O"]
    n_actions = var_cards["A"]

    # The surgery is the same for every action, so gather its ingredients once:
    # every edge into A (not just a hard-coded parent) and every CPD except A's,
    # taken from the model that was passed in rather than from notebook globals.
    edges_into_A = [edge for edge in model.edges() if edge[1] == "A"]
    other_cpds = [
        cpd
        for cpd in typ.cast(list[TabularCPD], model.get_cpds())
        if cpd.variable != "A"
    ]

    A_to_O_probs = np.zeros((n_actions, var_cards["O"]))
    for a in range(n_actions):
        do_model = DiscreteBayesianNetwork(model.edges())
        do_model.remove_edges_from(edges_into_A)
        # Point mass on the chosen action: A has no parents after the surgery.
        # NOTE(review): the original carried an unexplained "TODO: FIX" on
        # variable_card -- confirm nothing further was intended.
        do_model_cpd_A = TabularCPD(
            variable="A",
            variable_card=n_actions,
            values=[[1.0] if a == a_ else [0.0] for a_ in range(n_actions)],
        )
        # Copies keep the intervened network from sharing CPD objects with `model`.
        do_model.add_cpds(do_model_cpd_A, *(cpd.copy() for cpd in other_cpds))
        assert do_model.check_model()
        do_infer = VariableElimination(model=do_model)
        result = do_infer.query(variables=["O"], evidence={"A": a}).values  # type: ignore
        A_to_O_probs[a, :] = result  # type: ignore

    # One expected utility per action (rows of A_to_O_probs are per-action).
    A_to_EU = A_to_O_probs @ O_utility
    best_action = A_to_EU.argmax().item()
    best_action_EU = A_to_EU.max().item()
    return DecisionTheoryResult(
        "cdt",
        A_to_O_probs,
        A_to_EU,
        best_action,
        best_action_EU
    )

# Score the example network under CDT and report the chosen action and its expected utility.
cdt_res = cdt(model, O_utility)
print(f"{cdt_res.best_action = }")
print(f"{cdt_res.best_action_EU = }")

cdt_res.best_action = 1
cdt_res.best_action_EU = 20.416666666666668
