In [41]:
import typing as typ

import numpy as np

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# some X -> Actions -> Outcomes <- X
model = DiscreteBayesianNetwork(
    [
        ("X", "A"), # X influences action 
        ("X", "O"), # X influences outcome
        ("A", "O"), # action influences outcome
    ]
)

cpd_X = TabularCPD(
    variable="X",
    variable_card=2,
    values=[[0.5], [0.5]]
)

# P(A|X)
cpd_A = TabularCPD(
    variable="A",
    variable_card=3,
    evidence=["X"],
    evidence_card=[2],
    # uniform if X=0, otherwise: P(A=0|X=1)=0.1, P(A=1|X=1)=0.3, P(A=2|X=1)=0.6
    values=[
        [1/3, 0.1],
        [1/3, 0.3],
        [1/3, 0.6],
    ]
)

# P(O|X,A)
cpd_O = TabularCPD(
    variable="O",
    variable_card=3,
    evidence=["X", "A"],
    evidence_card=[2, 3],
    values=[
        [1/3, 1/2, 1/4, 0.1, 0.05, 0.90],
        [1/3, 1/2, 1/2, 0.1, 0.05, 0.05],
        [1/3, 0.0, 1/4, 0.8, 0.90, 0.05],
    ],
)


model.add_cpds(cpd_X, cpd_A, cpd_O)
assert model.check_model()

O_utility = np.array([10, 30, 20])

## EDT

In [None]:
VERBOSE = True

def edt(model: DiscreteBayesianNetwork, O_utility: np.ndarray, *, verbose: bool = False) -> tuple[int, float]: #TODO: decision theory result type or sth?
    assert len(O_utility) == cpd_O.variable_card
    infer = VariableElimination(model)
    # O_probs = { 
    #     a: infer.query(variables=["O"], evidence={"A": a}).values #type:ignore
    #     for a in range(typ.cast(int, cpd_A.variable_card))
    # }
    # print(O_probs[0])
    A_to_O_probs = np.array(
        [infer.query(variables=["O"], evidence={"A": a}).values #type:ignore
        for a in range(typ.cast(int, cpd_A.variable_card))]
    )
    A_to_EU = A_to_O_probs @ O_utility
    if verbose:
        print(A_to_O_probs)
        print(A_to_EU)
    best_action = A_to_EU.argmax().item()
    best_action_value = A_to_EU.max().item()
    return best_action, best_action_value

best_action, best_action_value = edt(model, O_utility, verbose=VERBOSE)
print(f"{best_action = }")
print(f"{best_action_value = }")

[[0.27948718 0.27948718 0.44102564]
 [0.28684211 0.28684211 0.42631579]
 [0.66785714 0.21071429 0.12142857]]
[20.         20.         15.42857143]
best_action = 1
best_action_value = 20.0


## CDT

In [None]:
def cdt(model: DiscreteBayesianNetwork, O_utility: np.ndarray, *, verbose: bool = False) -> tuple[int, float]:
    assert len(O_utility) == cpd_O.variable_card #TODO: here and in cdt this does a check against a global variable that is assumed to have been used in defining model lol; FIX THIS!!!
    A_card = typ.cast(int, cpd_A.variable_card)
    O_card = typ.cast(int, cpd_O.variable_card)
    A_to_O_probs = np.zeros((A_card, O_card))
    for a in range(A_card):
        do_model = DiscreteBayesianNetwork(model.edges())
        do_model.remove_edges_from([("X", "A")])
        do_model_cpd_A = TabularCPD(
            variable="A",
            variable_card=A_card, #TODO: FIX
            values=[[1.0] if a == a_ else [0.0] for a_ in range(A_card)],
        )
        do_model.add_cpds(cpd_X, do_model_cpd_A, cpd_O)
        assert do_model.check_model()
        do_infer = VariableElimination(model=do_model)
        result = do_infer.query(variables=["O"], evidence={"A": a}).values #type: ignore
        A_to_O_probs[a, :] = result #type:ignore
    A_to_EU = A_to_O_probs @ O_utility
    if verbose:
        print(A_to_O_probs)
        print(A_to_EU)
    best_action = A_to_EU.argmax().item()
    best_action_value = A_to_EU.max().item()
    return best_action, best_action_value

best_action, best_action_value = cdt(model, O_utility, verbose=VERBOSE)
print(f"{best_action = }")
print(f"{best_action_value = }")

[[0.21666667 0.21666667 0.56666667]
 [0.275      0.275      0.45      ]
 [0.575      0.275      0.15      ]]
[20. 20. 17.]
best_action = 0
best_action_value = 20.0
