In [1]:
import numpy as np
from pgmpy.factors.discrete.CPD import TabularCPD
from pgmpy.models import BayesianNetwork
from pgmpy.sampling import BayesianModelSampling
from pgmpy.utils import get_example_model
from typing import cast
from util import get_in_out_nodes, calc_accuracy

# Fixed seed handed to every check_noisiest_allowed(...) call in this notebook
# (as r_seed), so that sampling and noise generation are repeatable across runs.
SEED = 42


def add_noise(model: BayesianNetwork, std_dev = 0.05, r_seed: int | None = None) -> None:
    mean = 0.0
    mn = mean - 3 * std_dev
    mx = mean + 3 * std_dev
    rng = np.random.default_rng(r_seed)
    for cpd in cast(list[TabularCPD], model.get_cpds()):
        noise = rng.normal(mean, std_dev, cpd.values.shape)
        noise = np.clip(noise, mn, mx)
        cpd.values += noise
        cpd.normalize(inplace=True)


def check_noisiest_allowed(
        model_name: str,
        step: float = 0.05,
        tol: float = 2.0,
        r_seed: int | None = None,
        max_steps: int = 1000) -> None:
    """Find and report the largest noise level that keeps accuracy within ``tol``.

    Loads the example network ``model_name``, forward-samples 2000 test rows
    from it, and measures the mean of the per-variable accuracies returned by
    ``calc_accuracy``. It then tries noise standard deviations ``step``,
    ``2 * step``, ... on fresh copies of the network (via ``add_noise``) and
    stops at the first level whose mean accuracy differs from the original by
    ``tol`` or more. The last level that stayed below ``tol`` is reported,
    followed by the original and noisy CPDs of every evidence and query
    variable.

    Args:
        model_name: Name understood by ``pgmpy.utils.get_example_model``.
        step: Increment of the noise standard deviation; must be positive.
        tol: Largest tolerated absolute accuracy difference, in the units
            reported by ``calc_accuracy`` (presumably percentage points,
            given the default of 2.0 -- confirm against ``util``).
        r_seed: Seed used for both sampling and noise generation.
        max_steps: Upper bound on the number of noise levels tried, so the
            search terminates even if accuracy never degrades by ``tol``.

    Raises:
        ValueError: If ``step`` is not positive.

    Note:
        Accuracy does not degrade linearly (or necessarily monotonically) as
        the noise grows, so the reported level is the first breaking point
        found, not a guaranteed global bound.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    model = get_example_model(model_name)
    evidence_vars, query_vars = get_in_out_nodes(model)
    test_samples = BayesianModelSampling(model).forward_sample(
        size=2000, seed=r_seed, show_progress=False
    )

    def mean_accuracy(candidate) -> float:
        # Average the per-variable accuracies into a single score.
        accs = calc_accuracy(test_samples, candidate, evidence_vars, query_vars)
        return sum(accs.values()) / len(accs)

    acc_orig = mean_accuracy(model)

    # Last noise level that stayed within tolerance: (std_dev, model, accuracy).
    # Keeping the evaluated model itself (instead of regenerating it later)
    # guarantees the reported CPDs are the ones that were actually tested,
    # even when r_seed is None and the noise is not reproducible.
    accepted = None
    exceeded = False
    for k in range(1, max_steps + 1):
        # Multiply rather than accumulate to avoid floating-point drift.
        std_dev = k * step
        candidate = model.copy()
        add_noise(candidate, std_dev, r_seed)
        acc_candidate = mean_accuracy(candidate)
        diff = abs(acc_orig - acc_candidate)
        # Written as "not <" so a NaN difference also ends the search.
        if not (diff < tol):
            exceeded = True
            break
        accepted = (std_dev, candidate, acc_candidate)

    if accepted is None:
        print("No suitable noise threshold found.")
        return

    if not exceeded:
        print(f"Note: tolerance not exceeded after {max_steps} steps; "
              "reporting the largest noise level tested.")

    std_dev, noisy_model, acc_noisy = accepted
    print("Standard Deviation:", std_dev, "|", "Difference:", acc_orig - acc_noisy)
    print()

    for var in evidence_vars + query_vars:
        print("Original")
        print(model.get_cpds(var))
        print("Noisy")
        print(noisy_model.get_cpds(var))
        print()

In [2]:
# Search for the largest tolerated noise level on the "asia" example network,
# using the notebook-wide seed and the default step / tolerance.
check_noisiest_allowed("asia", r_seed=SEED)

Standard Deviation: 0.5499999999999999 | Difference: 0.0

Original
+-----------+------+
| asia(yes) | 0.01 |
+-----------+------+
| asia(no)  | 0.99 |
+-----------+------+
Noisy
+-----------+----------+
| asia(yes) | 0.298176 |
+-----------+----------+
| asia(no)  | 0.701824 |
+-----------+----------+

Original
+------------+-----+
| smoke(yes) | 0.5 |
+------------+-----+
| smoke(no)  | 0.5 |
+------------+-----+
Noisy
+------------+---------+
| smoke(yes) | 0.53072 |
+------------+---------+
| smoke(no)  | 0.46928 |
+------------+---------+

Original
+-----------+-------------+------------+
| either    | either(yes) | either(no) |
+-----------+-------------+------------+
| xray(yes) | 0.98        | 0.05       |
+-----------+-------------+------------+
| xray(no)  | 0.02        | 0.95       |
+-----------+-------------+------------+
Noisy
+-----------+---------------------+---------------------+
| either    | either(yes)         | either(no)          |
+-----------+-------------------

In [3]:
# Same search on the "alarm" example network, using the notebook-wide seed
# and the default step / tolerance.
check_noisiest_allowed("alarm", r_seed=SEED)

Standard Deviation: 0.10000000000000002 | Difference: 0.5318181818181671

Original
+--------------------+-----+
| HYPOVOLEMIA(TRUE)  | 0.2 |
+--------------------+-----+
| HYPOVOLEMIA(FALSE) | 0.8 |
+--------------------+-----+
Noisy
+--------------------+----------+
| HYPOVOLEMIA(TRUE)  | 0.109966 |
+--------------------+----------+
| HYPOVOLEMIA(FALSE) | 0.890034 |
+--------------------+----------+

Original
+------------------+------+
| LVFAILURE(TRUE)  | 0.05 |
+------------------+------+
| LVFAILURE(FALSE) | 0.95 |
+------------------+------+
Noisy
+------------------+-----------+
| LVFAILURE(TRUE)  | -0.128219 |
+------------------+-----------+
| LVFAILURE(FALSE) |  1.12822  |
+------------------+-----------+

Original
+---------------------+------+
| ERRLOWOUTPUT(TRUE)  | 0.05 |
+---------------------+------+
| ERRLOWOUTPUT(FALSE) | 0.95 |
+---------------------+------+
Noisy
+---------------------+----------+
| ERRLOWOUTPUT(TRUE)  | 0.160822 |
+---------------------+----------+

In [4]:
# Same search on the "child" example network, using the notebook-wide seed
# and the default step / tolerance.
check_noisiest_allowed("child", r_seed=SEED)

Standard Deviation: 0.10000000000000002 | Difference: 1.9571428571428626

Original
+--------------------+-----+
| BirthAsphyxia(yes) | 0.1 |
+--------------------+-----+
| BirthAsphyxia(no)  | 0.9 |
+--------------------+-----+
Noisy
+--------------------+-----------+
| BirthAsphyxia(yes) | 0.0979508 |
+--------------------+-----------+
| BirthAsphyxia(no)  | 0.902049  |
+--------------------+-----------+

Original
+----------------+----------+---------+
| LVH            | LVH(yes) | LVH(no) |
+----------------+----------+---------+
| LVHreport(yes) | 0.9      | 0.05    |
+----------------+----------+---------+
| LVHreport(no)  | 0.1      | 0.95    |
+----------------+----------+---------+
Noisy
+----------------+----------------------+---------------------+
| LVH            | LVH(yes)             | LVH(no)             |
+----------------+----------------------+---------------------+
| LVHreport(yes) | 1.1028765955893982   | 0.04808060729247888 |
+----------------+---------------------