Exercise 1

In [7]:
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Bayesian network structure for the spam example.
# S is a parent of O, L and M; L is additionally a parent of M.
edges = [
    ('S', 'O'),
    ('S', 'L'),
    ('S', 'M'),
    ('L', 'M'),
]
model = DiscreteBayesianNetwork(edges)

# --- Conditional probability tables ---------------------------------------

# Prior on S: P(S=0)=0.6, P(S=1)=0.4
cpd_S = TabularCPD(variable='S', variable_card=2, values=[[0.6], [0.4]])

# O given S (one column per value of S)
cpd_O = TabularCPD(
    variable='O',
    variable_card=2,
    values=[
        [0.9, 0.3],  # P(O=0 | S)
        [0.1, 0.7],  # P(O=1 | S)
    ],
    evidence=['S'],
    evidence_card=[2],
)

# L given S: spam emails are more likely to contain links.
cpd_L = TabularCPD(
    variable='L',
    variable_card=2,
    values=[
        [0.7, 0.2],  # P(L=0 | S)
        [0.3, 0.8],  # P(L=1 | S)
    ],
    evidence=['S'],
    evidence_card=[2],
)

# M depends on both S and L; e.g. a spam email with many links might tend
# to be longer. Columns follow the evidence order given below:
# (S=0,L=0), (S=0,L=1), (S=1,L=0), (S=1,L=1).
cpd_M = TabularCPD(
    variable='M',
    variable_card=2,
    values=[
        [0.8, 0.5, 0.5, 0.2],  # P(M=0 | S, L)
        [0.2, 0.5, 0.5, 0.8],  # P(M=1 | S, L)
    ],
    evidence=['S', 'L'],
    evidence_card=[2, 2],
)

# Attach the CPDs and make sure the model is consistent before using it.
model.add_cpds(cpd_S, cpd_O, cpd_L, cpd_M)
assert model.check_model()

# Part (a): independencies implied by the graph
print("Independencies in the network:")
print(model.get_independencies())

# Part (b): classification, i.e. computing P(S | O, L, M)
infer = VariableElimination(model)
observed = {'O': 1, 'L': 1, 'M': 1}
query = infer.query(variables=['S'], evidence=observed)

# If the email has an offer, contains links and is long, it has a 95.22%
# probability of being spam.
print("\nP(S | O=1, L=1, M=1):")
print(query)


Independencies in the network:
(L ⟂ O | S)
(O ⟂ M | S)

P(S | O=1, L=1, M=1):
+------+----------+
| S    |   phi(S) |
| S(0) |   0.0478 |
+------+----------+
| S(1) |   0.9522 |
+------+----------+


Exercise 2

In [8]:
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Chain-structured Bayesian network: D -> B -> R
#   D = die category, states in order [Prime, Six, Other]
#   B = ball added to the urn, states in order [Black, Red, Blue]
#   R = colour of the drawn ball, states in order [Red, Blue, Black]
edges = [('D', 'B'), ('B', 'R')]
model = DiscreteBayesianNetwork(edges)

# Prior over the die category: [Prime, Six, Other]
cpd_D = TabularCPD(
    variable='D',
    variable_card=3,
    values=[[0.5], [1/6], [1/3]],
)

# The added ball is a deterministic function of the die category:
# column j (die category j) puts all its mass on ball j.
cpd_B = TabularCPD(
    variable='B',
    variable_card=3,
    values=[
        [1, 0, 0],  # Black
        [0, 1, 0],  # Red
        [0, 0, 1],  # Blue
    ],
    evidence=['D'],
    evidence_card=[3],
)

# Base urn: 3 red, 4 blue, 2 black (9 balls). After adding one ball there
# are 10 balls, so each column below is the urn composition divided by 10:
#   added Black -> 3 red, 4 blue, 3 black -> P(Red) = 3/10
#   added Red   -> 4 red, 4 blue, 2 black -> P(Red) = 4/10
#   added Blue  -> 3 red, 5 blue, 2 black -> P(Red) = 3/10
cpd_R = TabularCPD(
    variable='R',
    variable_card=3,
    values=[
        [3/10, 4/10, 3/10],  # Red
        [4/10, 4/10, 5/10],  # Blue
        [3/10, 2/10, 2/10],  # Black
    ],
    evidence=['B'],
    evidence_card=[3],
)

# Register the CPDs and validate the model.
model.add_cpds(cpd_D, cpd_B, cpd_R)
assert model.check_model()

# Marginal distribution of the drawn colour (no evidence).
infer = VariableElimination(model)
query = infer.query(variables=['R'])

print("Bayesian network estimated probabilities of drawing each color:")
print(query)
# Index 0 of R is Red (first row of cpd_R).
print(f"\nP(Red) = {query.values[0]:.4f}")


Bayesian network estimated probabilities of drawing each color:
+------+----------+
| R    |   phi(R) |
| R(0) |   0.3167 |
+------+----------+
| R(1) |   0.4333 |
+------+----------+
| R(2) |   0.2500 |
+------+----------+

P(Red) = 0.3167


Exercise 3

In [9]:
# ==============================================================
# EXERCISE 1) — Simulation: Estimate which player wins more often
# ==============================================================

import random
from math import comb
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

def simulate_game(n_sim=10000, seed=None):
    """Estimate by Monte Carlo simulation how often each player wins.

    One simulated round:
      1. The starter is chosen uniformly at random between "P0" and "P1".
      2. The starter rolls a fair six-sided die, giving n.
      3. The other player flips their own coin 2*n times, giving m heads.
         P0's coin is fair; P1's coin lands heads with probability 4/7.
      4. The starter wins if n >= m, otherwise the other player wins.

    Parameters
    ----------
    n_sim : int
        Number of rounds to simulate. Must be at least 1.
    seed : int or None
        If given, a private ``random.Random(seed)`` generator is used so the
        run is reproducible and the global random state is left untouched.
        If None (default), the module-level ``random`` functions are used.

    Returns
    -------
    dict
        ``{"P0": fraction_won_by_P0, "P1": fraction_won_by_P1}``. The same
        fractions are also printed, rounded to three decimals.

    Raises
    ------
    ValueError
        If ``n_sim`` is smaller than 1 (the win fractions would otherwise
        require a division by zero or by a negative count).
    """
    if n_sim < 1:
        raise ValueError(f"n_sim must be a positive integer, got {n_sim!r}")

    # The module itself exposes choice/randint/random, so it can stand in
    # for a Random instance when no seed is requested.
    rng = random.Random(seed) if seed is not None else random

    # Probability of heads for the coin owned by each player.
    head_prob = {"P0": 0.5, "P1": 4/7}
    wins = {"P0": 0, "P1": 0}

    for _ in range(n_sim):
        # Step 1: randomly decide who starts; the other player flips.
        starter = rng.choice(["P0", "P1"])
        flipper = "P1" if starter == "P0" else "P0"

        # Step 2: starter rolls a fair die.
        n = rng.randint(1, 6)

        # Step 3: the other player flips their coin 2n times.
        p_head = head_prob[flipper]
        m = sum(1 for _ in range(2 * n) if rng.random() < p_head)

        # Step 4: starter wins on n >= m, otherwise the flipper wins.
        winner = starter if n >= m else flipper
        wins[winner] += 1

    fractions = {player: count / n_sim for player, count in wins.items()}

    print("=== Exercise 1: Simulation Results ===")
    print(f"P0 wins: {fractions['P0']:.3f}")
    print(f"P1 wins: {fractions['P1']:.3f}")
    print("--------------------------------------\n")

    return fractions

# Run the Monte Carlo estimate with the default number of rounds.
simulate_game()


# ==============================================================
# EXERCISE 2) — Define Bayesian Network structure using pgmpy
# ==============================================================


print("=== Exercise 2: Define Bayesian Network ===")

# Graph:
#   S -> N  (the starter is the one who rolls the die)
#   S -> M  (which coin is flipped depends on who started)
#   N -> M  (the number of flips, 2N, depends on the die result)
model = DiscreteBayesianNetwork([('S', 'N'), ('S', 'M'), ('N', 'M')])

# S: who starts, decided by a fair coin (state 0 = P0, state 1 = P1)
cpd_S = TabularCPD(variable='S', variable_card=2, values=[[0.5], [0.5]])

# N: fair die with faces 1-6; the distribution is the same for either starter,
# so every entry of the 6x2 table is 1/6.
cpd_N = TabularCPD(
    variable='N',
    variable_card=6,
    values=[[1/6] * 2] * 6,
    evidence=['S'],
    evidence_card=[2],
)

# M: number of heads in 2N flips, i.e. Binomial(2N, p_head) with
#   p_head = 4/7 when S=0 (P0 starts, so P1 flips the rigged coin)
#   p_head = 0.5 when S=1 (P1 starts, so P0 flips the fair coin)
# Parent configurations are enumerated with S varying slowest and the die
# face varying fastest, matching evidence=['S', 'N'] below.
parent_configs = [
    (4/7 if starter == 0 else 0.5, 2 * face)
    for starter in (0, 1)
    for face in range(1, 7)
]
# One row per possible head count 0-12, one column per parent configuration.
rows = [
    [
        comb(flips, heads) * (p_head ** heads) * ((1 - p_head) ** (flips - heads))
        for p_head, flips in parent_configs
    ]
    for heads in range(13)
]

cpd_M = TabularCPD(
    variable='M',
    variable_card=13,
    values=rows,
    evidence=['S', 'N'],
    evidence_card=[2, 6],
)

model.add_cpds(cpd_S, cpd_N, cpd_M)
assert model.check_model()
print("Bayesian network successfully defined and validated.")
print("--------------------------------------\n")


# ==============================================================
# EXERCISE 3) — Inference: Who most likely started given M=1?
# ==============================================================

print("=== Exercise 3: Bayesian Inference ===")

# Posterior over the starter after observing exactly one head.
infer = VariableElimination(model)
result = infer.query(variables=['S'], evidence={'M': 1})

print("Probability that each player started given M=1:")
print(result)
print(f"\nP(P0 started) = {result.values[0]:.4f}")
print(f"P(P1 started) = {result.values[1]:.4f}")
print("--------------------------------------\n")

# Report the more probable starter (a tie is reported as P1).
print(
    " P0 is more likely to have started when only one head was observed."
    if result.values[0] > result.values[1]
    else " P1 is more likely to have started when only one head was observed."
)


=== Exercise 1: Simulation Results ===
P0 wins: 0.434
P1 wins: 0.566
--------------------------------------

=== Exercise 2: Define Bayesian Network ===
Bayesian network successfully defined and validated.
--------------------------------------

=== Exercise 3: Bayesian Inference ===
Probability that each player started given M=1:
+------+----------+
| S    |   phi(S) |
| S(0) |   0.4529 |
+------+----------+
| S(1) |   0.5471 |
+------+----------+

P(P0 started) = 0.4529
P(P1 started) = 0.5471
--------------------------------------

✅ P1 is more likely to have started when only one head was observed.
