In [2]:
import numpy as np

def entropy(probs):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Args:
        probs: Iterable of probabilities. Entries that are not strictly
            positive contribute nothing to the sum (the usual
            ``0 * log2(0) = 0`` convention). The values are not checked
            for summing to one.

    Returns:
        ``-sum(p * log2(p))`` over the strictly positive entries, as a
        NumPy float. Degenerate inputs (e.g. ``[1.0]`` or ``[0.0, 1.0]``)
        yield ``0.0`` rather than ``-0.0``.
    """
    p = np.asarray(list(probs), dtype=float)
    # Drop non-positive entries up front so log2 never sees 0 (which would
    # produce -inf and a runtime warning).
    positive = p[p > 0]
    # Negating a zero sum gives -0.0, which formats as "-0.0000"; adding 0.0
    # normalises it to +0.0 and leaves every other value unchanged.
    return -np.sum(positive * np.log2(positive)) + 0.0

# --- Source distribution P(x) and its entropy H(X) ---
px1 = 1/3
px2 = 2/3
hx = entropy([px1, px2])

# --- Channel: conditional probabilities P(y|x) ---
py1_x1, py2_x1 = 2/3, 1/3      # given x1
py1_x2, py2_x2 = 1/10, 9/10    # given x2

# --- Joint probabilities P(x, y) = P(y|x) * P(x) ---
# Each product is needed twice (for the marginal and for the posterior),
# so name it once.
joint_x1_y1 = py1_x1*px1
joint_x2_y1 = py1_x2*px2
joint_x1_y2 = py2_x1*px1
joint_x2_y2 = py2_x2*px2

# --- Marginal P(y): sum the joint over x ---
py1 = joint_x1_y1 + joint_x2_y1
py2 = joint_x1_y2 + joint_x2_y2

# --- Posterior P(x|y) via Bayes' rule: P(x, y) / P(y) ---
px1_y1 = joint_x1_y1/py1
px2_y1 = joint_x2_y1/py1
px1_y2 = joint_x1_y2/py2
px2_y2 = joint_x2_y2/py2

# --- Entropy of X for each observed y ---
hx_y1 = entropy([px1_y1, px2_y1])
hx_y2 = entropy([px1_y2, px2_y2])

# --- Conditional entropy H(X|Y): average of H(X|Y=y) weighted by P(y) ---
hx_y = py1*hx_y1 + py2*hx_y2

# --- Mutual information I(X;Y) = H(X) - H(X|Y) ---
mi = hx - hx_y

# --- Report ---
print(f"P(y1) = {py1:.4f}")
print(f"P(y2) = {py2:.4f}\n")

for _label, _post_x1, _post_x2, _cond_h in (
    ("y1", px1_y1, px2_y1, hx_y1),
    ("y2", px1_y2, px2_y2, hx_y2),
):
    print(f"For {_label}:")
    print(f"P(x1|{_label}) = {_post_x1:.4f}")
    print(f"P(x2|{_label}) = {_post_x2:.4f}")
    print(f"H(X|Y={_label}) = {_cond_h:.4f} bits\n")

print(f"H(X) = {hx:.4f} bits")
print(f"H(X|Y) = {hx_y:.4f} bits")
print(f"I(X;Y) = {mi:.4f} bits")

P(y1) = 0.2889
P(y2) = 0.7111

For y1:
P(x1|y1) = 0.7692
P(x2|y1) = 0.2308
H(X|Y=y1) = 0.7793 bits

For y2:
P(x1|y2) = 0.1562
P(x2|y2) = 0.8438
H(X|Y=y2) = 0.6253 bits

H(X) = 0.9183 bits
H(X|Y) = 0.6698 bits
I(X;Y) = 0.2485 bits
