# Playing with Joint Probabilities
Given the Bayesian network below, we want to generate the complete joint probability distribution table for the variables modelled in the network, using the Conditional Probability Tables (CPTs) provided.

![Bayesian Network](fig1.png)

In [1]:
import math
import pandas as pd
import itertools

In [2]:
# Conditional probability tables (CPTs) of the network.
# Every conditional table is keyed by a tuple whose first element is the value
# of the child variable, followed by the value(s) of its parent(s).

# Prior P(M), keyed by m.
P_M = {
    True: 0.2,
    False: 0.8,
}

# P(S | M), keyed by (s, m).
P_S_given_M = {
    (True, True): 0.8,
    (False, True): 0.2,
    (True, False): 0.2,
    (False, False): 0.8,
}

# P(B | M), keyed by (b, m).
P_B_given_M = {
    (True, True): 0.2,
    (False, True): 0.8,
    (True, False): 0.05,
    (False, False): 0.95,
}

# P(C | S, B), keyed by (c, s, b).
P_C_given_S_B = {
    (True, True, True): 0.8,
    (False, True, True): 0.2,
    (True, True, False): 0.8,
    (False, True, False): 0.2,
    (True, False, True): 0.8,
    (False, False, True): 0.2,
    (True, False, False): 0.05,
    (False, False, False): 0.95,
}

# P(H | B), keyed by (h, b).
P_H_given_B = {
    (True, True): 0.8,
    (False, True): 0.2,
    (True, False): 0.6,
    (False, False): 0.4,
}

In [3]:
# Enumerate every truth assignment of the five binary variables (2**5 = 32 tuples);
# each tuple is later unpacked as (m, s, b, c, h).
combinations = list(itertools.product((True, False), repeat=5))

### (a) Joint Probability Table

In [4]:
def joint_prob(m, s, b, c, h):
    """Evaluate the factorised joint probability of one full assignment.

    The joint is computed as the product of the five CPT entries:
    P(M, S, B, C, H) = P(M) * P(S|M) * P(B|M) * P(C|S,B) * P(H|B).

    Args:
        m, s, b, c, h: Values of the five variables, used as lookup keys into
            the module-level CPT dictionaries.

    Returns:
        A 6-tuple ``(P(M), P(S|M), P(B|M), P(C|S,B), P(H|B), joint)`` where
        ``joint`` is the product of the first five entries.
    """
    # Look up each factor; conditional tables are keyed child-first.
    factors = (
        P_M[m],
        P_S_given_M[s, m],
        P_B_given_M[b, m],
        P_C_given_S_B[c, s, b],
        P_H_given_B[h, b],
    )
    prior_m, s_term, b_term, c_term, h_term = factors
    return factors + (prior_m * s_term * b_term * c_term * h_term,)

In [5]:
# One row per assignment: the five variable values followed by the five CPT
# factors and their product, in the order returned by joint_prob.
joint_prob_table = [
    [*assignment, *joint_prob(*assignment)]
    for assignment in combinations
]

In [6]:
# Wrap the rows in a DataFrame: variable values first, then the individual
# CPT factors, then the joint probability of the row.
jpt_columns = [
    "M", "S", "B", "C", "H",
    "P(M)", "P(S|M)", "P(B|M)", "P(C|S, B)", "P(H|B)",
    "P(M, S, B, C, H)",
]
df = pd.DataFrame(joint_prob_table, columns=jpt_columns)

In [7]:
# Show the full table between two horizontal rules, then report the total
# probability mass (a valid joint distribution sums to 1).
rule = "-" * 65
print("Joint Probability Distribution Table", rule, sep="\n")
print(df)
print(rule)
total_prob = df.loc[:, 'P(M, S, B, C, H)'].sum()
print(f"Sum of all probabilities: {total_prob:.6f}")


Joint Probability Distribution Table
-----------------------------------------------------------------
        M      S      B      C      H  P(M)  P(S|M)  P(B|M)  P(C|S, B)  \
0    True   True   True   True   True   0.2     0.8    0.20       0.80   
1    True   True   True   True  False   0.2     0.8    0.20       0.80   
2    True   True   True  False   True   0.2     0.8    0.20       0.20   
3    True   True   True  False  False   0.2     0.8    0.20       0.20   
4    True   True  False   True   True   0.2     0.8    0.80       0.80   
5    True   True  False   True  False   0.2     0.8    0.80       0.80   
6    True   True  False  False   True   0.2     0.8    0.80       0.20   
7    True   True  False  False  False   0.2     0.8    0.80       0.20   
8    True  False   True   True   True   0.2     0.2    0.20       0.80   
9    True  False   True   True  False   0.2     0.2    0.20       0.80   
10   True  False   True  False   True   0.2     0.2    0.20       0.20   
11   True

### (b) Marginal Probability Table

In [8]:
def marginal_prob(df, var, prob_col='P(M, S, B, C, H)'):
    """Marginalise a joint probability table onto a single variable.

    Args:
        df: DataFrame with one row per joint assignment.
        var: Name of the column to keep; every other variable is summed out.
        prob_col: Name of the column holding the joint probabilities.
            Defaults to the joint column used throughout this notebook.

    Returns:
        A Series indexed by the distinct values of ``var`` whose entries are
        the summed joint probabilities for each value.
    """
    # Select the probability column *before* aggregating so that only that
    # column is summed; summing the whole frame first is wasted work and
    # fails if the frame holds a column that cannot be summed.
    return df.groupby(var)[prob_col].sum()

In [9]:
# Marginalise the joint table onto each variable in turn (one Series each).
marginal_prob_M, marginal_prob_S, marginal_prob_B, marginal_prob_C, marginal_prob_H = (
    marginal_prob(df, variable) for variable in ("M", "S", "B", "C", "H")
)

# Report P(X = True) for every variable, one per line.
print("Marginal Probabilitis:")
print(
    f"P(M): {marginal_prob_M[True]:.6f}",
    f"P(S): {marginal_prob_S[True]:.6f}",
    f"P(B): {marginal_prob_B[True]:.6f}",
    f"P(C): {marginal_prob_C[True]:.6f}",
    f"P(H): {marginal_prob_H[True]:.6f}",
    sep="\n",
)

Marginal Probabilitis:
P(M): 0.200000
P(S): 0.320000
P(B): 0.080000
P(C): 0.320000
P(H): 0.616000


### (c) Given a person has 'severe headache' and his serum calcium is not increased as per the tests, what are the chances now that this person will have: (i) Metastatic Cancer, (ii) Brain Tumor

In [10]:
# Query: P(M = True | H = True, S = False).
# By the definition of conditional probability this equals
#     P(M = True, H = True, S = False) / P(H = True, S = False),
# and both terms are obtained by summing the matching rows of the joint table.

evidence = df['H'].eq(True) & df['S'].eq(False)

p_h_true_s_false = df.loc[evidence, 'P(M, S, B, C, H)'].sum()
p_m_h_true_s_false = df.loc[df['M'].eq(True) & evidence, 'P(M, S, B, C, H)'].sum()
p_m_given_h_true_s_false = p_m_h_true_s_false / p_h_true_s_false
print(f"P(M = True | H = True, S = False): {p_m_given_h_true_s_false:.6f}")

P(M = True | H = True, S = False): 0.061538


In [11]:
# Query: P(B = True | H = True, S = False).
# By the definition of conditional probability this equals
#     P(B = True, H = True, S = False) / P(H = True, S = False).
# The denominator is the evidence probability, i.e. the numerator summed over
# both values of B; only the B = True rows go into the numerator.

evidence = df['H'].eq(True) & df['S'].eq(False)

p_h_true_s_false = df.loc[evidence, 'P(M, S, B, C, H)'].sum()
p_b_h_true_s_false = df.loc[df['B'].eq(True) & evidence, 'P(M, S, B, C, H)'].sum()
p_b_given_h_true_s_false = p_b_h_true_s_false / p_h_true_s_false
print(f"P(B = True | H = True, S = False): {p_b_given_h_true_s_false:.6f}")

P(B = True | H = True, S = False): 0.076923


### (d) Prove from the JPT that C $\perp$ H $\mid$ B

In [12]:
# C is conditionally independent of H given B only if, for EVERY assignment
# (c, h, b) with P(B = b) > 0,
#     P(C = c, H = h | B = b) = P(C = c | B = b) * P(H = h | B = b).
# Checking the single case C = H = B = True is not sufficient, so all eight
# assignments are tested. Every term is a sum of matching rows of the joint
# table divided by P(B = b).

joint_col = 'P(M, S, B, C, H)'


def _jpt_mass(mask):
    """Return the total joint probability of the rows of `df` selected by `mask`."""
    return df.loc[mask, joint_col].sum()


# (c, h, b, lhs, rhs) for every assignment where the factorisation fails.
violations = []
for c_val, h_val, b_val in itertools.product([True, False], repeat=3):
    given_b = df['B'] == b_val
    p_b = _jpt_mass(given_b)
    if p_b == 0:
        # Conditioning on a zero-probability event is undefined; nothing to check.
        continue
    lhs = _jpt_mass((df['C'] == c_val) & (df['H'] == h_val) & given_b) / p_b
    rhs = (_jpt_mass((df['C'] == c_val) & given_b) / p_b) * (_jpt_mass((df['H'] == h_val) & given_b) / p_b)
    # Compare with a tight tolerance: the two sides are sums of float products,
    # so exact equality is too strict, while rounding to two decimals would
    # accept differences of up to ~0.005.
    if not math.isclose(lhs, rhs, rel_tol=1e-9, abs_tol=1e-12):
        violations.append((c_val, h_val, b_val, lhs, rhs))

# Representative case reported below: C = True, H = True, B = True.
b_is_true = df['B'] == True
p_b_true = _jpt_mass(b_is_true)

p_c_h_b = _jpt_mass((df['C'] == True) & (df['H'] == True) & b_is_true)
p_c_h_given_b = p_c_h_b / p_b_true

p_c_b = _jpt_mass((df['C'] == True) & b_is_true)
p_c_given_b = p_c_b / p_b_true

p_h_b = _jpt_mass((df['H'] == True) & b_is_true)
p_h_given_b = p_h_b / p_b_true

if not violations:
    print("C is conditionally independent of H given B")
else:
    print("C is not conditionally independent of H given B")
print(f"P(C = True, H = True | B = True): {p_c_h_given_b:.6f}")
print(f"P(C = True | B = True) * P(H = True | B = True): {p_c_given_b * p_h_given_b:.6f}")
# List every assignment that breaks the factorisation (silent when none do).
for c_val, h_val, b_val, lhs, rhs in violations:
    print(f"Violation at C = {c_val}, H = {h_val}, B = {b_val}: {lhs:.6f} != {rhs:.6f}")



C is conditionally independent of H given B
P(C = True, H = True | B = True): 0.640000
P(C = True | B = True) * P(H = True | B = True): 0.640000
