# Exercises 15: Conditional Independence 2

Consider the following joint distribution of three binary variables $P(A,B,C)$:


The three variables are either fully independent, or two of the variables are independent, conditional on the third.

Can you determine which, and what the correct factorisation of the distribution is?

In [27]:
import numpy as np

JointPDF = np.array(
    [
        [
            [0.096, 0.144],
            [0.224, 0.336]
        ],
        [
            [0.09, 0.09],
            [0.01, 0.01],
        ]

    ]
)

In [30]:
print(JointPDF)

[[[0.096 0.144]
  [0.224 0.336]]

 [[0.09  0.09 ]
  [0.01  0.01 ]]]


In [28]:
# Calculate some distributions

PA = JointPDF.sum(axis=(1,2))
PB = JointPDF.sum(axis=(0,2))
PC = JointPDF.sum(axis=(0,1))
print(PA)
print(PB)
print(PC)

[0.8 0.2]
[0.42 0.58]
[0.42 0.58]


In [40]:
# Are they independent?
import itertools
iterator = itertools.product((0,1),repeat=3)
IndependentPDF = np.zeros([2,2,2])
for i in iterator:
    print(i)
    IndependentPDF[i[0],i[1],i[2]] = PA[i[0]] *  PB[i[1]] * PC[i[2]]

print(f"Independent PDF:\n{IndependentPDF}")


# Alternative explicit calculation
IndependentPDF = np.zeros([2,2,2])
IndependentPDF[0,0,0] = PA[0]*PB[0]*PC[0]
IndependentPDF[0,0,1] = PA[0]*PB[0]*PC[1]
IndependentPDF[0,1,0] = PA[0]*PB[1]*PC[0]
IndependentPDF[0,1,1] = PA[0]*PB[1]*PC[1]
IndependentPDF[1,0,0] = PA[0]*PB[0]*PC[0]
IndependentPDF[1,0,1] = PA[0]*PB[0]*PC[1]
IndependentPDF[1,1,0] = PA[1]*PB[1]*PC[0]
IndependentPDF[1,1,1] = PA[1]*PB[1]*PC[1]

print(f"Independent PDF:\n{IndependentPDF}")


(0, 0, 0)
(0, 0, 1)
(0, 1, 0)
(0, 1, 1)
(1, 0, 0)
(1, 0, 1)
(1, 1, 0)
(1, 1, 1)
Independent PDF:
[[[0.14112 0.19488]
  [0.19488 0.26912]]

 [[0.03528 0.04872]
  [0.04872 0.06728]]]
Independent PDF:
[[[0.14112 0.19488]
  [0.19488 0.26912]]

 [[0.14112 0.19488]
  [0.04872 0.06728]]]


In [62]:
# Now, compute some conditionals
# First the three possible joint distributions
P_AB = JointPDF.sum(axis=2)
P_AC = JointPDF.sum(axis=1)
P_BC = JointPDF.sum(axis=0)

print(f"P(A,B)=\n{P_AB}\n")
print(f"P(A,C)=\n{P_AC}\n")
print(f"P(B,C)=\n{P_BC}\n")

# Now the conditionals. Row index the distributed variable, columns the conditioning variable


P_A_B = P_AB/PB
print(f"P(A|B)=\n{P_A_B}\n")
P_B_A = P_AB.T/PA
print(f"P(B|A)=\n{P_B_A}\n")

P_A_C = P_AC/PC
print(f"P(A|C)=\n{P_A_C}\n")
P_C_A = P_AC.T/PA
print(f"P(C|A)=\n{P_C_A}\n")

P_B_C = P_BC.T/PB
print(f"P(B|C)=\n{P_B_C}\n")
P_C_B = P_BC/PC
print(f"P(C|B)=\n{P_C_B}\n")


P(A,B)=
[[0.24 0.56]
 [0.18 0.02]]

P(A,C)=
[[0.32 0.48]
 [0.1  0.1 ]]

P(B,C)=
[[0.186 0.234]
 [0.234 0.346]]

P(A|B)=
[[0.57142857 0.96551724]
 [0.42857143 0.03448276]]

P(B|A)=
[[0.3 0.9]
 [0.7 0.1]]

P(A|C)=
[[0.76190476 0.82758621]
 [0.23809524 0.17241379]]

P(C|A)=
[[0.4 0.5]
 [0.6 0.5]]

P(B|C)=
[[0.44285714 0.40344828]
 [0.55714286 0.59655172]]

P(C|B)=
[[0.44285714 0.40344828]
 [0.55714286 0.59655172]]



We now have all the distributions. There are three possible combinations

a) $P(A,B,C) = P(A\vert C)P(B\vert C)P(C)$  
b) $P(A,B,C) = P(A\vert B)P(C\vert B)P(B)$  
c) $P(A,B,C) = P(C\vert A)P(B\vert A)P(A)$  

In [65]:
# P(A\vert C)P(B\vert C)P(C)
iterator = itertools.product((0,1),repeat=3)
PDF = np.zeros([2,2,2])
for i in iterator:
    PDF[i[0],i[1],i[2]] = P_A_C[i[0],i[2]] *  P_B_C[i[1],i[2]] * PC[i[2]]

print(f"P(A|C)P(B|C)P(C):\n{PDF}")

P(A|C)P(B|C)P(C):
[[[0.14171429 0.19365517]
  [0.17828571 0.28634483]]

 [[0.04428571 0.04034483]
  [0.05571429 0.05965517]]]


In [66]:
# P(A\vert B)P(C\vert B)P(B)
iterator = itertools.product((0,1),repeat=3)
PDF = np.zeros([2,2,2])
for i in iterator:
    PDF[i[0],i[1],i[2]] = P_A_B[i[0],i[1]] *  P_C_B[i[2],i[1]] * PB[i[1]]

print(f"P(A|B)P(C|B)P(B):\n{PDF}")

P(A|B)P(C|B)P(B):
[[[0.10628571 0.13371429]
  [0.22593103 0.33406897]]

 [[0.07971429 0.10028571]
  [0.00806897 0.01193103]]]


In [68]:
# P(B\vert A)P(C\vert A)P(A)
iterator = itertools.product((0,1),repeat=3)
PDF = np.zeros([2,2,2])
for i in iterator:
    PDF[i[0],i[1],i[2]] = P_B_A[i[1],i[0]] *  P_C_A[i[2],i[0]] * PA[i[0]]

print(f"P(A|C)P(B|C)P(C):\n{PDF}")

P(A|C)P(B|C)P(C):
[[[0.096 0.144]
  [0.224 0.336]]

 [[0.09  0.09 ]
  [0.01  0.01 ]]]


This final factorisation matches the JointPDF and so is the correct solution: the Joint PDF was generated using the code in the cell below and the following PDFs:

| $P(A)$ | $P(\lnot A)$ |
|:------:|:-------------|
| 0.8    | 0.2          |


|           | $P(B\vert A)$ | $P(\lnot B\vert A)$ |
|----------:|:-------------:|:--------------------|
| $A$       | 0.3           | 0.7                 |
| $\lnot A$ | 0.9           | 0.1                 |


|           | $P(C\vert A)$ | $P(\lnot C\vert A)$ |
|----------:|:-------------:|:--------------------|
| $A$       | 0.4           | 0.6                 |
| $\lnot A$ | 0.5           | 0.5                 |

In [22]:
import numpy as np
P_A = np.array([0.8,0.2])
P_C_A = np.array([[0.4,0.5],[0.6,0.5]])
P_B_A = np.array([[0.3,0.9],[0.7,0.1]])



JointPDF = np.zeros([2,2,2])
JointPDF[0,0,0] = P_A[0] * P_B_A[0,0] * P_C_A[0,0]
JointPDF[0,0,1] = P_A[0] * P_B_A[0,0] * P_C_A[1,0]
JointPDF[0,1,0] = P_A[0] * P_B_A[1,0] * P_C_A[0,0]
JointPDF[0,1,1] = P_A[0] * P_B_A[1,0] * P_C_A[1,0]
JointPDF[1,0,0] = P_A[1] * P_B_A[0,1] * P_C_A[0,1]
JointPDF[1,0,1] = P_A[1] * P_B_A[0,1] * P_C_A[1,1]
JointPDF[1,1,0] = P_A[1] * P_B_A[1,1] * P_C_A[0,1]
JointPDF[1,1,1] = P_A[1] * P_B_A[1,1] * P_C_A[1,1]

print(JointPDF)
print(JointPDF.sum())

[[[0.096 0.144]
  [0.224 0.336]]

 [[0.09  0.09 ]
  [0.01  0.01 ]]]
1.0
