# Question 1

![](https://i.imgur.com/g9JbxKN.png)

In [None]:
# Import all the necessary libraries
from pgmpy.base import DAG
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete.CPD import TabularCPD
from pgmpy.inference import VariableElimination

# Build the Question 1 network: income and deposit drive payment, and
# payment together with housing drives security.
g = DAG()

# Listing the nodes explicitly is redundant with the edge list below (every
# node appears in at least one edge) but keeps the variable set easy to read.
g.add_nodes_from(["income", "deposit", "payment", "housing", "security"])

# Directed edges, written as (parent, child)
g.add_edges_from(
    ebunch=[
        ("income", "deposit"),
        ("income", "payment"),
        ("deposit", "payment"),
        ("payment", "security"),
        ("housing", "security"),
    ]
)

# Create a bayesian network model on top of the DAG structure
model = BayesianNetwork(g)

# Conditional probability tables.
# Layout: one inner list per state of the variable (row 0 = state 0,
# row 1 = state 1) and one column per combination of evidence states, with
# the last evidence variable varying fastest.
# NOTE(review): the later answer cells index state 1 for the barred
# (negated / low) outcomes, e.g. low income -- confirm against the CPT figure.
cpd_income = TabularCPD("income", 2, [[0.3], [0.7]])
cpd_housing = TabularCPD("housing", 2, [[0.35], [0.65]])
cpd_deposit = TabularCPD(
    "deposit",
    2,
    [[0.1, 0.6], [0.9, 0.4]],
    evidence=["income"],
    evidence_card=[2],
)
cpd_payment = TabularCPD(
    "payment",
    2,
    [[0.05, 0.5, 0.45, 0.6], [0.95, 0.5, 0.55, 0.4]],
    evidence=["income", "deposit"],
    evidence_card=[2, 2],
)
cpd_security = TabularCPD(
    "security",
    2,
    [[0.01, 0.5, 0.75, 0.31], [0.99, 0.5, 0.25, 0.69]],
    evidence=["payment", "housing"],
    evidence_card=[2, 2],
)

# Add the CPDs to the model
model.add_cpds(cpd_income, cpd_housing, cpd_deposit, cpd_payment, cpd_security)

# Validate the CPDs against the graph before any inference is run, so that a
# mistyped table or evidence list fails here instead of silently producing
# wrong probabilities in the answer cells.
if not model.check_model():
    raise ValueError("Question 1 network failed pgmpy's model validation")

## Question

Indicate whether the following independence statements are true or false according to this model. Provide a very brief justification of your answer (not more than two or three sentences)

- **Income ⊥ Security** - False. There's an active information path I -> P -> S.
- **Income ⊥ Security | Payment** - True. Every path from income to security passes through payment as a non-collider, so conditioning on payment blocks all of them.
- **Income ⊥ Payment** - False. Income and payment have a direct connection.
- **Income ⊥ Security | Payment, Deposit** - True. Conditioning on payment alone already blocks every path between income and security; additionally conditioning on deposit does not open any new path.
- **Deposit ⊥ Payment** - False. There is a direct connection between the two 
- **Income ⊥ Payment | Deposit** - False. Conditioning on deposit blocks the indirect path Income -> Deposit -> Payment, but the direct edge Income -> Payment remains, so income and payment are still dependent.
___

## Question

Show the factorized form of the joint distribution over all of the variables, P(A, B, C, D, E)

$$
\begin{aligned}
P(A, B, C, D, E) &= P(A) \times P(B \mid A) \times P(C \mid B, A) \times P(E \mid C, B, A) \times P(D \mid E, C, B, A) \\
\therefore P(A, B, C, D, E) &= P(A) \times P(B \mid A) \times P(C \mid B, A) \times P(E) \times P(D \mid E, C)
\end{aligned}
$$

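The first line applies the chain rule of probability (in the order A, B, C, E, D). The second line uses the structure of the graph to reduce each factor to the form $P(X|Pa(X))$, where $Pa(X)$ denotes the parents of $X$. Here A = income, B = deposit, C = payment, D = security and E = housing.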


___

## Question

Find the probability that payment is false when no prior information is available.

In [None]:
# Ask the inference engine for the marginal of payment directly: variable
# elimination sums out the other four nodes itself, so there is no need to
# materialise the full 32-entry joint and marginalise it by hand.
infer = VariableElimination(model)
g_dist = infer.query(variables=["payment"])
# NOTE(review): the answer below reads payment(0) as "payment is false",
# whereas the conditional-probability cells index state 1 for the barred
# (negated) events -- confirm which state means "false" against the CPT figure.
print(g_dist)

+------------+----------------+
| payment    |   phi(payment) |
+============+================+
| payment(0) |         0.4935 |
+------------+----------------+
| payment(1) |         0.5065 |
+------------+----------------+


From the above, the probability that the payment is false is `0.4935`


___

## Question

What is the probability that you have got Payment, given that the income is low? 

$P(\bar{C}|\bar{A}) = ?$

In [None]:
# Joint distribution over all five nodes, then reduced in place to the
# (income, payment) table that the following cells index into.
infer = VariableElimination(model)
g_dist = infer.query(
    variables=["income", "deposit", "payment", "housing", "security"]
)
g_dist.marginalize(variables=["deposit", "housing", "security"], inplace=True)
print(g_dist)

+-----------+------------+-----------------------+
| income    | payment    |   phi(income,payment) |
+===========+============+=======================+
| income(0) | payment(0) |                0.1365 |
+-----------+------------+-----------------------+
| income(0) | payment(1) |                0.1635 |
+-----------+------------+-----------------------+
| income(1) | payment(0) |                0.3570 |
+-----------+------------+-----------------------+
| income(1) | payment(1) |                0.3430 |
+-----------+------------+-----------------------+


In [None]:
# Joint probability P(income = 1, payment = 1), i.e. the last row of the
# (income, payment) table printed by the previous cell.
cbar_and_abar = g_dist.values[1, 1]

In [None]:
# Sum payment out in place, leaving only the marginal over income.
# This mutates g_dist, so the cell that reads the joint entry must run first.
g_dist.marginalize(variables=["payment"], inplace=True)
print(g_dist)

+-----------+---------------+
| income    |   phi(income) |
+===========+===============+
| income(0) |        0.3000 |
+-----------+---------------+
| income(1) |        0.7000 |
+-----------+---------------+


In [None]:
# NOTE: despite its name, `cbar` holds P(income = 1) -- the marginal of the
# conditioning event (income), not of payment -- because g_dist contains only
# the income variable at this point.
cbar = g_dist.values[1]

In [None]:
# Definition of conditional probability:
# P(payment = 1 | income = 1) = P(income = 1, payment = 1) / P(income = 1)
p_cbar_given_abar = cbar_and_abar / cbar
# Bare name on the last line so the notebook echoes the value
p_cbar_given_abar

0.49

The probability of getting payment given low income is `0.49`


___

## Question

What is the probability that you have got Payment, given that the income is low and you have large deposits?

$P(\bar{C}|\bar{A}, B) = ?$

In [None]:
# Full joint, reduced in place to the three variables of interest.
infer = VariableElimination(model)
g_dist = infer.query(
    variables=["income", "deposit", "payment", "housing", "security"]
)
g_dist.marginalize(variables=["housing", "security"], inplace=True)
# Joint entry for income = 1, deposit = 0, payment = 1; the index order
# relies on the factor keeping its axes as (income, deposit, payment).
p_cbar_abar_b = g_dist.values[1, 0, 1]
p_cbar_abar_b

0.23099999999999998

In [None]:
# Sum payment out in place to get the marginal of the conditioning event,
# then read P(income = 1, deposit = 0).
g_dist.marginalize(variables=["payment"], inplace=True)
p_abar_b = g_dist.values[1, 0]
p_abar_b

0.42

In [None]:
# Definition of conditional probability:
# P(payment = 1 | income = 1, deposit = 0)
#   = P(income = 1, deposit = 0, payment = 1) / P(income = 1, deposit = 0)
p_cbar_given_abar_b = p_cbar_abar_b / p_abar_b
# Bare name on the last line so the notebook echoes the value
p_cbar_given_abar_b

0.5499999999999999

Probability of getting payment given that the income is low and having high deposits is `0.55`


___

## Question

What is the probability that you didn’t default in payment given high income and no security is given?

$P(\bar{C}|A,\bar{D}) = ?$

In [None]:
# Full joint, reduced in place to income, payment and security.
infer = VariableElimination(model)
g_dist = infer.query(
    variables=["income", "deposit", "payment", "housing", "security"]
)
g_dist.marginalize(variables=["deposit", "housing"], inplace=True)
# Joint entry for income = 0, payment = 1, security = 1; the index order
# relies on the factor keeping its axes as (income, payment, security).
p_cbar_a_dbar = g_dist.values[0, 1, 1]
p_cbar_a_dbar

0.08763599999999999

In [None]:
# Sum payment out in place to get the marginal of the conditioning event,
# then read P(income = 0, security = 1).
g_dist.marginalize(variables=["payment"], inplace=True)
p_a_dbar = g_dist.values[0, 1]
p_a_dbar

0.17929575

In [None]:
# Definition of conditional probability:
# P(payment = 1 | income = 0, security = 1)
#   = P(income = 0, payment = 1, security = 1) / P(income = 0, security = 1)
p_cbar_given_a_dbar = p_cbar_a_dbar / p_a_dbar
# Bare name on the last line so the notebook echoes the value
p_cbar_given_a_dbar

0.48877901456113704

Probability of not defaulting in payment given high income and no security is `0.4888`


___

# Question 2

![](https://i.imgur.com/zrf3LtE.png)

In [None]:
# Build the Question 2 network over eight binary variables.
g = DAG()

# Add nodes to the DAG
g.add_nodes_from(
    [
        "asia",
        "smoker",
        "tb",
        "lung_cancer",
        "bronchitis",
        "tb_or_lc",
        "xray",
        "dispnea",
    ]
)

# Directed edges, written as (parent, child)
g.add_edges_from(
    ebunch=[
        ("asia", "tb"),
        ("smoker", "lung_cancer"),
        ("smoker", "bronchitis"),
        ("tb", "tb_or_lc"),
        ("lung_cancer", "tb_or_lc"),
        ("tb_or_lc", "xray"),
        ("tb_or_lc", "dispnea"),
        ("bronchitis", "dispnea"),
    ]
)

# CPT layout: one inner list per state of the variable (row 0 = state 0,
# row 1 = state 1) and one column per combination of evidence states, with
# the last evidence variable varying fastest.
# NOTE(review): the answer cells below index state 0 for the "visited",
# "has the disease" and "positive x-ray" outcomes -- confirm against the figure.

# Marginal distribution of visiting Asia
cpd_asia = TabularCPD("asia", 2, [[0.01], [0.99]])

# Marginal distribution of smoking
cpd_smoker = TabularCPD("smoker", 2, [[0.5], [0.5]])

# Conditional distribution of TB given visit to Asia
cpd_tb = TabularCPD(
    "tb",
    2,
    [[0.05, 0.01], [0.95, 0.99]],
    evidence=["asia"],
    evidence_card=[2],
)

# Conditional distribution of lung cancer given smoker
cpd_lc = TabularCPD(
    "lung_cancer",
    2,
    [[0.1, 0.01], [0.9, 0.99]],
    evidence=["smoker"],
    evidence_card=[2],
)

# Conditional distribution of bronchitis given smoker
cpd_bronchitis = TabularCPD(
    "bronchitis",
    2,
    [[0.6, 0.3], [0.4, 0.7]],
    evidence=["smoker"],
    evidence_card=[2],
)

# Deterministic node: tb_or_lc is in state 1 only when both tb and
# lung_cancer are in state 1, and in state 0 otherwise.
cpd_lc_or_tb = TabularCPD(
    "tb_or_lc",
    2,
    [[1, 1, 1, 0], [0, 0, 0, 1]],
    evidence=["tb", "lung_cancer"],
    evidence_card=[2, 2],
)

# Conditional distribution of xray given tb_or_lc
cpd_xray = TabularCPD(
    "xray",
    2,
    [[0.98, 0.05], [0.02, 0.95]],
    evidence=["tb_or_lc"],
    evidence_card=[2],
)

# Conditional distribution of dispnea given bronchitis and tb_or_lc
cpd_dispnea = TabularCPD(
    "dispnea",
    2,
    [[0.9, 0.8, 0.7, 0.1], [0.1, 0.2, 0.3, 0.9]],
    evidence=["bronchitis", "tb_or_lc"],
    evidence_card=[2, 2],
)

# Create a BN model and add all the conditional distributions
model = BayesianNetwork(g)
model.add_cpds(
    cpd_asia,
    cpd_smoker,
    cpd_tb,
    cpd_lc,
    cpd_bronchitis,
    cpd_lc_or_tb,
    cpd_xray,
    cpd_dispnea,
)

# Validate the CPDs against the graph before any inference is run, so that a
# mistyped table or evidence list fails here instead of silently producing
# wrong probabilities in the answer cells.
if not model.check_model():
    raise ValueError("Question 2 network failed pgmpy's model validation")

## Question

What is the probability that you have tuberculosis, given that you have visited Asia, you have lung cancer, and you know that you have a positive x-ray?

To find:  $P(T | A, L, E, X) = \frac{P(T, A, L, E, X)}{P(A, L, E, X)}$

In [None]:
# Full joint over the eight nodes, reduced in place to the five variables
# that appear in P(T, A, L, E, X).
infer = VariableElimination(model)
g_dist = infer.query(
    variables=[
        "asia",
        "smoker",
        "tb",
        "lung_cancer",
        "bronchitis",
        "tb_or_lc",
        "xray",
        "dispnea",
    ]
)
g_dist.marginalize(variables=["smoker", "bronchitis", "dispnea"], inplace=True)
# Every remaining variable is read at state 0, so the axis order of the
# factor does not affect which entry is selected.
p_talex = g_dist.values[0, 0, 0, 0, 0]
p_talex

2.6950000000000005e-05

In [None]:
# Sum tb out in place to get the marginal of the conditioning event,
# then read the entry where all four remaining variables are at state 0.
g_dist.marginalize(variables=["tb"], inplace=True)
p_alex = g_dist.values[0, 0, 0, 0]
p_alex

0.0005390000000000001

In [None]:
# Definition of conditional probability: joint over (T, A, L, E, X) divided
# by the marginal of the conditioning event (A, L, E, X), all at state 0.
required_cpd = p_talex / p_alex
# Bare name on the last line so the notebook echoes the value
required_cpd

0.05

The probability that you have tuberculosis, given that you have visited Asia, you have lung cancer, and you know that you have a positive x-ray, is `0.05`.


___

## Question
What is the probability of having dyspnea given that you have a positive x-ray?

To find:  $P(D | X) = \frac{P(D, X)}{P(X)}$

In [None]:
# Full joint over the eight nodes, reduced in place to xray and dispnea.
infer = VariableElimination(model)
g_dist = infer.query(
    variables=[
        "asia",
        "smoker",
        "tb",
        "lung_cancer",
        "bronchitis",
        "tb_or_lc",
        "xray",
        "dispnea",
    ]
)
g_dist.marginalize(
    variables=["asia", "smoker", "tb", "lung_cancer", "bronchitis", "tb_or_lc"],
    inplace=True,
)
# Both remaining variables are read at state 0, so the axis order of the
# factor does not affect which entry is selected.
p_dx = g_dist.values[0, 0]
p_dx

0.07067010440000002

In [None]:
# Sum dispnea out in place, leaving only the marginal over xray,
# then read P(xray = 0).
g_dist.marginalize(variables=["dispnea"], inplace=True)
p_xray = g_dist.values[0]
p_xray

0.11029004000000003

In [None]:
# Definition of conditional probability:
# P(dispnea = 0 | xray = 0) = P(xray = 0, dispnea = 0) / P(xray = 0)
p_d_given_x = p_dx / p_xray
# Bare name on the last line so the notebook echoes the value
p_d_given_x

0.6407659694384007

The probability of having dyspnea given a positive x-ray is `0.641`.


___

## Question
List all the independencies in the given graph

In [None]:
# Print the conditional independence assertions pgmpy derives from the
# structure of the Question 2 network; the listing is long because it
# covers many different conditioning sets.
print(model.get_independencies())

(xray ⟂ dispnea, tb, bronchitis, lung_cancer, asia, smoker | tb_or_lc)
(xray ⟂ asia | tb)
(xray ⟂ smoker, bronchitis | lung_cancer)
(xray ⟂ bronchitis | smoker)
(xray ⟂ tb, bronchitis, lung_cancer, asia, smoker | dispnea, tb_or_lc)
(xray ⟂ asia | dispnea, tb)
(xray ⟂ dispnea, bronchitis, lung_cancer, asia, smoker | tb_or_lc, tb)
(xray ⟂ dispnea, tb, lung_cancer, asia, smoker | tb_or_lc, bronchitis)
(xray ⟂ dispnea, tb, bronchitis, asia, smoker | lung_cancer, tb_or_lc)
(xray ⟂ dispnea, tb, bronchitis, lung_cancer, smoker | asia, tb_or_lc)
(xray ⟂ dispnea, tb, bronchitis, lung_cancer, asia | tb_or_lc, smoker)
(xray ⟂ asia | bronchitis, tb)
(xray ⟂ asia, bronchitis, smoker | lung_cancer, tb)
(xray ⟂ asia, bronchitis | smoker, tb)
(xray ⟂ smoker | lung_cancer, bronchitis)
(xray ⟂ smoker, bronchitis | lung_cancer, asia)
(xray ⟂ bronchitis | lung_cancer, smoker)
(xray ⟂ bronchitis | asia, smoker)
(xray ⟂ lung_cancer, asia, bronchitis, smoker | dispnea, tb_or_lc, tb)
(xray ⟂ lung_cancer, asia