In [1]:
import sys

In [2]:
import networkx

In [3]:
import numpy

In [4]:
import scipy
import pandas

In [5]:
import pyparsing

In [6]:
import tqdm

In [7]:
import joblib

In [8]:
import torch

In [9]:
# Starting with defining the network structure
from pgmpy.models import BayesianModel

cancer_model = BayesianModel([('Pollution', 'Cancer'), 
                              ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'),
                              ('Cancer', 'Dyspnoea')])

In [10]:
# Now defining the parameters.
from pgmpy.factors.discrete import TabularCPD

cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]])
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

In [11]:
# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model.
cancer_model.check_model()

True

In [12]:
# Doing some simple queries on the network
cancer_model.is_active_trail('Pollution', 'Smoker')

False

In [13]:

cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer'])

True

In [14]:

cancer_model.local_independencies('Xray')

(Xray _|_ Pollution, Dyspnoea, Smoker | Cancer)

In [15]:
import wget
import gzip
f = wget.download('http://www.bnlearn.com/bnrepository/asia/asia.bif.gz')
with gzip.open('asia.bif.gz', mode='rb') as f:
    file_content=f.read()
with open('asia.bif', mode='wb') as f:
    f.write(file_content)

In [18]:
from pgmpy.readwrite import BIFReader
reader = BIFReader('asia.bif')
!rm asia.bif
asia_model = reader.get_model()

'rm' is not recognized as an internal or external command,
operable program or batch file.


In [19]:
from pgmpy.inference import VariableElimination
asia_infer = VariableElimination(asia_model)

# Computing the probability of bronc given smoke.
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 'no'})
print(q)

Finding Elimination Order: : 100%|██████████| 6/6 [00:00<00:00, 991.60it/s]
Eliminating: either: 100%|██████████| 6/6 [00:00<00:00, 136.03it/s]


+------------+--------------+
| bronc      |   phi(bronc) |
| bronc(yes) |       0.3000 |
+------------+--------------+
| bronc(no)  |       0.7000 |
+------------+--------------+


In [20]:
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 'yes'})
print(q)

Finding Elimination Order: : 100%|██████████| 6/6 [00:00<00:00, 1018.86it/s]
Eliminating: either: 100%|██████████| 6/6 [00:00<00:00, 206.18it/s]


+------------+--------------+
| bronc      |   phi(bronc) |
| bronc(yes) |       0.6000 |
+------------+--------------+
| bronc(no)  |       0.4000 |
+------------+--------------+
