In [0]:
from pgmpy.models import BayesianModel

In [0]:
cancer_model = BayesianModel([('Pollution','Cancer'),('Smoker', 'Cancer'),('Cancer', 'Xray'),('Cancer','Dyspnoea')])
cancer_model.nodes() 
cancer_model.edges() 
cancer_model.get_cpds()

[]

In [0]:
from pgmpy.factors.discrete import TabularCPD 
 
cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]]) 
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]]) 
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],[0.97, 0.95, 0.999, 0.98]],    
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2]) 
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2]) 
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

In [0]:
# Associating Conditional probabilities with the Bayesian Structure 
# Associating the parameters with the model structure. 
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp) 
# Checking if the cpds are valid for the model. 
cancer_model.check_model() 

True

In [0]:
# Doing some simple queries on the network 
print(cancer_model.active_trail_nodes('Pollution', 'Smoker')) 
print(cancer_model.active_trail_nodes(['Pollution', 'Smoker'], observed=['Cancer']))
cancer_model.get_cpds() 


{'Pollution': {'Pollution', 'Xray', 'Cancer', 'Dyspnoea'}}
{'Pollution': {'Pollution', 'Smoker'}, 'Smoker': {'Pollution', 'Smoker'}}


[<TabularCPD representing P(Pollution:2) at 0x7fd6688746a0>,
 <TabularCPD representing P(Smoker:2) at 0x7fd6688b9ba8>,
 <TabularCPD representing P(Cancer:2 | Smoker:2, Pollution:2) at 0x7fd668866be0>,
 <TabularCPD representing P(Xray:2 | Cancer:2) at 0x7fd6688b9be0>,
 <TabularCPD representing P(Dyspnoea:2 | Cancer:2) at 0x7fd6688b96a0>]

In [0]:
#print the conditional probability table of each node(variable) of the bayesian network model
print(cancer_model.get_cpds('Pollution'))
print(cancer_model.get_cpds('Smoker')) 
print(cancer_model.get_cpds('Xray')) 
print(cancer_model.get_cpds('Dyspnoea')) 
print(cancer_model.get_cpds('Cancer'))

+-------------+-----+
| Pollution_0 | 0.9 |
+-------------+-----+
| Pollution_1 | 0.1 |
+-------------+-----+
+----------+-----+
| Smoker_0 | 0.3 |
+----------+-----+
| Smoker_1 | 0.7 |
+----------+-----+
+--------+----------+----------+
| Cancer | Cancer_0 | Cancer_1 |
+--------+----------+----------+
| Xray_0 | 0.9      | 0.2      |
+--------+----------+----------+
| Xray_1 | 0.1      | 0.8      |
+--------+----------+----------+
+------------+----------+----------+
| Cancer     | Cancer_0 | Cancer_1 |
+------------+----------+----------+
| Dyspnoea_0 | 0.65     | 0.3      |
+------------+----------+----------+
| Dyspnoea_1 | 0.35     | 0.7      |
+------------+----------+----------+
+-----------+-------------+-------------+-------------+-------------+
| Smoker    | Smoker_0    | Smoker_0    | Smoker_1    | Smoker_1    |
+-----------+-------------+-------------+-------------+-------------+
| Pollution | Pollution_0 | Pollution_1 | Pollution_0 | Pollution_1 |
+-----------+------------

In [0]:
# Determining the Local independencies of each node
cancer_model.local_independencies('Xray') 
cancer_model.local_independencies('Pollution') 
cancer_model.local_independencies('Smoker') 
cancer_model.local_independencies('Dyspnoea') 
cancer_model.local_independencies('Cancer')
cancer_model.get_independencies() 

(Pollution _|_ Smoker)
(Pollution _|_ Xray, Dyspnoea | Cancer)
(Pollution _|_ Dyspnoea | Xray, Cancer)
(Pollution _|_ Xray, Dyspnoea | Cancer, Smoker)
(Pollution _|_ Xray | Cancer, Dyspnoea)
(Pollution _|_ Dyspnoea | Xray, Cancer, Smoker)
(Pollution _|_ Xray | Dyspnoea, Cancer, Smoker)
(Smoker _|_ Pollution)
(Smoker _|_ Xray, Dyspnoea | Cancer)
(Smoker _|_ Xray, Dyspnoea | Pollution, Cancer)
(Smoker _|_ Dyspnoea | Xray, Cancer)
(Smoker _|_ Xray | Cancer, Dyspnoea)
(Smoker _|_ Dyspnoea | Pollution, Xray, Cancer)
(Smoker _|_ Xray | Pollution, Cancer, Dyspnoea)
(Xray _|_ Pollution, Smoker, Dyspnoea | Cancer)
(Xray _|_ Smoker, Dyspnoea | Pollution, Cancer)
(Xray _|_ Pollution, Dyspnoea | Cancer, Smoker)
(Xray _|_ Pollution, Smoker | Cancer, Dyspnoea)
(Xray _|_ Dyspnoea | Pollution, Cancer, Smoker)
(Xray _|_ Smoker | Pollution, Cancer, Dyspnoea)
(Xray _|_ Pollution | Dyspnoea, Cancer, Smoker)
(Dyspnoea _|_ Pollution, Xray, Smoker | Cancer)
(Dyspnoea _|_ Xray, Smoker | Pollution, Cancer)
(Dy

In [0]:
#.Inferencing with Bayesian Network 
# Doing exact inference using Variable Elimination 
from pgmpy.inference import VariableElimination 
cancer_infer = VariableElimination(cancer_model) 
 
# Computing the probability of bronc given smoke. 
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1}) 
print(q['Cancer']) 

# Computing the probability of bronc given smoke. 
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1,'Pollution': 1}) 
print(q['Cancer'])

+----------+---------------+
| Cancer   |   phi(Cancer) |
| Cancer_0 |        0.0029 |
+----------+---------------+
| Cancer_1 |        0.9971 |
+----------+---------------+
+----------+---------------+
| Cancer   |   phi(Cancer) |
| Cancer_0 |        0.0029 |
+----------+---------------+
| Cancer_1 |        0.9971 |
+----------+---------------+
+----------+---------------+
| Cancer   |   phi(Cancer) |
| Cancer_0 |        0.0200 |
+----------+---------------+
| Cancer_1 |        0.9800 |
+----------+---------------+


  phi.values = phi.values[slice_]


In [0]:
#Diagnosis of heart patients using standard Heart Disease Data Set 
import numpy as np 
from urllib.request import urlopen 
import pandas as pd 

In [0]:
#Importing Heart Disease Data Set and Customizing 
Cleveland_data_URL ='http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'

In [0]:
#np.set_printoptions(threshold=np.nan)
np.set_printoptions()
names = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','heartdisease'] 
heartDisease = pd.read_csv(urlopen(Cleveland_data_URL), names = names) 
heartDisease.head() 
del heartDisease['ca'] 
del heartDisease['slope'] 
del heartDisease['thal'] 
del heartDisease['oldpeak'] 
 
heartDisease = heartDisease.replace('?', np.nan) 
heartDisease.dtypes 
heartDisease.columns 

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

In [0]:
#Modeling Heart Disease Data 
from pgmpy.models import BayesianModel 
from pgmpy.estimators import MaximumLikelihoodEstimator
model = BayesianModel([('age','trestbps'),('age','fbs'),('sex','trestbps'), 
                       ('sex','trestbps'),('exang', 'trestbps'),('trestbps','heartdisease'),( 'fbs','heartdisease'),
                       ('heartdisease','restecg'),('heartdisease','thalach' ),('heartdisease','chol')])

In [0]:
# Learing CPDs using Maximum Likelihood Estimators 
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds('age')) 
print(model.get_cpds('chol')) 
print(model.get_cpds('sex')) 
model.get_independencies()

+---------+------------+
| age(28) | 0.00383142 |
+---------+------------+
| age(29) | 0.00383142 |
+---------+------------+
| age(30) | 0.00383142 |
+---------+------------+
| age(31) | 0.00766284 |
+---------+------------+
| age(32) | 0.0153257  |
+---------+------------+
| age(33) | 0.00766284 |
+---------+------------+
| age(34) | 0.0153257  |
+---------+------------+
| age(35) | 0.0191571  |
+---------+------------+
| age(36) | 0.0191571  |
+---------+------------+
| age(37) | 0.0306513  |
+---------+------------+
| age(38) | 0.0191571  |
+---------+------------+
| age(39) | 0.0344828  |
+---------+------------+
| age(40) | 0.0191571  |
+---------+------------+
| age(41) | 0.0383142  |
+---------+------------+
| age(42) | 0.0268199  |
+---------+------------+
| age(43) | 0.0421456  |
+---------+------------+
| age(44) | 0.0268199  |
+---------+------------+
| age(45) | 0.0229885  |
+---------+------------+
| age(46) | 0.045977   |
+---------+------------+
| age(47) | 0.0344828  |


(age _|_ sex, exang)
(age _|_ exang | sex)
(age _|_ sex | exang)
(age _|_ sex, exang | fbs)
(age _|_ thalach, chol, restecg | heartdisease)
(age _|_ exang | fbs, sex)
(age _|_ thalach, chol, restecg | sex, heartdisease)
(age _|_ sex | fbs, exang)
(age _|_ thalach, chol, restecg | exang, heartdisease)
(age _|_ thalach, chol | restecg, heartdisease)
(age _|_ thalach, restecg | chol, heartdisease)
(age _|_ chol, restecg | thalach, heartdisease)
(age _|_ thalach, chol, restecg, heartdisease | fbs, trestbps)
(age _|_ thalach, chol, restecg | fbs, heartdisease)
(age _|_ thalach, chol, restecg | trestbps, heartdisease)
(age _|_ thalach, chol, restecg | sex, exang, heartdisease)
(age _|_ thalach, chol | sex, restecg, heartdisease)
(age _|_ thalach, restecg | chol, sex, heartdisease)
(age _|_ chol, restecg | thalach, sex, heartdisease)
(age _|_ thalach, chol, restecg, heartdisease | trestbps, fbs, sex)
(age _|_ thalach, chol, restecg | fbs, sex, heartdisease)
(age _|_ thalach, chol, restecg | t

In [0]:
#Inferencing with Bayesian Network 
from pgmpy.inference import VariableElimination 
HeartDisease_infer = VariableElimination(model)

In [0]:
# Computing the probability of bronc given smoke. 
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28} ) 
print(q['heartdisease']) 
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100}) 
print(q['heartdisease']) 

+----------------+---------------------+
| heartdisease   |   phi(heartdisease) |
| heartdisease_0 |              0.6333 |
+----------------+---------------------+
| heartdisease_1 |              0.3667 |
+----------------+---------------------+
+----------------+---------------------+
| heartdisease   |   phi(heartdisease) |
| heartdisease_0 |              1.0000 |
+----------------+---------------------+
| heartdisease_1 |              0.0000 |
+----------------+---------------------+


  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]
