# 7 : Construct a Bayesian Network to demonstrate the diagnosis of heart patients using standard Heart Disease 

In [9]:
# Install if the module doesn't exist
! pip install pgmpy



###### Importing Heart Disease Data Set and Customizing

In [18]:
import pandas as pd
from urllib.request import urlopen

#data_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
data_url = 'https://tinyurl.com/processed-hungarian-data'


#names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']

names = urlopen('https://tinyurl.com/names-csv').read().decode().split(',')   # need a live connection

data = urlopen(data_url)
heart_disease = pd.read_csv(data, names = names)                              # gets Cleveland data

heart_disease.head()


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


###### Dropping columns which are more non numeric 

In [28]:
heart_disease.drop(['ca', 'slope', 'thal', 'oldpeak'], axis=1)

# also replacing '?' with numpy's NaN ( not a number)

import numpy

heart_disease = heart_disease.replace('?', numpy.NaN)



### Modeling Heart Disease Data 
<img src ="heart.png">

In [29]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

associations_list =  [
                       ('age', 'trestbps'),       ('age', 'fbs'),             ('sex', 'trestbps'),  
                       ('exang', 'trestbps'),     ('trestbps','heartdisease'),('fbs','heartdisease'),
                       ('heartdisease','restecg'),('heartdisease','thalach'), ('heartdisease','chol')
                    ]
model = BayesianModel(associations_list)

# Learing CPDs using Maximum Likelihood Estimators
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)


In [58]:
print(model.get_cpds('age'))

+---------+---+
| age(28) | 0 |
+---------+---+
| age(29) | 0 |
+---------+---+
| age(30) | 0 |
+---------+---+
| age(31) | 0 |
+---------+---+
| age(32) | 0 |
+---------+---+
| age(33) | 0 |
+---------+---+
| age(34) | 0 |
+---------+---+
| age(35) | 0 |
+---------+---+
| age(36) | 0 |
+---------+---+
| age(37) | 0 |
+---------+---+
| age(38) | 0 |
+---------+---+
| age(39) | 0 |
+---------+---+
| age(40) | 0 |
+---------+---+
| age(41) | 0 |
+---------+---+
| age(42) | 0 |
+---------+---+
| age(43) | 0 |
+---------+---+
| age(44) | 0 |
+---------+---+
| age(45) | 0 |
+---------+---+
| age(46) | 0 |
+---------+---+
| age(47) | 1 |
+---------+---+
| age(48) | 0 |
+---------+---+
| age(49) | 0 |
+---------+---+
| age(50) | 0 |
+---------+---+
| age(51) | 0 |
+---------+---+
| age(52) | 0 |
+---------+---+
| age(53) | 0 |
+---------+---+
| age(54) | 0 |
+---------+---+
| age(55) | 0 |
+---------+---+
| age(56) | 0 |
+---------+---+
| age(57) | 0 |
+---------+---+
| age(58) | 0 |
+-------

In [31]:
print(model.get_cpds('sex'))

+--------+---+
| sex(0) | 0 |
+--------+---+
| sex(1) | 1 |
+--------+---+


In [32]:
model.get_independencies()

(age _|_ exang, sex)
(age _|_ exang | sex)
(age _|_ exang, sex | fbs)
(age _|_ sex | exang)
(age _|_ thalach, chol, restecg | heartdisease)
(age _|_ exang | fbs, sex)
(age _|_ thalach, chol, restecg | heartdisease, sex)
(age _|_ thalach, restecg | chol, heartdisease)
(age _|_ chol, thalach | restecg, heartdisease)
(age _|_ sex | fbs, exang)
(age _|_ thalach, chol, restecg, heartdisease | trestbps, fbs)
(age _|_ thalach, chol, restecg | fbs, heartdisease)
(age _|_ chol, restecg | thalach, heartdisease)
(age _|_ thalach, chol, restecg | exang, heartdisease)
(age _|_ thalach, chol, restecg | trestbps, heartdisease)
(age _|_ thalach, restecg | chol, heartdisease, sex)
(age _|_ chol, thalach | restecg, heartdisease, sex)
(age _|_ thalach, chol, restecg, heartdisease | trestbps, fbs, sex)
(age _|_ thalach, chol, restecg | fbs, heartdisease, sex)
(age _|_ chol, restecg | thalach, heartdisease, sex)
(age _|_ thalach, chol, restecg | heartdisease, exang, sex)
(age _|_ thalach, chol, restecg | t

###### Inferencing with Bayesian Network

In [51]:
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
heart_disease_infer = VariableElimination(model)

# Computing the probability of bronc given smoke.
query = heart_disease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(query)

Finding Elimination Order: : 100%|██████████| 7/7 [00:00<00:00, 1932.99it/s]
Eliminating: restecg: 100%|██████████| 7/7 [00:00<00:00, 219.51it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.4919 |
+-----------------+---------------------+
| heartdisease(1) |              0.5081 |
+-----------------+---------------------+



