# Bayesian Network

Write a program to construct aBayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library classes/API.

In [2]:
#Install dependency
!pip install pgmpy 

Collecting pgmpy
[?25l  Downloading https://files.pythonhosted.org/packages/06/19/d508949e8ac7b32e639f15e854a5f5ed710a4118e4f6692bddaccc390d88/pgmpy-0.1.13-py3-none-any.whl (324kB)
[K     |█                               | 10kB 15.9MB/s eta 0:00:01[K     |██                              | 20kB 19.5MB/s eta 0:00:01[K     |███                             | 30kB 11.0MB/s eta 0:00:01[K     |████                            | 40kB 8.8MB/s eta 0:00:01[K     |█████                           | 51kB 7.9MB/s eta 0:00:01[K     |██████                          | 61kB 8.1MB/s eta 0:00:01[K     |███████                         | 71kB 7.9MB/s eta 0:00:01[K     |████████                        | 81kB 8.1MB/s eta 0:00:01[K     |█████████                       | 92kB 8.4MB/s eta 0:00:01[K     |██████████                      | 102kB 7.4MB/s eta 0:00:01[K     |███████████                     | 112kB 7.4MB/s eta 0:00:01[K     |████████████▏                   | 122kB 7.4MB/s eta 0:0

In [6]:
from pgmpy.models import BayesianModel 
from pgmpy.estimators import MaximumLikelihoodEstimator

In [5]:
# URL to download the dataset
data = "http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data"

In [1]:
# Import all dependencies
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt
import sklearn as skl
import pandas as pd

https://archive.ics.uci.edu/ml/datasets/heart+disease <br>
14 attributes used:
1. #3 (age)  3 age: age in years 
2. #4 (sex)  sex: sex (1 = male; 0 = female) 
3. #9 (cp) chest pain type
        -- Value 1: typical angina
        -- Value 2: atypical angina
        -- Value 3: non-anginal pain
        -- Value 4: asymptomatic 
4. #10 (trestbps) resting blood pressure (in mm Hg on admission to the hospital) 
5. #12 (chol) serum cholestoral in mg/dl 
6. #16 (fbs) (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false) 
7. #19 (restecg) resting electrocardiographic results
        -- Value 0: normal
        -- Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
        -- Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria 
8. #32 (thalach) maximum heart rate achieved 
9. #38 (exang) exercise induced angina (1 = yes; 0 = no) 
10. #40 (oldpeak) oldpeak = ST depression induced by exercise relative to rest 
11. #41 (slope) slope: the slope of the peak exercise ST segment
        -- Value 1: upsloping
        -- Value 2: flat
        -- Value 3: downsloping 
12. #44 (ca) number of major vessels (0-3) colored by flourosopy 
13. #51 (thal)  thal: 3 = normal; 6 = fixed defect; 7 = reversable defect 
14. #58 (num) (the predicted attribute)

In [17]:
# Coulmn names
names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "heartdisease"]
# Read the data from the URL and add the above col names to the dataframe
heartDisease = pd.read_csv(urlopen(data), names=names)
heartDisease.head(3)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0


In [8]:
# Drop columns having large missing values
del heartDisease["ca"]
del heartDisease["slope"]
del heartDisease["thal"]
del heartDisease["oldpeak"]

In [9]:
# Replace missing values with nan, in the rest of the columns which are not dropped
heartDisease = heartDisease.replace("?", np.nan)
heartDisease.dtypes

age              int64
sex              int64
cp               int64
trestbps        object
chol            object
fbs             object
restecg         object
thalach         object
exang           object
heartdisease     int64
dtype: object

In [10]:
# Pass a list of tuples containing the (source,dest) pairs for constructing the network graph as per bayseian belief network
model = BayesianModel([("age", "trestbps"),
                       ("age", "fbs"),
                       ("sex", "trestbps"),
                       ("exang", "trestbps"),
                       ("trestbps", "heartdisease"),
                       ("fbs", "heartdisease"),
                       ("heartdisease", "restecg"),
                       ("heartdisease", "thalach"),
                       ("heartdisease", "chol")])

In [11]:
# fit data to the graph model
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

In [13]:
# get the conditional probability distribution for a given attribute based on training data
print(model.get_cpds("age"))

+---------+------------+
| age(28) | 0.00383142 |
+---------+------------+
| age(29) | 0.00383142 |
+---------+------------+
| age(30) | 0.00383142 |
+---------+------------+
| age(31) | 0.00766284 |
+---------+------------+
| age(32) | 0.0153257  |
+---------+------------+
| age(33) | 0.00766284 |
+---------+------------+
| age(34) | 0.0153257  |
+---------+------------+
| age(35) | 0.0191571  |
+---------+------------+
| age(36) | 0.0191571  |
+---------+------------+
| age(37) | 0.0306513  |
+---------+------------+
| age(38) | 0.0191571  |
+---------+------------+
| age(39) | 0.0344828  |
+---------+------------+
| age(40) | 0.0191571  |
+---------+------------+
| age(41) | 0.0383142  |
+---------+------------+
| age(42) | 0.0268199  |
+---------+------------+
| age(43) | 0.0421456  |
+---------+------------+
| age(44) | 0.0268199  |
+---------+------------+
| age(45) | 0.0229885  |
+---------+------------+
| age(46) | 0.045977   |
+---------+------------+
| age(47) | 0.0344828  |


In [14]:
from pgmpy.inference import VariableElimination

In [15]:
# Perform inference of new data on the model using the variable elimination method
heartDisease_infer = VariableElimination(model)

In [16]:
# Query the data attribute from training by providing any new evidance values you want to test with 
q = heartDisease_infer.query(variables=["heartdisease"],evidence={"age": 29})
# Query will return a probability distribution over all possible values taken by the attribute 
print(q)
# The below means that the person aged 29 has a 66.3% chance of `not having heart disease` and 33.7% chance of `having heart disease`

Finding Elimination Order: : 100%|██████████| 7/7 [00:00<00:00, 2114.98it/s]
Eliminating: fbs: 100%|██████████| 7/7 [00:00<00:00, 355.89it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.6630 |
+-----------------+---------------------+
| heartdisease(1) |              0.3370 |
+-----------------+---------------------+





# End