In [None]:
!pip install pgmpy



In [None]:
import numpy as np
import matplotlib.pyplot as plt # Visuals
import seaborn as sns
import sklearn as skl
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import JointProbabilityDistribution
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.inference import VariableElimination

In [None]:
# Read the heart-disease CSV from the local sample_data folder into a DataFrame.
heartDisease = pd.read_csv(
    './sample_data/heart_disease_uci.csv',
)
# Bare name as the last expression: render the loaded frame as the cell output.
heartDisease

Unnamed: 0,id,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,1,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,2,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,3,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,4,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,5,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,916,54,Female,VA Long Beach,asymptomatic,127.0,333.0,True,st-t abnormality,154.0,False,0.0,,,,1
916,917,62,Male,VA Long Beach,typical angina,,139.0,False,st-t abnormality,,,,,,,0
917,918,55,Male,VA Long Beach,asymptomatic,122.0,223.0,True,st-t abnormality,100.0,False,0.0,,,fixed defect,2
918,919,58,Male,VA Long Beach,asymptomatic,,385.0,True,lv hypertrophy,,,,,,,0


In [None]:
# Remove the columns that the network defined later in this notebook does not use.
# Rebinding through drop(..., errors='ignore') keeps the cell re-runnable: the
# previous `del heartDisease[...]` statements raise KeyError on a second
# execution because the columns are already gone.
heartDisease = heartDisease.drop(
    columns=['id', 'dataset', 'oldpeak', 'slope', 'ca', 'thal'],
    errors='ignore',
)

In [None]:
# Preview the first five rows of the frame after the column removal.
heartDisease.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,num
0,63,Male,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,0
1,67,Male,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,2
2,67,Male,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,1
3,37,Male,non-anginal,130.0,250.0,False,normal,187.0,False,0
4,41,Female,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,0


In [None]:
# Per-column count of missing values, printed as plain text.
print(heartDisease.isna().sum())

age          0
sex          0
cp           0
trestbps    59
chol        30
fbs         90
restecg      2
thalch      55
exang       55
num          0
dtype: int64


In [None]:
# Keep only the rows in which every remaining column has a value.
heartDisease = heartDisease.dropna(axis=0, how='any')

In [None]:
# Re-check the per-column missing-value counts now that incomplete rows are gone.
print(heartDisease.isna().sum())

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalch      0
exang       0
num         0
dtype: int64


In [None]:
# Display the frame as it stands after the dropna step.
heartDisease

In [None]:
# Hand-specified network structure as (parent, child) edges.
# `num` is the node with parents chol, restecg and cp.
# The edge order is kept exactly as originally written.
model = BayesianNetwork(
    [
        ('age', 'trestbps'),
        ('sex', 'fbs'),
        ('trestbps', 'chol'),
        ('fbs', 'chol'),
        ('chol', 'num'),
        ('restecg', 'num'),
        ('thalch', 'exang'),
        ('exang', 'cp'),
        ('cp', 'num'),
    ]
)
model

<pgmpy.models.BayesianNetwork.BayesianNetwork at 0x7fb0c70a5840>

In [None]:
# Estimate every CPD of the network from the cleaned frame by maximum likelihood.
# NOTE(review): the columns are passed as-is, so each distinct numeric reading
# becomes its own discrete state (the CPD listing in this notebook reports
# chol:208 and thalch:115). Consider binning those columns before fitting;
# confirm with the author.
model.fit(data=heartDisease, estimator=MaximumLikelihoodEstimator)

In [None]:
# List the fitted CPD objects; each repr names the variable, its number of
# states, and (after the `|`) its parents with their state counts.
model.get_cpds()

[<TabularCPD representing P(age:50) at 0x7fb0c70a76d0>,
 <TabularCPD representing P(trestbps:58 | age:50) at 0x7fb0c70a7610>,
 <TabularCPD representing P(sex:2) at 0x7fb0c70a7640>,
 <TabularCPD representing P(fbs:2 | sex:2) at 0x7fb0c70a75b0>,
 <TabularCPD representing P(chol:208 | fbs:2, trestbps:58) at 0x7fb0c6cd53c0>,
 <TabularCPD representing P(num:5 | chol:208, cp:4, restecg:3) at 0x7fb0c6cd5060>,
 <TabularCPD representing P(restecg:3) at 0x7fb0c6cd4ee0>,
 <TabularCPD representing P(thalch:115) at 0x7fb0c6cd6440>,
 <TabularCPD representing P(exang:2 | thalch:115) at 0x7fb0c6cd5930>,
 <TabularCPD representing P(cp:4 | exang:2) at 0x7fb0c6cd7e50>]

In [None]:
# Print every fitted CPD as a text table, each preceded by a header line
# naming the variable it belongs to.
for cpd in model.get_cpds():
    print(f"CPD of {cpd.variable}:", cpd, sep="\n")

CPD of age:
+---------+------------+
| age(28) | 0.00134409 |
+---------+------------+
| age(29) | 0.00268817 |
+---------+------------+
| age(30) | 0.00134409 |
+---------+------------+
| age(31) | 0.00268817 |
+---------+------------+
| age(32) | 0.00537634 |
+---------+------------+
| age(33) | 0.00268817 |
+---------+------------+
| age(34) | 0.00806452 |
+---------+------------+
| age(35) | 0.0120968  |
+---------+------------+
| age(36) | 0.00672043 |
+---------+------------+
| age(37) | 0.0147849  |
+---------+------------+
| age(38) | 0.0134409  |
+---------+------------+
| age(39) | 0.0174731  |
+---------+------------+
| age(40) | 0.0134409  |
+---------+------------+
| age(41) | 0.0295699  |
+---------+------------+
| age(42) | 0.0215054  |
+---------+------------+
| age(43) | 0.0295699  |
+---------+------------+
| age(44) | 0.0255376  |
+---------+------------+
| age(45) | 0.0188172  |
+---------+------------+
| age(46) | 0.0295699  |
+---------+------------+
| age(47) | 0

In [None]:
# Build a variable-elimination inference engine over the fitted network.
HeartDisease_infer = VariableElimination(model=model)
# Bare name as the last expression: show the engine's repr as the cell output.
HeartDisease_infer

<pgmpy.inference.ExactInference.VariableElimination at 0x7fb0c6cd4a90>

In [None]:
# Distribution over `num` with no evidence supplied.
a = HeartDisease_infer.query(
    variables=['num'],
)
print(a)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2875 |
+--------+------------+
| num(1) |     0.2618 |
+--------+------------+
| num(2) |     0.1639 |
+--------+------------+
| num(3) |     0.1722 |
+--------+------------+
| num(4) |     0.1146 |
+--------+------------+


In [None]:
# Distribution over `num` given age = 41.
b = HeartDisease_infer.query(
    variables=['num'],
    evidence={'age': 41},
)
print(b)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2946 |
+--------+------------+
| num(1) |     0.2569 |
+--------+------------+
| num(2) |     0.1604 |
+--------+------------+
| num(3) |     0.1729 |
+--------+------------+
| num(4) |     0.1153 |
+--------+------------+


In [None]:
# Distribution over `num` given age = 41 and trestbps = 145.
c = HeartDisease_infer.query(
    variables=['num'],
    evidence={
        'age': 41,
        'trestbps': 145,
    },
)
print(c)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2160 |
+--------+------------+
| num(1) |     0.3048 |
+--------+------------+
| num(2) |     0.1578 |
+--------+------------+
| num(3) |     0.1734 |
+--------+------------+
| num(4) |     0.1480 |
+--------+------------+


In [None]:
# Distribution over `num` given age = 41 and sex = 'Male'.
d = HeartDisease_infer.query(
    variables=['num'],
    evidence={
        'age': 41,
        'sex': 'Male',
    },
)
print(d)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2938 |
+--------+------------+
| num(1) |     0.2574 |
+--------+------------+
| num(2) |     0.1606 |
+--------+------------+
| num(3) |     0.1729 |
+--------+------------+
| num(4) |     0.1153 |
+--------+------------+


In [None]:
# Distribution over `num` given demographic, blood-sugar, ECG and exercise-angina evidence.
# NOTE(review): fbs and exang appear as True/False in the displayed data; the
# 1 / 0 used here presumably match those states — confirm.
e = HeartDisease_infer.query(
    variables=['num'],
    evidence={
        'age': 41,
        'sex': 'Male',
        'fbs': 1,
        'restecg': 'normal',
        'exang': 0,
    },
)
print(e)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2947 |
+--------+------------+
| num(1) |     0.2700 |
+--------+------------+
| num(2) |     0.1578 |
+--------+------------+
| num(3) |     0.1797 |
+--------+------------+
| num(4) |     0.0977 |
+--------+------------+


In [None]:
# Same evidence as the previous query, plus chest-pain type and thalch.
# NOTE(review): fbs and exang appear as True/False in the displayed data; the
# 1 / 0 used here presumably match those states — confirm.
f = HeartDisease_infer.query(
    variables=['num'],
    evidence={
        'age': 41,
        'sex': 'Male',
        'fbs': 1,
        'restecg': 'normal',
        'exang': 0,
        'cp': 'typical angina',
        'thalch': 60.0,
    },
)
print(f)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2425 |
+--------+------------+
| num(1) |     0.1918 |
+--------+------------+
| num(2) |     0.1885 |
+--------+------------+
| num(3) |     0.1885 |
+--------+------------+
| num(4) |     0.1885 |
+--------+------------+


In [None]:
# Query `num` with every other node of the network fixed as evidence.
# NOTE(review): the recorded output for this cell is uniform (0.2 per state).
# With chol, cp and restecg all fixed, the parents of `num` are fully specified,
# so a uniform answer presumably means this parent combination never occurs in
# the training rows — confirm before interpreting the result.
g = HeartDisease_infer.query(
    variables=['num'],
    evidence={
        'age': 28,
        'sex': 'Male',
        'fbs': 1,
        'restecg': 'normal',
        'exang': 0,
        'cp': 'typical angina',
        'chol': 85.0,
        'trestbps': 92.0,
        'thalch': 60.0,
    },
)
print(g)

+--------+------------+
| num    |   phi(num) |
| num(0) |     0.2000 |
+--------+------------+
| num(1) |     0.2000 |
+--------+------------+
| num(2) |     0.2000 |
+--------+------------+
| num(3) |     0.2000 |
+--------+------------+
| num(4) |     0.2000 |
+--------+------------+


In [None]:
import pickle

# Serialize the fitted network to heart_bayesian.pkl in the working directory.
# NOTE: pickle files must only be loaded back from trusted sources.
with open('heart_bayesian.pkl', mode='wb') as file:
    pickle.dump(model, file)