In [1]:
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import pandas as pd

In [2]:
data = "http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data"

In [3]:
names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "heartdisease"]
heartDisease = pd.read_csv(urlopen(data), names=names)

In [4]:
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [5]:
del heartDisease["ca"]
del heartDisease["slope"]
del heartDisease["thal"]
del heartDisease["oldpeak"]

In [6]:
heartDisease = heartDisease.replace("?", np.nan)
heartDisease.dtypes

age              int64
sex              int64
cp               int64
trestbps        object
chol            object
fbs             object
restecg         object
thalach         object
exang           object
heartdisease     int64
dtype: object

In [7]:
heartDisease.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

In [8]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

In [9]:
model = BayesianModel([("age", "trestbps"),
                       ("age", "fbs"),
                       ("sex", "trestbps"),
                       ("sex", "trestbps"),
                       ("exang", "trestbps"),
                       ("trestbps", "heartdisease"),
                       ("fbs", "heartdisease"),
                       ("heartdisease", "restecg"),
                       ("heartdisease", "thalach"),
                       ("heartdisease", "chol")])

In [10]:
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

In [11]:
print(model.get_cpds("age"))

+---------+------------+
| age(28) | 0.00383142 |
+---------+------------+
| age(29) | 0.00383142 |
+---------+------------+
| age(30) | 0.00383142 |
+---------+------------+
| age(31) | 0.00766284 |
+---------+------------+
| age(32) | 0.0153257  |
+---------+------------+
| age(33) | 0.00766284 |
+---------+------------+
| age(34) | 0.0153257  |
+---------+------------+
| age(35) | 0.0191571  |
+---------+------------+
| age(36) | 0.0191571  |
+---------+------------+
| age(37) | 0.0306513  |
+---------+------------+
| age(38) | 0.0191571  |
+---------+------------+
| age(39) | 0.0344828  |
+---------+------------+
| age(40) | 0.0191571  |
+---------+------------+
| age(41) | 0.0383142  |
+---------+------------+
| age(42) | 0.0268199  |
+---------+------------+
| age(43) | 0.0421456  |
+---------+------------+
| age(44) | 0.0268199  |
+---------+------------+
| age(45) | 0.0229885  |
+---------+------------+
| age(46) | 0.045977   |
+---------+------------+
| age(47) | 0.0344828  |


In [12]:
print(model.get_cpds("chol"))

+--------------+----------------------+----------------------+
| heartdisease | heartdisease(0)      | heartdisease(1)      |
+--------------+----------------------+----------------------+
| chol(100)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(117)    | 0.0                  | 0.01020408163265306  |
+--------------+----------------------+----------------------+
| chol(129)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(132)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(147)    | 0.012269938650306749 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(156)    | 0.0                  | 0.01020408163265306  |
+--------------+----------------------+----------------------+
| chol(160)    | 0.012269938650306749 | 0.0102040816326

In [13]:
print(model.get_cpds("sex"))

+--------+----------+
| sex(0) | 0.264368 |
+--------+----------+
| sex(1) | 0.735632 |
+--------+----------+


In [14]:
model.get_independencies()

(age _|_ exang, sex)
(age _|_ sex | exang)
(age _|_ exang | sex)
(age _|_ exang, sex | fbs)
(age _|_ restecg, chol, thalach | heartdisease)
(age _|_ chol, thalach | heartdisease, restecg)
(age _|_ restecg, thalach | heartdisease, chol)
(age _|_ sex | exang, fbs)
(age _|_ restecg, chol, thalach | exang, heartdisease)
(age _|_ heartdisease, restecg, chol, thalach | trestbps, fbs)
(age _|_ restecg, chol, thalach | trestbps, heartdisease)
(age _|_ restecg, chol | heartdisease, thalach)
(age _|_ exang | sex, fbs)
(age _|_ restecg, chol, thalach | heartdisease, sex)
(age _|_ restecg, chol, thalach | heartdisease, fbs)
(age _|_ thalach | heartdisease, restecg, chol)
(age _|_ chol, thalach | exang, heartdisease, restecg)
(age _|_ heartdisease, chol, thalach | trestbps, restecg, fbs)
(age _|_ chol, thalach | heartdisease, trestbps, restecg)
(age _|_ chol | heartdisease, restecg, thalach)
(age _|_ chol, thalach | heartdisease, restecg, sex)
(age _|_ chol, thalach | heartdisease, restecg, fbs)
(a

In [15]:
from pgmpy.inference import VariableElimination

In [16]:
heartDisease_infer = VariableElimination(model)

In [17]:
q = heartDisease_infer.query(variables=["heartdisease"],
                             evidence={"age": 29})
print(q)

Finding Elimination Order: : 100%|██████████| 7/7 [00:00<00:00, 4119.56it/s]
Eliminating: thalach: 100%|██████████| 7/7 [00:00<00:00, 756.84it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.6630 |
+-----------------+---------------------+
| heartdisease(1) |              0.3370 |
+-----------------+---------------------+





In [18]:
q = heartDisease_infer.query(variables=["heartdisease"],
                             evidence={"chol": 100})
print(q)

  "Found unknown state name. Trying to switch to using all state names as state numbers"
Finding Elimination Order: : 100%|██████████| 7/7 [00:00<00:00, 3956.89it/s]
Eliminating: thalach: 100%|██████████| 7/7 [00:00<00:00, 404.16it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              1.0000 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+



