In [1]:
!pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.20-py3-none-any.whl (1.9 MB)
     ---------------------------------------- 1.9/1.9 MB 3.7 MB/s eta 0:00:00
Collecting torch
  Downloading torch-1.13.0-cp39-cp39-win_amd64.whl (167.2 MB)
     -------------------------------------- 167.2/167.2 MB 1.3 MB/s eta 0:00:00
Collecting opt-einsum
  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
     -------------------------------------- 65.5/65.5 kB 153.5 kB/s eta 0:00:00
Installing collected packages: torch, opt-einsum, pgmpy
Successfully installed opt-einsum-3.3.0 pgmpy-0.1.20 torch-1.13.0




In [8]:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Read the CSV and turn every '?' cell into NaN in a single chain, so that
# pandas counts those cells as missing values.
heartDisease = pd.read_csv('Data/heart.csv').replace(to_replace='?', value=np.nan)

# Preview the first rows of the loaded frame.
print('Sample instances from the dataset are given below\n')
print(heartDisease.head())


Sample instances from the dataset are given below

   age  gender  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0   63       1   1       145   233    1        2      150      0      2.3   
1   67       1   4       160   286    0        2      108      1      1.5   
2   67       1   4       120   229    0        2      129      1      2.6   
3   37       1   3       130   250    0        0      187      0      3.5   
4   41       0   2       130   204    0        2      172      0      1.4   

   slope ca thal  heartdisease  
0      3  0    6             0  
1      2  3    3             2  
2      2  2    7             1  
3      3  0    3             0  
4      1  0    3             0  


In [9]:
# Per-column count of missing (NaN) entries; shown as the cell's output.
heartDisease.isna().sum()

age             0
gender          0
cp              0
trestbps        0
chol            0
fbs             0
restecg         0
thalach         0
exang           0
oldpeak         0
slope           0
ca              4
thal            2
heartdisease    0
dtype: int64

In [12]:
# Show the dtype of every column before fitting.
print('\n Attributes and datatypes \n')
print(heartDisease.dtypes)

# Network structure: three parent nodes point at 'heartdisease', which in
# turn points at two child nodes. The concatenated list keeps the edges in
# the order: age, exang, cp -> heartdisease -> restecg, chol.
parent_edges = [(parent, 'heartdisease') for parent in ('age', 'exang', 'cp')]
child_edges = [('heartdisease', child) for child in ('restecg', 'chol')]
# NOTE(review): newer pgmpy releases may have renamed BayesianModel —
# confirm the class name against the installed version.
model = BayesianModel(parent_edges + child_edges)

# Estimate the conditional probability distributions from the data frame.
print('\n Learning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Build the inference engine on top of the fitted model.
print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)

# Query 1: distribution of heartdisease with restecg fixed to 1.
print('\n 1. Probability of HeartDisease given evidence = restecg')
q1 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'restecg': 1},
)
print(q1)

# Query 2: distribution of heartdisease with cp fixed to 2.
print('\n 2. Probability of HeartDisease given evidence = cp')
q2 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'cp': 2},
)
print(q2)


 Attributes and datatypes 

age               int64
gender            int64
cp                int64
trestbps          int64
chol              int64
fbs               int64
restecg           int64
thalach           int64
exang             int64
oldpeak         float64
slope             int64
ca               object
thal             object
heartdisease      int64
dtype: object

 Learning CPD using Maximum likelihood estimators

 Inferencing with Bayesian Network:

 1. Probability of HeartDisease given evidence = restecg
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.1386 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+
| heartdisease(2) |              0.2427 |
+-----------------+---------------------+
| heartdisease(3) |              0.2179 |
+-----------------+---------------------+
| heartdisease(4) |              0.4008 |
+------------

