In [1]:
# 6
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the heart-disease records. '?' is treated as a missing-value marker.
# Declaring it via `na_values` makes pandas convert it to NaN while parsing,
# so a column that contains '?' is still parsed as numeric. Replacing '?'
# after the read would leave such a column as object dtype holding strings
# like '37', which would not match the integer evidence used in the queries.
heartDisease = pd.read_csv('heart.csv', na_values='?')

# Quick look at the first rows and the column names.
print('Few examples from the dataset are given below')
print(heartDisease.head())
print(heartDisease.columns)


"""
"age" influences "trestbps" (resting blood pressure) and "fbs" (fasting blood sugar).
"sex" influences "trestbps".
"exang" (exercise induced angina) influences "trestbps".
"trestbps" influences "target" (presence or absence of heart disease).
"fbs" influences "target".
"target" influences "restecg" (resting electrocardiographic recording result), "thalach" (maximum heart rate achieved), and "chol" (serum cholesterol).
"""

# Directed edges of the network, grouped by parent node. Each entry maps a
# parent variable to the variables it points to.
_children_by_parent = {
    'age': ['trestbps', 'fbs'],
    'sex': ['trestbps'],
    'exang': ['trestbps'],
    'trestbps': ['target'],
    'fbs': ['target'],
    'target': ['restecg', 'thalach', 'chol'],
}

# Flatten the mapping into (parent, child) pairs, keeping the listed order,
# and build the network from them.
model = BayesianNetwork([
    (parent, child)
    for parent, children in _children_by_parent.items()
    for child in children
])

# --- Parameter learning ---
# Fit the network's CPDs to the loaded data with the maximum-likelihood
# estimator.
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# --- Inference ---
# Wrap the fitted model in a variable-elimination inference object.
print('\n Inferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# List the distinct 'age' values present in the data before using one of
# them as evidence.
print('\nAvailable states for age:')
age_states = heartDisease['age'].unique()
print(age_states)

# Query 1: distribution of 'target' given age == 37.
print('\n 1. Probability of HeartDisease given Age=37')
q = HeartDisease_infer.query(
    variables=['target'],
    evidence=dict(age=37),
)
print(f"Age:  {q!s}")

# List the distinct 'chol' values present in the data before using one of
# them as evidence.
print('\nAvailable states for chol:')
chol_states = heartDisease['chol'].unique()
print(chol_states)

# Query 2: distribution of 'target' given chol == 250.
print('\n 2. Probability of HeartDisease given cholesterol=250')
q = HeartDisease_infer.query(
    variables=['target'],
    evidence=dict(chol=250),
)
print(f"Chol:  {q!s}")

Few examples from the dataset are given below
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope   
0   63    1   3       145   233    1        0      150      0      2.3      0  \
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

Learning CPD using Maximum likelihood estimators

 Inferencing with Bayesian Network:

Available states for age:
[63 37 41 56 57 44 52 54 48 49 64 58 50 66 43 69 59 42 61 40 7