In [3]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Generate synthetic data
np.random.seed(0)
data = {
    'age': pd.cut(np.random.randint(29, 77, 300), bins=[29, 40, 50, 60, 77], labels=['young', 'mid-age', 'old', 'very-old']),
    'sex': np.random.choice([0, 1], 300),
    'cp': np.random.randint(0, 4, 300),
    'trestbps': np.random.randint(94, 200, 300),
    'chol': pd.cut(np.random.randint(126, 564, 300), bins=[126, 200, 300, 400, 564], labels=['low', 'medium', 'high', 'very-high']),
    'fbs': np.random.choice([0, 1], 300),
    'restecg': np.random.randint(0, 2, 300),
    'thalach': np.random.randint(71, 202, 300),
    'exang': np.random.choice([0, 1], 300),
    'oldpeak': np.random.uniform(0.0, 6.2, 300),
    'slope': np.random.randint(0, 3, 300),
    'ca': np.random.randint(0, 4, 300),
    'thal': np.random.randint(1, 4, 300),
    'heartdisease': np.random.choice([0, 1], 300)
}
heartDisease_df = pd.DataFrame(data)

# Define model and fit
model = BayesianNetwork([
    ('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'), ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'), ('heartdisease', 'chol')
])
model.fit(heartDisease_df, estimator=MaximumLikelihoodEstimator)

# Inference
infer = VariableElimination(model)
print(infer.query(variables=['heartdisease'], evidence={'age': 'young'}))
print(infer.query(variables=['heartdisease'], evidence={'chol': 'low'}))


Few examples from the dataset:
        age  sex  cp  trestbps       chol  fbs  restecg  thalach  exang  \
0  very-old    1   2       122  very-high    0        0      143      1   
1  very-old    1   2       166        low    0        0       76      1   
2       NaN    0   0       112        low    1        1      110      1   
3     young    0   3       119        low    0        0      150      0   
4     young    1   2       118  very-high    0        0      150      0   

    oldpeak  slope  ca  thal  heartdisease  
0  2.219500      0   1     1             1  
1  2.697880      1   0     1             0  
2  3.663746      2   3     3             0  
3  4.478827      0   3     1             0  
4  1.969318      1   2     3             0  

Learning CPD using Maximum Likelihood Estimators...

Inferencing with Bayesian Network:

1. Probability of HeartDisease given Age=young
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |        