In [None]:
# Install or upgrade pgmpy
import sys
!{sys.executable} -m pip install pgmpy --upgrade

import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer

# Read the heart-disease records from a CSV in the working directory.
df = pd.read_csv('heart.csv')

# Columns treated as continuous; they are binned so the discrete network
# can use them as categorical states.
continuous_cols = ['age', 'trestbps', 'chol', 'thalach']

# Three equal-width bins per column, encoded as ordinal codes 0, 1 and 2.
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')

# Fit the bin edges on the data, then overwrite the raw measurements with
# their integer bin codes.
binned_codes = discretizer.fit_transform(df[continuous_cols])
df[continuous_cols] = binned_codes.astype(int)

# Hand-specified structure, written as (parent, child) edges.
# Edges among the risk factors themselves; note that 'trestbps' only appears
# as a child of 'age' and has no edge towards 'target'.
factor_edges = [
    ('age', 'chol'),
    ('age', 'trestbps'),
    ('sex', 'cp'),
]

# Every variable listed here is a direct parent of 'target'.
# NOTE(review): 'target' is conditioned on every combination of these six
# parents, and maximum likelihood applies no smoothing -- check that the data
# covers those combinations well enough.
target_parents = ('cp', 'chol', 'fbs', 'restecg', 'thalach', 'exang')
target_edges = [(parent, 'target') for parent in target_parents]

model = DiscreteBayesianNetwork(factor_edges + target_edges)

# Estimate the conditional probability tables from the discretized data.
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Evidence must be given in the model's encoded states, not raw measurements.
# The columns binned with KBinsDiscretizer(n_bins=3, encode='ordinal') take
# the codes 0 (lowest bin), 1 (middle bin) and 2 (highest bin); the cut points
# actually used in training can be inspected via `discretizer.bin_edges_`
# rather than being worked out by hand.
evidence = {
    'age': 1,         # Middle age bin
    'sex': 1,         # Male
    'cp': 2,          # Chest-pain code 2; NOTE(review): originally labelled
                      # "asymptomatic" -- confirm against the dataset's coding
    'chol': 1,        # Middle cholesterol bin
    'fbs': 1,         # Fasting blood sugar > 120
    'restecg': 1,     # Abnormal ECG
    'thalach': 1,     # Middle heart-rate bin
    'exang': 1        # Exercise-induced angina
}
# Alternative scenario to try: the same evidence with 'fbs': 0 and 'exang': 0.

# Inference engine (variable elimination) over the fitted network.
infer = VariableElimination(model)

# Distribution of 'target' given the evidence above.
q = infer.query(variables=['target'], evidence=evidence)

# Output
print("Prediction for Heart Disease (target):")
print(q)

