In [2]:
!pip install pgmpy==0.1.25

Collecting pgmpy==0.1.25
  Downloading pgmpy-0.1.25-py3-none-any.whl.metadata (6.4 kB)
Collecting networkx (from pgmpy==0.1.25)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting torch (from pgmpy==0.1.25)
  Downloading torch-2.9.1-cp310-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting tqdm (from pgmpy==0.1.25)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting opt-einsum (from pgmpy==0.1.25)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting filelock (from torch->pgmpy==0.1.25)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy>=1.13.3 (from torch->pgmpy==0.1.25)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting fsspec>=0.8.5 (from torch->pgmpy==0.1.25)
  Downloading fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch->pgmpy==0.1.25)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloa

In [None]:
# Upload the dataset when running on Google Colab. Outside Colab the import
# fails, so the upload step is skipped and the CSV is expected to already be
# in the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

# Per the Colab API, `files.upload()` returns a dict keyed by uploaded file
# name; an empty dict stands in when no upload happened.
uploaded = files.upload() if files is not None else {}
# Show only the file names: printing the dict itself dumps every file's raw bytes.
print(list(uploaded))

In [None]:
# -------------------------------------------------------
# Bayesian Network for Heart Disease Diagnosis
# Using Cleveland Heart Disease Dataset
# -------------------------------------------------------

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer

# Step 1: Load Dataset
# The Cleveland CSV is read from the current working directory.
df = pd.read_csv(filepath_or_buffer="Cleveland_hd.csv")
# Confirmation message followed by a preview of the first rows.
print("Dataset loaded successfully!", df.head(), sep="\n")

# Step 2: Preprocess Data
# Collapse the target into a binary label: any value > 0 becomes 1, else 0.
df['target'] = (df['target'] > 0).astype(int)

# Select attributes. `.copy()` gives `data` its own storage, so the writes
# below change `data` only and do not trigger SettingWithCopyWarning.
data = df[['age', 'sex', 'cp', 'chol', 'trestbps', 'fbs', 'target']].copy()

# Discretize continuous attributes into 3 equal-width bins (codes 0, 1, 2).
# NOTE(review): equal-width bins can leave the top bin sparse when a column
# has outliers -- check the bin counts before trusting queries on code 2.
continuous_cols = ['age', 'chol', 'trestbps']
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
# The discretizer returns floating-point codes; store them as integers so the
# network states are 0/1/2 regardless of the source column dtypes.
data[continuous_cols] = discretizer.fit_transform(data[continuous_cols]).astype(int)

# Rename target (returns a new frame instead of mutating in place)
data = data.rename(columns={'target': 'heart_disease'})

print("\nPreprocessed Data:")
print(data.head())

# Step 3: Define Bayesian Network Structure
# Edges are (parent, child) pairs. Age feeds blood pressure and cholesterol;
# sex, chest pain type, blood pressure, cholesterol and fasting blood sugar
# are the direct parents of heart_disease.
age_effects = [('age', child) for child in ('trestbps', 'chol')]
risk_factors = [
    (parent, 'heart_disease')
    for parent in ('sex', 'cp', 'trestbps', 'chol', 'fbs')
]
model = BayesianModel(age_effects + risk_factors)

# Step 4: Train Model
# Estimate every conditional probability table from the preprocessed frame.
model.fit(data, estimator=MaximumLikelihoodEstimator)
print("\nModel learned successfully!")

# Step 5: Inference
# Build a variable-elimination inference engine over the model.
inference = VariableElimination(model)


def _posterior_given(evidence):
    """Query the distribution of heart_disease conditioned on `evidence`."""
    return inference.query(variables=['heart_disease'], evidence=evidence)


# Example queries
q1 = _posterior_given({'chol': 2})
print("\nP(Heart Disease | High Cholesterol):", q1, sep="\n")

q2 = _posterior_given({'cp': 2, 'fbs': 1})
print("\nP(Heart Disease | Chest Pain & High FBS):", q2, sep="\n")
