In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [34]:
from pgmpy.models import DiscreteBayesianNetwork  # Updated for new pgmpy
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [35]:
# Step 1: Load the dataset
heartDisease = pd.read_csv('D:\Programming\Sem 4 Lab\Stat_Lab\heart.csv')

In [36]:
# Step 2: Preview the dataset
print('Few examples from the dataset are given below:')
print(heartDisease.head())

Few examples from the dataset are given below:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [37]:
# Step 3: Rename target column if needed
if 'target' in heartDisease.columns:
    heartDisease.rename(columns={'target': 'heartdisease'}, inplace=True)

In [38]:
# Step 4: Clean data – handle missing values
heartDisease = heartDisease.replace('?', np.nan)
heartDisease = heartDisease.dropna()
heartDisease = heartDisease.apply(pd.to_numeric, errors='ignore')

  heartDisease = heartDisease.apply(pd.to_numeric, errors='ignore')


In [39]:
# Step 5: Discretize continuous variables
heartDisease['age'] = pd.cut(heartDisease['age'],
                             bins=[0, 30, 40, 50, 60, 70, 100],
                             labels=['0-30', '30-40', '40-50', '50-60', '60-70', '70-100'])

heartDisease['chol'] = pd.cut(heartDisease['chol'],
                              bins=[0, 200, 240, 300, 400, 600],
                              labels=['0-200', '200-240', '240-300', '300-400', '400+'])

In [40]:
# Step 6: Define the Bayesian Network structure
model = DiscreteBayesianNetwork([
    ('age', 'trestbps'), ('age', 'fbs'),
    ('sex', 'trestbps'), ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'), ('heartdisease', 'thalach'),
    ('heartdisease', 'chol')
])

In [41]:
# Step 7: Learn CPDs using MLE
print('\nLearning CPDs using Maximum Likelihood Estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'O', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'O', 'fbs': 'N', 'restecg': 'N', 'thalach': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'heartdisease': 'N'}



Learning CPDs using Maximum Likelihood Estimators


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x1ec97a47a50>

In [42]:
# Step 7: Learn CPDs using MLE
print('\nLearning CPDs using Maximum Likelihood Estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'O', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'O', 'fbs': 'N', 'restecg': 'N', 'thalach': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'heartdisease': 'N'}



Learning CPDs using Maximum Likelihood Estimators


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x1ec97a47a50>

In [44]:
# Step 10: Inference with the Bayesian Network
print('\nInferencing with Bayesian Network:')
heartDisease_infer = VariableElimination(model)

# Query 1: Probability of HeartDisease given Age = 30-40
print('\n1. Probability of HeartDisease given Age = 30-40')
q1 = heartDisease_infer.query(variables=['heartdisease'], evidence={'age': '30-40'})
print(q1)

# Query 2: Probability of HeartDisease given Cholesterol = 200-240
print('\n2. Probability of HeartDisease given Cholesterol = 200-240')
q2 = heartDisease_infer.query(variables=['heartdisease'], evidence={'chol': '200-240'})
print(q2)

# Query 3: Combined evidence
print('\n3. Probability of HeartDisease given Age = 30-40 and Cholesterol = 200-240')
q3 = heartDisease_infer.query(variables=['heartdisease'], evidence={'age': '30-40', 'chol': '200-240'})
print(q3)


Inferencing with Bayesian Network:

1. Probability of HeartDisease given Age = 30-40
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.3375 |
+-----------------+---------------------+
| heartdisease(1) |              0.6625 |
+-----------------+---------------------+

2. Probability of HeartDisease given Cholesterol = 200-240
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.3661 |
+-----------------+---------------------+
| heartdisease(1) |              0.6339 |
+-----------------+---------------------+

3. Probability of HeartDisease given Age = 30-40 and Cholesterol = 200-240
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.2686 |
+-----------------+---------------------+
| heartdisease(1) |              0.7314 |
+-----------------+---------------------+
