<center>

# Program No. 7
</center>

### <strong> Objective: </strong>Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set.


In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [25]:
# Load dataset (use your local file path or upload via Colab or Jupyter)
df = pd.read_csv("heart_disease.csv")  # Replace with your file path

# Show first few rows
df.head()


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [26]:
# Convert target to binary: 0 (No Heart Disease), 1 (Heart Disease)
df['target'] = df['target'].apply(lambda x: 1 if x > 0 else 0)

# Discretize continuous variables for Bayesian Network (simplified binning)
df['age'] = pd.cut(df['age'], bins=[29, 40, 55, 77],
                   labels=['young', 'middle', 'old'])
df['chol'] = pd.cut(df['chol'], bins=[100, 200, 300, 600],
                    labels=['low', 'medium', 'high'])
df['trestbps'] = pd.cut(df['trestbps'], bins=[80, 120, 140, 200],
                        labels=['normal', 'borderline', 'high'])

# Select relevant features
data = df[['age', 'chol', 'trestbps', 'cp', 'sex', 'target']]

# Drop missing (if any)
data.dropna(inplace=True)

data.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)


Unnamed: 0,age,chol,trestbps,cp,sex,target
0,old,medium,high,3,1,1
1,young,medium,borderline,2,1,1
2,middle,medium,borderline,1,0,1
3,old,medium,normal,1,1,1
4,old,high,normal,0,0,1


In [27]:
# Import necessary class
from pgmpy.models import DiscreteBayesianNetwork  # Import DiscreteBayesianNetwork instead of BayesianModel

# Define the network structure manually
model = DiscreteBayesianNetwork([
    ('age', 'chol'),
    ('sex', 'chol'),
    ('chol', 'target'),
    ('trestbps', 'target'),
    ('cp', 'target')
])

In [28]:
# Fit model using Maximum Likelihood Estimation
model.fit(data, estimator=MaximumLikelihoodEstimator)

<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x7d2f0d16ce50>

In [29]:
# Inference
infer = VariableElimination(model)

# Example: Given a patient with middle age, high cholesterol, chest pain type 3, male, high BP
query_result = infer.query(
    variables=['target'],
    evidence={'age': 'middle', 'chol': 'high', 'cp': 3,
              'sex': 1, 'trestbps': 'high'}
)

print("\nDiagnosis Result (Heart Disease Probability):\n")
print(query_result)



Diagnosis Result (Heart Disease Probability):

+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.5000 |
+-----------+---------------+
| target(1) |        0.5000 |
+-----------+---------------+
