In [None]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the Cleveland Heart Disease records. Cells holding the literal "?"
# are swapped for NaN; the replacement returns a new frame, so the loaded
# data is rebound rather than mutated in place.
heartDisease = pd.read_csv("heart.csv").replace("?", np.nan)

# Preview the first rows of the cleaned frame.
print("Few examples from the dataset:", heartDisease.head(), sep="\n")

# Structure of the Bayesian network, given as (parent, child) edges.
# Edge order is kept exactly as originally declared.
Model = BayesianModel([
    # edges into trestbps and fbs
    ("age", "trestbps"), ("age", "fbs"),
    ("sex", "trestbps"), ("exang", "trestbps"),
    # parents of heartdisease
    ("trestbps", "heartdisease"), ("fbs", "heartdisease"),
    # children of heartdisease
    ("heartdisease", "restecg"),
    ("heartdisease", "thalach"),
    ("heartdisease", "chol"),
])

# Estimate every conditional probability table from the data by maximum
# likelihood.
print("\nLearning CPD using Maximum Likelihood Estimators")
Model.fit(data=heartDisease, estimator=MaximumLikelihoodEstimator)

# Variable-elimination engine over the fitted network; later cells query it.
HeartDisease_infer = VariableElimination(Model)

In [27]:
# List the distinct values observed for the two columns used as evidence in
# the inference queries.
print(f"Unique ages: {heartDisease['age'].unique()}")
print(f"Unique chol values: {heartDisease['chol'].unique()}")

Unique ages: [63 67 37 41 56 62 57 53 44 52 48 54 49 64 58 60 50 66 43 40 69 59 42 55
 61 65 71 51 46 45 39 68 47 34 35 29 70 77 38 74 76]
Unique chol values: [233 286 229 250 204 236 268 354 254 203 192 294 256 263 199 168 239 275
 266 211 283 284 224 206 219 340 226 247 167 230 335 234 177 276 353 243
 225 302 212 330 175 417 197 198 290 253 172 273 213 305 216 304 188 282
 185 232 326 231 269 267 248 360 258 308 245 270 208 264 321 274 325 235
 257 164 141 252 255 201 222 260 182 303 265 309 307 249 186 341 183 407
 217 288 220 209 227 261 174 281 221 205 240 289 318 298 564 246 322 299
 300 293 277 214 207 223 160 394 184 315 409 244 195 196 126 313 259 200
 262 215 228 193 271 210 327 149 295 306 178 237 218 242 319 166 180 311
 278 342 169 187 157 176 241 131]


In [29]:
# Evidence values used for the two queries. The headings previously claimed
# Age=30 and Cholesterol=100 while the queries actually used 35 and 205;
# neither 30 nor 100 appears among the unique age/chol values printed
# earlier in this notebook, which is presumably why 35 and 205 were chosen.
# Each value lives in one constant so the heading and the query cannot
# drift apart again.
AGE_EVIDENCE = 35
CHOL_EVIDENCE = 205

# Distribution over heartdisease given the observed age.
print(f"\n1. Probability of HeartDisease given Age={AGE_EVIDENCE}")
q1 = HeartDisease_infer.query(
    variables=["heartdisease"], evidence={"age": AGE_EVIDENCE}
)
print(q1)

# Distribution over heartdisease given the observed cholesterol value.
print(f"\n2. Probability of HeartDisease given Cholesterol={CHOL_EVIDENCE}")
q2 = HeartDisease_infer.query(
    variables=["heartdisease"], evidence={"chol": CHOL_EVIDENCE}
)
print(q2)




1. Probability of HeartDisease given Age=30
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.7109 |
+-----------------+---------------------+
| heartdisease(1) |              0.0945 |
+-----------------+---------------------+
| heartdisease(2) |              0.0233 |
+-----------------+---------------------+
| heartdisease(3) |              0.1395 |
+-----------------+---------------------+
| heartdisease(4) |              0.0319 |
+-----------------+---------------------+

2. Probability of HeartDisease given Cholesterol=100
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.4926 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+
| heartdisease(2) |              0.0000 |
+-----------------+---------------------+
| heartdisease(3) |              0.5074 |
+-----------------+