In [1]:
pip install --upgrade pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.24-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pgmpy
Successfully installed pgmpy-0.1.24


In [2]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [3]:
from google.colab import files
files.upload()

Saving heart.csv to heart.csv


{'heart.csv': b'Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease\r\n40,M,ATA,140,289,0,Normal,172,N,0,Up,0\r\n49,F,NAP,160,180,0,Normal,156,N,1,Flat,1\r\n37,M,ATA,130,283,0,ST,98,N,0,Up,0\r\n48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1\r\n54,M,NAP,150,195,0,Normal,122,N,0,Up,0\r\n39,M,NAP,120,339,0,Normal,170,N,0,Up,0\r\n45,F,ATA,130,237,0,Normal,170,N,0,Up,0\r\n54,M,ATA,110,208,0,Normal,142,N,0,Up,0\r\n37,M,ASY,140,207,0,Normal,130,Y,1.5,Flat,1\r\n37,F,ASY,130,173,0,ST,184,N,0,Up,0\r\n37,M,ASY,130,315,0,Normal,158,N,0,Up,0\r\n40,M,NAP,130,281,0,Normal,167,N,0,Up,0\r\n38,F,ATA,120,275,0,Normal,129,N,0,Up,0\r\n41,M,ASY,112,250,0,Normal,142,N,0,Up,0\r\n54,F,ATA,140,309,0,ST,140,N,0,Up,0\r\n39,M,ATA,120,200,0,Normal,160,Y,1,Flat,0\r\n41,M,ASY,120,336,0,Normal,118,Y,3,Flat,1\r\n55,M,TA,140,295,0,Normal,136,N,0,Flat,1\r\n48,M,ASY,160,355,0,Normal,99,Y,2,Flat,1\r\n'}

In [4]:
data = pd.read_csv('heart.csv')

In [5]:
# Define the Bayesian network structure
model = BayesianNetwork([('Age', 'HeartDisease'),
                         ('Sex', 'HeartDisease'),
                         ('ChestPainType', 'HeartDisease'),
                         ('RestingBP', 'HeartDisease'),
                         ('Cholesterol', 'HeartDisease'),
                         ('FastingBS', 'HeartDisease'),
                         ('RestingECG', 'HeartDisease'),
                         ('MaxHR', 'HeartDisease'),
                         ('ExerciseAngina', 'HeartDisease'),
                         ('Oldpeak', 'HeartDisease'),
                         ('ST_Slope', 'HeartDisease')])

In [6]:
# Estimate CPDs from data
model.fit(data, estimator=MaximumLikelihoodEstimator)


In [7]:
# Create an inference object
inference = VariableElimination(model)

In [8]:
# Provide evidence for diagnosis
evidence = {
    'Age': 40,
    'Sex': 'M',
    'ChestPainType': 'ATA',
    'RestingBP': 140,
    'Cholesterol': 289,
    'FastingBS': 0,
    'RestingECG': 'Normal',
    'MaxHR': 172,
    'ExerciseAngina': 'N',
    'Oldpeak': 0,
    'ST_Slope': 'Up'
}

In [9]:
# Query the model for the probability of Heart Disease
query_result = inference.query(variables=['HeartDisease'], evidence=evidence)
print(query_result)


+-----------------+---------------------+
| HeartDisease    |   phi(HeartDisease) |
| HeartDisease(0) |              1.0000 |
+-----------------+---------------------+
| HeartDisease(1) |              0.0000 |
+-----------------+---------------------+


In [11]:
# Diagnose the patient based on the probability
if query_result.values[1] > query_result.values[0]:
    print("The patient is likely to have Heart Disease.")
else:
    print("The patient is likely not to have Heart Disease.")

The patient is likely not to have Heart Disease.
