In [1]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator, ParameterEstimator
from pgmpy.inference import VariableElimination
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import Image

In [2]:

'''
Attribute Information:
   -- Only 14 used
      -- 1. #3  (age), age in years
      -- 2. #4  (sex), sex (1 = male; 0 = female)
      -- 3. #9  (cp), chest pain type
        -- Value 1: typical angina
        -- Value 2: atypical angina
        -- Value 3: non-anginal pain
        -- Value 4: asymptomatic
      -- 4. #10 (trestbps), resting blood pressure (in mm Hg on admission to the hospital)
      -- 5. #12 (chol), serum cholestoral in mg/dl
      -- 6. #16 (fbs), (fasting blood sugar > 120 mg/dl)  (1 = true; 0 = false)
      -- 7. #19 (restecg), restecg: resting electrocardiographic results
        -- Value 0: normal
        -- Value 1: having ST-T wave abnormality (T wave inversions and/or ST 
                    elevation or depression of > 0.05 mV)
        -- Value 2: showing probable or definite left ventricular hypertrophy
                    by Estes' criteria
      -- 8. #32 (thalach), maximum heart rate achieved
      -- 9. #38 (exang), exercise induced angina (1 = yes; 0 = no)
      -- 10. #40 (oldpeak), = ST depression induced by exercise relative to rest
      -- 11. #41 (slope), the slope of the peak exercise ST segment
        -- Value 1: upsloping
        -- Value 2: flat
        -- Value 3: downsloping   
      -- 12. #44 (ca), number of major vessels (0-3) colored by flourosopy
      -- 13. #51 (thal), 3 = normal; 6 = fixed defect; 7 = reversable defect 
      -- 14. #58 (num), (the predicted attribute) num: diagnosis of heart disease (angiographic disease status)
        -- Value 0: < 50% diameter narrowing
        -- Value 1: > 50% diameter narrowing
        (in any major vessel: attributes 59 through 68 are vessels)



10. Class Distribution:
        Database:      0   1   2   3   4 Total
          Cleveland: 164  55  36  35  13   303
          Hungarian: 188  37  26  28  15   294
        Switzerland:   8  48  32  30   5   123
      Long Beach VA:  51  56  41  42  10   200


TODO: * Check if there is a -9 in the dataset because it is stated that this would be the marker for a missing value.
      * Check for occurences of ?
'''



column_names = [
    'age',
    'sex',
    'cp',
    'trestbps',
    'chol',
    'fbs',
    'restecg',
    'thalach',
    'exang',
    'oldpeak',
    'slope',
    'ca',
    'thal', 
    'num'
]

df = pd.read_csv("data/processed.cleveland.data", header = None, names = column_names)

# Removing non-numeric values
for column in column_names:
    # pandas.to_numeric return type depends on input. Series if Series, otherwise ndarray
    # If ‘coerce’, then invalid parsing will be set as NaN
    df = df[pd.to_numeric(df[column], errors='coerce').notnull()]

df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
5,56.0,1.0,2.0,120.0,236.0,0.0,0.0,178.0,0.0,0.8,1.0,0.0,3.0,0
6,62.0,0.0,4.0,140.0,268.0,0.0,2.0,160.0,0.0,3.6,3.0,2.0,3.0,3
7,57.0,0.0,4.0,120.0,354.0,0.0,0.0,163.0,1.0,0.6,1.0,0.0,3.0,0
8,63.0,1.0,4.0,130.0,254.0,0.0,2.0,147.0,0.0,1.4,2.0,1.0,7.0,2
9,53.0,1.0,4.0,140.0,203.0,1.0,2.0,155.0,1.0,3.1,3.0,0.0,7.0,1


In [3]:
for i in range(0,len(df)):
    if not i==87 and not i==166 and not i==192 and not i==266 and not i==287:
        if df.loc[i, 'num'] >1:
            df.loc[i, 'num']=1
df.loc[299, 'num']=1
df.loc[300, 'num']=1

df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
5,56.0,1.0,2.0,120.0,236.0,0.0,0.0,178.0,0.0,0.8,1.0,0.0,3.0,0
6,62.0,0.0,4.0,140.0,268.0,0.0,2.0,160.0,0.0,3.6,3.0,2.0,3.0,1
7,57.0,0.0,4.0,120.0,354.0,0.0,0.0,163.0,1.0,0.6,1.0,0.0,3.0,0
8,63.0,1.0,4.0,130.0,254.0,0.0,2.0,147.0,0.0,1.4,2.0,1.0,7.0,1
9,53.0,1.0,4.0,140.0,203.0,1.0,2.0,155.0,1.0,3.1,3.0,0.0,7.0,1


# Conversion of categorical into bins

In [4]:
#age
out, bins =pd.qcut(df.iloc[:, 0], 4, labels=["Age: (29,48]", "Age: (48,56]","Age:(56,61]", "Age: (61,77]"], retbins= True)
df.iloc[:, 0] = out
#trestbps
out, bins = pd.qcut(df.iloc[:, 3], 4, labels=["trestbps: (94,120]", "trestbps: (120,130]","trestbps:(130,140]", "trestbsp: (140,200]"], retbins= True)
df.iloc[:, 3] = out
#chol
out, bins = pd.qcut(df.iloc[:, 4], 4, labels=["chol: (126,211]", "chol: (211,243]","chol: (243,276]", "chol: (276,564]"], retbins= True)
df.iloc[:, 4] = out
#thalach
out, bins = pd.qcut(df.iloc[:, 7], 4, labels=["thalach: (71,133]", "thalach: (133,153]","thalach: (153,166]", "thalach: (166,202]"], retbins= True)
df.iloc[:, 7] = out
#oldpeak
out, bins = pd.qcut(df.iloc[:, 9], 3, labels=["oldpeak: (0,0.1]", "oldpeak: (0.1,1.4]","oldpeak: (1.4,6.2]"], retbins= True)
df.iloc[:, 9] = out

In [5]:
#conversion check
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,"Age: (61,77]",1.0,1.0,"trestbsp: (140,200]","chol: (211,243]",1.0,2.0,"thalach: (133,153]",0.0,"oldpeak: (1.4,6.2]",3.0,0.0,6.0,0
1,"Age: (61,77]",1.0,4.0,"trestbsp: (140,200]","chol: (276,564]",0.0,2.0,"thalach: (71,133]",1.0,"oldpeak: (1.4,6.2]",2.0,3.0,3.0,1
2,"Age: (61,77]",1.0,4.0,"trestbps: (94,120]","chol: (211,243]",0.0,2.0,"thalach: (71,133]",1.0,"oldpeak: (1.4,6.2]",2.0,2.0,7.0,1
3,"Age: (29,48]",1.0,3.0,"trestbps: (120,130]","chol: (243,276]",0.0,0.0,"thalach: (166,202]",0.0,"oldpeak: (1.4,6.2]",3.0,0.0,3.0,0
4,"Age: (29,48]",0.0,2.0,"trestbps: (120,130]","chol: (126,211]",0.0,2.0,"thalach: (166,202]",0.0,"oldpeak: (0.1,1.4]",1.0,0.0,3.0,0
5,"Age: (48,56]",1.0,2.0,"trestbps: (94,120]","chol: (211,243]",0.0,0.0,"thalach: (166,202]",0.0,"oldpeak: (0.1,1.4]",1.0,0.0,3.0,0
6,"Age: (61,77]",0.0,4.0,"trestbps:(130,140]","chol: (243,276]",0.0,2.0,"thalach: (153,166]",0.0,"oldpeak: (1.4,6.2]",3.0,2.0,3.0,1
7,"Age:(56,61]",0.0,4.0,"trestbps: (94,120]","chol: (276,564]",0.0,0.0,"thalach: (153,166]",1.0,"oldpeak: (0.1,1.4]",1.0,0.0,3.0,0
8,"Age: (61,77]",1.0,4.0,"trestbps: (120,130]","chol: (243,276]",0.0,2.0,"thalach: (133,153]",0.0,"oldpeak: (0.1,1.4]",2.0,1.0,7.0,1
9,"Age: (48,56]",1.0,4.0,"trestbps:(130,140]","chol: (126,211]",1.0,2.0,"thalach: (153,166]",1.0,"oldpeak: (1.4,6.2]",3.0,0.0,7.0,1


# Bayesian network construction

In [120]:
#model = BayesianModel([('age', 'trestbps'),('age', 'chol'), ('age', 'fbs'), ('age','restecg'), ('sex', 'trestbps'), ('sex', 'chol'), ('sex','fbs'), ('sex', 'restecg'), ('cp', 'trestbps'), ('cp', 'chol'), ('cp', 'fbs'), ('cp', 'restecg'), ('trestbps', 'thalach'), ('trestbps','exang'), ('trestbps', 'oldpeak'), ('trestbps', 'slope'), ('trestbps', 'ca'), ('trestbps', 'thal'), ('chol', 'thalach'), ('chol', 'exang'), ('chol', 'oldpeak'), ('chol', 'slope'), ('chol', 'ca'), ('chol', 'thal'), ('fbs', 'thalach'), ('fbs', 'exang'), ('fbs', 'oldpeak'), ('fbs', 'slope'), ('fbs', 'ca'), ('fbs', 'thal'), ('restecg', 'thalach'), ('restecg', 'exang'), ('restecg', 'oldpeak'), ('restecg', 'slope'), ('restecg', 'ca'), ('restecg', 'thal'), ('thalach', 'num'), ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'), ('thal', 'num')])
model = BayesianModel([('age', 'cp'),('age', 'trestbps'),('age', 'chol'), ('age', 'fbs'), ('age','restecg'),('sex', 'cp') ,('sex', 'trestbps'), ('sex', 'chol'), ('sex','fbs'), ('sex', 'restecg'), ('cp', 'thalach'), ('cp', 'exang'), ('cp', 'oldpeak'), ('cp', 'slope'), ('cp', 'ca'),('cp', 'thal'),('trestbps', 'thalach'), ('trestbps','exang'), ('trestbps', 'oldpeak'), ('trestbps', 'slope'), ('trestbps', 'ca'), ('trestbps', 'thal'), ('chol', 'thalach'), ('chol', 'exang'), ('chol', 'oldpeak'), ('chol', 'slope'), ('chol', 'ca'), ('chol', 'thal'), ('fbs', 'thalach'), ('fbs', 'exang'), ('fbs', 'oldpeak'), ('fbs', 'slope'), ('fbs', 'ca'), ('fbs', 'thal'), ('restecg', 'thalach'), ('restecg', 'exang'), ('restecg', 'oldpeak'), ('restecg', 'slope'), ('restecg', 'ca'), ('restecg', 'thal'), ('thalach', 'num'), ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'), ('thal', 'num')])
model1 = BayesianModel([('age', 'cp'),('age', 'trestbps'),('age', 'chol'), ('age', 'fbs'),('sex', 'cp') ,('sex', 'trestbps'), ('sex', 'chol'), ('sex','fbs'), ('cp', 'thalach'), ('cp', 'exang'), ('cp', 'oldpeak'), ('cp', 'slope'), ('cp', 'ca'),('cp', 'thal'),('cp', 'restecg'),('trestbps', 'restecg'),('trestbps', 'thalach'), ('trestbps','exang'), ('trestbps', 'oldpeak'), ('trestbps', 'slope'), ('trestbps', 'ca'), ('trestbps', 'thal'), ('chol', 'thalach'), ('chol', 'exang'), ('chol', 'oldpeak'), ('chol', 'slope'), ('chol', 'ca'), ('chol', 'thal'),('chol', 'restecg') ,('fbs', 'restecg'),('fbs', 'thalach'), ('fbs', 'exang'), ('fbs', 'oldpeak'), ('fbs', 'slope'), ('fbs', 'ca'), ('fbs', 'thal'),('thalach', 'num'), ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'), ('thal', 'num'), ('restecg', 'num')])
model2 = BayesianModel([('age', 'cp'),('age', 'trestbps'),('age', 'chol'), ('age', 'fbs'), ('age','restecg'),('age', 'thalach'), ('age', 'exang'), ('age', 'oldpeak'), ('age', 'slope'), ('age', 'ca'), ('age','thal'), ('sex', 'cp') ,('sex', 'trestbps'), ('sex', 'chol'), ('sex','fbs'), ('sex', 'restecg'), ('sex', 'thalach'), ('sex', 'exang'), ('sex','oldpeak'), ('sex', 'slope'), ('sex','ca'),('sex','thal'), ('cp', 'num'), ('trestbps', 'num'), ('chol', 'num'), ('fbs', 'num'), ('restecg', 'num'), ('thalach', 'num'), ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'), ('thal', 'num')])

In [105]:
#estimation of parameters
model.fit(df, estimator=BayesianEstimator, prior_type="BDeu") # default equivalent_sample_size=5
for cpd in model.get_cpds():
    print(cpd)

+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| age      | age(Age: (29,48]) | age(Age: (29,48]) | age(Age: (48,56]) | age(Age: (48,56]) | age(Age: (61,77]) | age(Age: (61,77]) | age(Age:(56,61]) | age(Age:(56,61]) |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| sex      | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)         | sex(1.0)         |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| fbs(0.0) | 0.941988950276    | 0.926439232409    | 0.865482233503    | 0.789044289044    | 0.867816091954    | 0.790220820189    | 0.7684563758

+------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+------------------------------+------------------------------+----------

+---------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-------

In [122]:
#estimation of parameters
model1.fit(df, estimator=BayesianEstimator, prior_type="BDeu") # default equivalent_sample_size=5
for cpd in model1.get_cpds():
    print(cpd)

+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| age      | age(Age: (29,48]) | age(Age: (29,48]) | age(Age: (48,56]) | age(Age: (48,56]) | age(Age: (61,77]) | age(Age: (61,77]) | age(Age:(56,61]) | age(Age:(56,61]) |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| sex      | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)         | sex(1.0)         |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| fbs(0.0) | 0.941988950276    | 0.926439232409    | 0.865482233503    | 0.789044289044    | 0.867816091954    | 0.790220820189    | 0.7684563758

+---------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-----------------------------+-----------------------------+----------------------------+-----------------------------+-------

In [125]:
#estimation of parameters
model2.fit(df, estimator=BayesianEstimator, prior_type="BDeu") # default equivalent_sample_size=5
for cpd in model2.get_cpds():
    print(cpd)

+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| age      | age(Age: (29,48]) | age(Age: (29,48]) | age(Age: (48,56]) | age(Age: (48,56]) | age(Age: (61,77]) | age(Age: (61,77]) | age(Age:(56,61]) | age(Age:(56,61]) |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| sex      | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)          | sex(1.0)          | sex(0.0)         | sex(1.0)         |
+----------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+------------------+
| fbs(0.0) | 0.941988950276    | 0.926439232409    | 0.865482233503    | 0.789044289044    | 0.867816091954    | 0.790220820189    | 0.7684563758

MemoryError: 

In [108]:
#check sizes of cps for correctness
model.get_cpds()

[<TabularCPD representing P(fbs:2 | age:4, sex:2) at 0xe44c430>,
 <TabularCPD representing P(slope:3 | chol:4, cp:4, fbs:2, restecg:3, trestbps:4) at 0xe58bb70>,
 <TabularCPD representing P(trestbps:4 | age:4, sex:2) at 0xe58bc30>,
 <TabularCPD representing P(exang:2 | chol:4, cp:4, fbs:2, restecg:3, trestbps:4) at 0xe44c170>,
 <TabularCPD representing P(thalach:4 | chol:4, cp:4, fbs:2, restecg:3, trestbps:4) at 0xe44c7b0>,
 <TabularCPD representing P(age:4) at 0xe44c390>,
 <TabularCPD representing P(chol:4 | age:4, sex:2) at 0xe99b650>,
 <TabularCPD representing P(sex:2) at 0xe99b350>,
 <TabularCPD representing P(oldpeak:3 | chol:4, cp:4, fbs:2, restecg:3, trestbps:4) at 0xe99b1f0>,
 <TabularCPD representing P(num:2 | ca:4, exang:2, oldpeak:3, slope:3, thal:3, thalach:4) at 0xe58b5b0>,
 <TabularCPD representing P(restecg:3 | age:4, sex:2) at 0xe58b910>,
 <TabularCPD representing P(cp:4 | age:4, sex:2) at 0xe44c3b0>,
 <TabularCPD representing P(ca:4 | chol:4, cp:4, fbs:2, restecg:3, tr

In [109]:
model1.get_cpds()

[<TabularCPD representing P(fbs:2 | age:4, sex:2) at 0xe987bb0>,
 <TabularCPD representing P(slope:3 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe987850>,
 <TabularCPD representing P(trestbps:4 | age:4, sex:2) at 0xe987710>,
 <TabularCPD representing P(exang:2 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe987310>,
 <TabularCPD representing P(restecg:3 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe9878f0>,
 <TabularCPD representing P(age:4) at 0xe99b1b0>,
 <TabularCPD representing P(chol:4 | age:4, sex:2) at 0xe987490>,
 <TabularCPD representing P(sex:2) at 0xe987910>,
 <TabularCPD representing P(oldpeak:3 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe987390>,
 <TabularCPD representing P(num:2 | ca:4, exang:2, oldpeak:3, restecg:3, slope:3, thal:3, thalach:4) at 0xe987b10>,
 <TabularCPD representing P(thalach:4 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe987790>,
 <TabularCPD representing P(cp:4 | age:4, sex:2) at 0xe987b70>,
 <TabularCPD representing P(ca:4 | chol:4, cp:4, fbs:2, trestbps:4) at 0xe987270>,


In [37]:
#possible check for correctness cardinality of variables
model.get_cardinality('num')

2

In [43]:
model.local_independencies('num')

(num _|_ fbs, trestbps, restecg, age, cp, chol, sex | slope, oldpeak, exang, thalach, ca, thal)

In [44]:
# Getting all the local independencies in the network.
model.local_independencies(['sex', 'age', 'fbs', 'slope', 'trestbps', 'exang', 'thalach', 'chol', 'oldpeak', 'restecg', 'cp', 'ca', 'thal', 'num'])

(sex _|_ fbs, slope, trestbps, exang, restecg, age, chol, oldpeak, num, thalach, cp, ca, thal)
(age _|_ fbs, slope, trestbps, exang, restecg, ca, chol, sex, oldpeak, num, thalach, cp, thal)
(fbs _|_ slope, trestbps, exang, restecg, ca, chol, oldpeak, num, thalach, cp, thal | age, sex)
(slope _|_ exang, age, sex, oldpeak, num, thalach, ca, thal | fbs, cp, trestbps, chol, restecg)
(trestbps _|_ fbs, slope, exang, restecg, ca, chol, oldpeak, num, thalach, cp, thal | age, sex)
(exang _|_ slope, age, sex, oldpeak, num, thalach, ca, thal | fbs, cp, trestbps, chol, restecg)
(thalach _|_ slope, exang, age, sex, oldpeak, num, ca, thal | fbs, cp, trestbps, chol, restecg)
(chol _|_ fbs, slope, trestbps, exang, restecg, ca, oldpeak, num, thalach, cp, thal | age, sex)
(oldpeak _|_ slope, exang, age, sex, num, thalach, ca, thal | fbs, cp, trestbps, chol, restecg)
(restecg _|_ fbs, slope, trestbps, exang, ca, chol, oldpeak, num, thalach, cp, thal | age, sex)
(cp _|_ fbs, slope, trestbps, exang, reste

In [111]:
model.check_model()

True

In [112]:
model1.check_model()

True

In [126]:
model2.check_model()

True

# Inference

for ca the order for likelihood of heartdisease is reversed: 0 (worst) - 3(best).
for slope flat (1) is the worst. 
CP value 4 seems to be really predictive


In [None]:
predict num
inference on sex =1 and age bin 3 (combinations of male and female and ages)
inference on slope and ca. (all combinations)
inference on cp (all combinations)

In [39]:
# Basic inference on num, without observed variables. Note: takes approx 15 min for me.
infer = VariableElimination(model)
print(infer.query(['num']) ['num'])

KeyboardInterrupt: 

In [40]:
infer = VariableElimination(model)
print(infer.query(['num']) ['num'])
print(infer.query(['num'], evidence={'sex': 0}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5397 |
| num_1 |     0.4603 |
+-------+------------+
+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5702 |
| num_1 |     0.4298 |
+-------+------------+


In [123]:
infer1 = VariableElimination(model1)
print(infer1.query(['num']) ['num'])
print(infer1.query(['num'], evidence={'sex': 0}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5624 |
| num_1 |     0.4376 |
+-------+------------+
+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5839 |
| num_1 |     0.4161 |
+-------+------------+


In [127]:
infer2 = VariableElimination(model2)
print(infer2.query(['num']) ['num'])
print(infer2.query(['num'], evidence={'sex': 0}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5048 |
| num_1 |     0.4952 |
+-------+------------+
+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5090 |
| num_1 |     0.4910 |
+-------+------------+


In [128]:
print(infer2.query(['num'], evidence={'sex': 1, 'age': 1, 'cp': 3, 'chol': 3, 'trestbps': 3, 'fbs': 1, 'restecg': 2,'ca': 3, 'exang': 1,'oldpeak': 2, 'slope': 2, 'thal': 2 }) ['num'])
print(infer2.query(['num'], evidence={'sex': 1, 'age': 3, 'cp': 3, 'chol': 3, 'trestbps': 0, 'fbs': 0, 'restecg': 0,'ca': 3, 'exang': 1,'oldpeak': 2, 'slope': 1, 'thal':2 }) ['num'])
print(infer2.query(['num'], evidence={'sex': 0, 'age': 0, 'cp': 0, 'chol': 0, 'trestbps': 1, 'fbs': 1, 'restecg': 1,'ca': 3, 'exang': 1,'oldpeak': 2 }) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5000 |
| num_1 |     0.5000 |
+-------+------------+
+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5000 |
| num_1 |     0.5000 |
+-------+------------+
+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5000 |
| num_1 |     0.5000 |
+-------+------------+


In [103]:
print(infer.query(['num'], evidence={'sex': 1}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5249 |
| num_1 |     0.4751 |
+-------+------------+


In [100]:
print(infer.query(['num'], evidence={'restecg': 0}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5537 |
| num_1 |     0.4463 |
+-------+------------+


In [101]:
print(infer.query(['num'], evidence={'restecg': 1}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.4599 |
| num_1 |     0.5401 |
+-------+------------+


In [102]:
print(infer.query(['num'], evidence={'restecg': 2}) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5289 |
| num_1 |     0.4711 |
+-------+------------+


In [88]:
print(infer.query(['num'], evidence={'sex': 1, 'age': 1, 'cp': 3, 'chol': 3, 'trestbps': 3, 'fbs': 1, 'restecg': 2,'ca': 3, 'exang': 1,'oldpeak': 2, 'slope': 2, 'thal': 2 }) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.5000 |
| num_1 |     0.5000 |
+-------+------------+


In [96]:
print(infer.query(['num'], evidence={'sex': 1, 'age': 3, 'cp': 3, 'chol': 3, 'trestbps': 0, 'fbs': 0, 'restecg': 0,'ca': 3, 'exang': 1,'oldpeak': 2, 'slope': 1, 'thal':2 }) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.3754 |
| num_1 |     0.6246 |
+-------+------------+


In [59]:
print(infer.query(['num'], evidence={'sex': 0, 'age': 0, 'cp': 0, 'chol': 0, 'trestbps': 1, 'fbs': 1, 'restecg': 1,'ca': 3, 'exang': 1,'oldpeak': 2 }) ['num'])

+-------+------------+
| num   |   phi(num) |
|-------+------------|
| num_0 |     0.4585 |
| num_1 |     0.5415 |
+-------+------------+
