In [3]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.sampling import BayesianModelSampling
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

In [4]:
df = pd.read_csv("data_asia.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,asia,tub,smoke,lung,bronc,either,xray,dysp
0,0,no,no,no,no,yes,no,no,no
1,1,no,no,no,yes,no,yes,yes,no
2,2,no,no,no,no,no,no,no,no
3,3,no,no,yes,no,yes,no,no,yes
4,4,no,no,yes,no,no,no,no,no


In [8]:
model = BayesianNetwork ([( "asia" , "tub" ),
                          ( "smoke" , "lung" ),
                          ( "smoke" , "bronc" ),
                          ( "tub" , "either" ),
                          ( "lung" , "either" ),
                          ( "either" , "xray" ),
                          ( "either" , "dysp" ),
                          ( "bronc" , "dysp" )])

In [9]:
mls = MaximumLikelihoodEstimator(model=model,data=df)

In [17]:
cpd_s = mls.estimate_cpd(node="smoke")
print(cpd_s)

+------------+---------+
| smoke(no)  | 0.50054 |
+------------+---------+
| smoke(yes) | 0.49946 |
+------------+---------+


In [18]:
df[["smoke"]].value_counts()

smoke
no       50054
yes      49946
dtype: int64

In [19]:
cpd_b = mls.estimate_cpd(node="bronc")
print(cpd_b)

+------------+---------------------+---------------------+
| smoke      | smoke(no)           | smoke(yes)          |
+------------+---------------------+---------------------+
| bronc(no)  | 0.7000039956846605  | 0.40153365634885674 |
+------------+---------------------+---------------------+
| bronc(yes) | 0.29999600431533946 | 0.5984663436511433  |
+------------+---------------------+---------------------+


In [7]:
df[["smoke","bronc"]].value_counts()

smoke  bronc
no     no       35038
yes    yes      29891
       no       20055
no     yes      15016
dtype: int64

In [11]:
model.fit(data=df,estimator=MaximumLikelihoodEstimator)
for i in model.nodes():
    print(model.get_cpds(i))

+-----------+---------+
| asia(no)  | 0.99057 |
+-----------+---------+
| asia(yes) | 0.00943 |
+-----------+---------+
+----------+----------------------+---------------------+
| asia     | asia(no)             | asia(yes)           |
+----------+----------------------+---------------------+
| tub(no)  | 0.9895918511564049   | 0.9448568398727466  |
+----------+----------------------+---------------------+
| tub(yes) | 0.010408148843595103 | 0.05514316012725345 |
+----------+----------------------+---------------------+
+------------+---------+
| smoke(no)  | 0.50054 |
+------------+---------+
| smoke(yes) | 0.49946 |
+------------+---------+
+-----------+----------------------+---------------------+
| smoke     | smoke(no)            | smoke(yes)          |
+-----------+----------------------+---------------------+
| lung(no)  | 0.9905701842010628   | 0.8993512994033557  |
+-----------+----------------------+---------------------+
| lung(yes) | 0.009429815798937148 | 0.100648700596644

In [12]:
model = BayesianNetwork ([( "asia" , "tub" ),
                          ( "smoke" , "lung" ),
                          ( "smoke" , "bronc" ),
                          ( "tub" , "either" ),
                          ( "lung" , "either" ),
                          ( "either" , "xray" ),
                          ( "either" , "dysp" ),
                          ( "bronc" , "dysp" )])

In [13]:
eby = BayesianEstimator(model,data=df)

In [20]:
cpd_by_s = eby.estimate_cpd(node="smoke",prior_type="dirichlet",pseudo_counts=[[1],[200000]])
print(cpd_by_s)

+------------+----------+
| smoke(no)  | 0.166849 |
+------------+----------+
| smoke(yes) | 0.833151 |
+------------+----------+


In [23]:
cpd_by_b = eby.estimate_cpd(node="bronc",prior_type="dirichlet",pseudo_counts=[[1,200000],[1,200000]])
print(cpd_by_b)

+------------+-------------------+---------------------+
| smoke      | smoke(no)         | smoke(yes)          |
+------------+-------------------+---------------------+
| bronc(no)  | 0.699996004474988 | 0.48906979948704954 |
+------------+-------------------+---------------------+
| bronc(yes) | 0.300003995525012 | 0.5109302005129505  |
+------------+-------------------+---------------------+
