<a href="https://colab.research.google.com/github/Siddharth-bariya/first_repo/blob/main/bayessiannetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator,BayesianEstimator, HillClimbSearch, BicScore
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer

In [None]:
!pip install pandas numpy pgmpy

In [3]:
# Read the dataset from the working directory and rename the label column
# from 'target' to 'heart_disease' in a single chained expression.
df = (
    pd.read_csv("heart.csv")
    .rename(columns={'target': 'heart_disease'})
)


In [None]:
# Columns that are replaced by ordinal bin indices before modelling.
num_features = [
    'age',
    'trestbps',
    'chol',
    'thalach',
    'oldpeak',
]

# Quantile-based binning into (up to) four ordinal bins per column.
discretizer = KBinsDiscretizer(
    strategy='quantile',
    encode='ordinal',
    n_bins=4,
)

# Overwrites the selected columns of `df` with their bin indices.
# NOTE(review): this mutates `df` in place, so re-running the cell bins the
# already-binned values again.
df[num_features] = discretizer.fit_transform(df[num_features])

In [15]:
# Learn a network structure from `df` with hill-climb search, using a BIC
# score computed on the same data as the scoring method.
hc = HillClimbSearch(data=df)
best_model = hc.estimate(
    scoring_method=BicScore(data=df),
)

  0%|          | 0/1000000 [00:00<?, ?it/s]

In [5]:
# Hand-specified structure: every listed feature is a direct parent of
# 'heart_disease', and there are no edges between the features themselves.
model = BayesianNetwork([
    (parent, 'heart_disease')
    for parent in (
        'age',
        'sex',
        'chol',
        'trestbps',
        'thalach',
        'oldpeak',
        'exang',
        'cp',
    )
])

In [6]:
# Estimate the CPDs of the hand-specified network from `df` using the
# maximum-likelihood estimator.
model.fit(
    data=df,
    estimator=MaximumLikelihoodEstimator,
)


In [7]:
# Variable-elimination inference engine bound to the hand-specified network.
inference = VariableElimination(model=model)


In [8]:
# Distribution over 'heart_disease' given that 'chol' and 'trestbps' are both
# in state 3.
# NOTE(review): the saved output for this cell is exactly 0.5 / 0.5.
# 'heart_disease' has eight parents in `model`, so the relevant parent
# configurations may simply be unseen in the data - confirm before
# interpreting this result.
query_result = inference.query(
    variables=['heart_disease'],
    evidence={'chol': 3, 'trestbps': 3},
)
print(query_result)

+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |               0.5000 |
+------------------+----------------------+
| heart_disease(1) |               0.5000 |
+------------------+----------------------+


In [17]:
# Build a parameterisable network from the structure found by the search.
model2 = BayesianNetwork(best_model.edges())

# An edge list cannot represent variables that the search left without any
# edges, so constructing from edges alone would silently drop them. Copy the
# full node set across so every variable in `best_model` is present in
# `model2` and can still be fitted, queried, or used as evidence.
model2.add_nodes_from(best_model.nodes())

In [18]:
# Estimate the CPDs of the learned structure from `df` with the Bayesian
# estimator, using a BDeu prior.
model2.fit(
    data=df,
    estimator=BayesianEstimator,
    prior_type="BDeu",
)


In [19]:
# Variable-elimination engine for the learned network.
# NOTE: this rebinds `inference`, which previously pointed at the engine for
# the hand-specified `model`; later cells query `model2` through this name.
inference = VariableElimination(model=model2)


In [22]:
# State labels recorded on the fitted CPDs of the two evidence variables.
chol_states, trestbps_states = (
    model2.get_cpds(column).state_names[column]
    for column in ('chol', 'trestbps')
)

# Condition on the state at index 3 of each variable, falling back to the
# last available state when a variable has fewer than four states.
evidence = {
    'chol': chol_states[min(3, len(chol_states) - 1)],
    'trestbps': trestbps_states[min(3, len(trestbps_states) - 1)],
}

query_result2 = inference.query(
    variables=['heart_disease'],
    evidence=evidence,
)
print(query_result2)

+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |               0.5017 |
+------------------+----------------------+
| heart_disease(1) |               0.4983 |
+------------------+----------------------+


In [25]:
from pgmpy.metrics import log_likelihood_score

# Score the learned network against `df`.
# NOTE(review): `df` is the same frame `model2` was fitted on, so this is an
# in-sample figure.
log_likelihood = log_likelihood_score(
    model=model2,
    data=df,
)
print(f"Log-Likelihood Score: {log_likelihood}")

Log-Likelihood Score: -9668.136439158636
