# Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. (You can use Java/Python ML library classes/API.)

# In [None]:
# A Bayesian Network (also called a Bayes Network or Probabilistic Graphical Model) is a graphical model
# that represents the probabilistic relationships among a set of variables.
# These relationships are encoded using directed edges in a graph,
# where each node represents a random variable and the edges represent dependencies between these variables.
# The main purpose is to model uncertain knowledge, make predictions, 
# and infer unknown values based on known information.



import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator 
from pgmpy.inference import VariableElimination
import warnings

warnings.filterwarnings("ignore")

# Step 1: Load the Heart Disease Dataset
# Assuming the Heart Disease dataset is in CSV format and available as 'heart.csv'
# You can download it from https://archive.ics.uci.edu/ml/datasets/Heart+Disease
HEART_CSV = "heart.csv"

# Step 2: Preprocess the Data
# For simplicity, assume the columns of interest are as follows:
# - age: age of the patient
# - sex: sex (1 = male; 0 = female)
# - cp: chest pain type (1, 2, 3, 4 representing types)
# - trestbps: resting blood pressure (in mm Hg)
# - chol: serum cholesterol (in mg/dl)
# - fbs: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
# - restecg: resting electrocardiographic results (not used in the network below)
# - thalach: maximum heart rate achieved
# - exang: exercise-induced angina (1 = yes; 0 = no)
# - target: presence of heart disease (1 = disease; 0 = no disease)

# Parents of 'target' in the network, with the reason each one is included.
PARENTS = [
    'age',       # Age may influence heart disease
    'sex',       # Gender may influence heart disease
    'cp',        # Chest pain type influences heart disease
    'trestbps',  # Blood pressure affects heart disease
    'chol',      # Cholesterol levels affect heart disease
    'fbs',       # Fasting blood sugar influences heart disease
    'thalach',   # Heart rate affects heart disease
    'exang',     # Exercise-induced angina influences heart disease
]
MODEL_COLUMNS = PARENTS + ['target']

# A discrete Bayesian network treats every distinct value of a column as its
# own state, and the CPD of 'target' needs one column per joint configuration
# of its parents. Feeding raw ages / pressures / cholesterol / heart rates
# makes that table explode (hundreds of millions of configurations), so the
# continuous columns are reduced to three right-closed intervals each:
#   (-inf, a], (a, b], (b, inf)  ->  states 0, 1, 2
# The cut points are rough, adjustable thresholds, not a clinical standard.
BIN_EDGES = {
    'age': [-np.inf, 45, 60, np.inf],
    'trestbps': [-np.inf, 120, 140, np.inf],
    'chol': [-np.inf, 200, 240, np.inf],
    'thalach': [-np.inf, 120, 150, np.inf],
}


def discretize(frame):
    """Return a copy of *frame* with the continuous columns replaced by bin indices.

    Only columns listed in ``BIN_EDGES`` that are present in *frame* are
    touched; every other column is passed through unchanged. The input frame
    is not modified.
    """
    binned = frame.copy()
    for column, edges in BIN_EDGES.items():
        if column in binned.columns:
            # labels=False yields the integer index of the interval (0, 1, 2).
            binned[column] = pd.cut(binned[column], bins=edges, labels=False)
    return binned


def discretize_evidence(evidence):
    """Map raw evidence values onto the states produced by ``discretize``.

    *evidence* is a ``{column: raw value}`` dict. Values of binned columns are
    replaced by their bin index (as a plain ``int``); all other values are
    returned as given, so the result can be passed straight to
    ``VariableElimination.query``.
    """
    # Reuse the frame-level binning so training data and evidence can never
    # disagree about where an interval starts or ends.
    row = discretize(pd.DataFrame([evidence]))
    return {
        name: int(row.at[0, name]) if name in BIN_EDGES else value
        for name, value in evidence.items()
    }


# The guard is still true in a notebook cell and when run as a script, so
# `data`, `model`, `infer` and `query` remain module-level names there.
if __name__ == "__main__":
    data = pd.read_csv(HEART_CSV)

    absent = [column for column in MODEL_COLUMNS if column not in data.columns]
    if absent:
        raise ValueError(f"{HEART_CSV} is missing expected columns: {absent}")

    # Rows with missing values cannot be assigned to a bin, so drop them.
    train = discretize(data[MODEL_COLUMNS].dropna())

    # Step 3: Define the Bayesian Network structure
    # Define relationships based on domain knowledge: every feature is a
    # direct parent of 'target'.
    model = BayesianNetwork([(parent, 'target') for parent in PARENTS])

    # Step 4: Train the Model
    # Using Maximum Likelihood Estimation (MLE) to learn the parameters.
    # NOTE(review): parent configurations that never occur in the data carry
    # no information under plain MLE; consider a smoothed estimator if the
    # queries of interest fall into such configurations.
    model.fit(train, estimator=MaximumLikelihoodEstimator)

    # Step 5: Make Predictions using the Model
    # Use Variable Elimination for inference
    infer = VariableElimination(model)

    # Example Case: Predict the probability of heart disease given certain symptoms
    query = infer.query(
        variables=['target'],  # We want to predict the likelihood of heart disease
        # Raw measurements are converted to the same bins used for training.
        evidence=discretize_evidence({
            'age': 63,
            'sex': 1,
            'cp': 3,
            'trestbps': 145,
            'chol': 233,
            'fbs': 1,
            'thalach': 150,
            'exang': 0,
        }),
    )

    # Display the result
    print(query)


# Problem:
# We have the following scenario:

# We want to predict whether it will rain tomorrow (Rain), based on whether the sky is cloudy (Cloudy) and what the weather forecast says (Forecast).
# The relationship between these variables is modeled in a Bayesian network.
# Variables:
# Cloudy (C): Whether the sky is cloudy or not (True/False).
# Forecast (F): Whether the forecast predicts rain (True/False).
# Rain (R): Whether it rains tomorrow (True/False).
# Given Conditional Probabilities:
# P(Rain | Cloudy, Forecast): The probability that it rains tomorrow, given the sky is cloudy and the forecast predicts rain.

# P(Rain = True | Cloudy = True, Forecast = True) = 0.9 (90% chance it rains if the sky is cloudy and the forecast predicts rain)
# P(Rain = True | Cloudy = False, Forecast = True) = 0.2 (20% chance it rains if the sky is not cloudy but the forecast predicts rain)
# P(Rain = True | Cloudy = True, Forecast = False) = 0.3 (30% chance it rains if the sky is cloudy and the forecast doesn't predict rain)
# P(Rain = True | Cloudy = False, Forecast = False) = 0.1 (10% chance it rains if the sky is not cloudy and the forecast doesn't predict rain)
# P(Cloudy): The probability that the sky is cloudy.

# P(Cloudy = True) = 0.6 (60% chance the sky is cloudy)
# P(Cloudy = False) = 0.4 (40% chance the sky is not cloudy)
# P(Forecast): The probability that the forecast predicts rain.

# P(Forecast = True) = 0.7 (70% chance the forecast predicts rain)
# P(Forecast = False) = 0.3 (30% chance the forecast doesn't predict rain)
# Question:
# What is the probability that it will rain tomorrow, given that the sky is cloudy and the forecast predicts rain?

# Step 1: Identify the Relevant Probabilities
# We are asked to calculate P(Rain = True | Cloudy = True, Forecast = True). From the given data, we know:

# P(Rain = True | Cloudy = True, Forecast = True) = 0.9
# Step 2: Calculate Using the Bayesian Network
# In this case, since we already have the conditional probability from the network, we don’t need to do any further calculations. We simply use:




# P(Rain = True | Cloudy = True, Forecast = True) = 0.9
  


# Step 3: Interpretation
# Given that:

# The sky is cloudy,
# The forecast predicts rain,
# The probability that it will rain tomorrow is 90%.

# Final Answer:
# The probability that it will rain tomorrow, given that the sky is cloudy and the forecast predicts rain, is 0.9 or 90%.


# Recorded notebook output from running the code above on the raw column values:
# MemoryError: Unable to allocate 6.63 GiB for an array with shape (889231616,) and data type int64