<a href="https://colab.research.google.com/github/Kirans1ngh/Machine-Learning-practice/blob/main/Classification/Heart_Disease_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def generate_heart_data(samples: int = 500, seed: "int | None" = 42) -> pd.DataFrame:
    """
    Generate a synthetic heart disease dataset for educational purposes.

    Features: Age, Cholesterol, Max Heart Rate, Chest Pain (0-3).
    Target: 0 (Healthy), 1 (Disease).

    The label is deterministic given the features: a linear risk score is
    computed per row and rows whose score is strictly above the median score
    are labelled 1, so at most half of the rows are positive.

    Args:
        samples: Number of rows to generate.
        seed: Seed for the private random generator. The default (42)
            reproduces the dataset this function has always returned;
            pass None for a different dataset on every call.

    Returns:
        DataFrame with columns Age, Cholesterol, Max_HR, Chest_Pain, Target.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randint, but leaves NumPy's global RNG untouched,
    # so calling this function no longer reseeds unrelated code.
    rng = np.random.RandomState(seed)
    age = rng.randint(30, 80, samples)
    chol = rng.randint(150, 400, samples)
    max_hr = rng.randint(100, 200, samples)
    chest_pain = rng.randint(0, 4, samples)

    # Linear risk score:
    # higher age, higher chol, lower max_hr, higher chest_pain -> higher risk
    risk = age * 0.1 + chol * 0.05 - max_hr * 0.2 + chest_pain * 5
    # Threshold at the median so the two classes come out roughly balanced.
    target = (risk > np.median(risk)).astype(int)

    return pd.DataFrame({
        'Age': age,
        'Cholesterol': chol,
        'Max_HR': max_hr,
        'Chest_Pain': chest_pain,
        'Target': target,
    })

# 1. DATA PREPARATION
print("--- Step 1: Data Preparation ---")
df = generate_heart_data()

# Separate the label column from the feature columns.
y = df['Target']
X = df.drop(columns='Target')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the features for the neural network. The scaler learns its
# statistics from the training split only and reuses them on the test split,
# so nothing about the held-out rows leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. BAYESIAN CLASSIFIER (Gaussian Naive Bayes)
# Logic: applies Bayes' theorem, scoring each class by how likely the observed
# feature values are under that class.
print("\n--- Step 2: Training Bayesian Classifier ---")
# Trained on the unscaled features: Naive Bayes does not strictly need scaling.
nb_model = GaussianNB().fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)
nb_acc = accuracy_score(y_true=y_test, y_pred=nb_preds)

# 3. DECISION TREE CLASSIFIER (CART)
# Logic: recursively splits the data into branches, picking at each node the
# split that most reduces class impurity. No `criterion` is passed, so the
# library default applies (Gini impurity per the scikit-learn docs), not
# ID3-style information gain.
print("--- Step 3: Training Decision Tree ---")
# max_depth=4 keeps the tree small enough to read; random_state fixes tie-breaking.
dt_model = DecisionTreeClassifier(random_state=42, max_depth=4).fit(X_train, y_train)
dt_preds = dt_model.predict(X_test)
dt_acc = accuracy_score(y_true=y_test, y_pred=dt_preds)

# 4. MULTILAYER BACKPROPAGATION NEURAL NETWORK (MLP)
# Logic: stacked layers of neurons; the prediction error is propagated
# backward through the layers to update the weights.
print("--- Step 4: Training Neural Network (Backpropagation) ---")
# Two hidden layers (8 then 4 units), trained and evaluated on the
# standardized features.
mlp_model = MLPClassifier(
    hidden_layer_sizes=(8, 4),
    random_state=42,
    max_iter=1000,
).fit(X_train_scaled, y_train)
mlp_preds = mlp_model.predict(X_test_scaled)
mlp_acc = accuracy_score(y_true=y_test, y_pred=mlp_preds)

# 5. PERFORMANCE COMPARISON & TEXT OUTPUT
# Test-set accuracy of the three models, side by side.
print("\n" + "="*40)
print("FINAL DIAGNOSIS REPORT")
print("="*40)
print(f"1. Bayesian Classifier Accuracy: {nb_acc:.2%}")
print(f"2. Decision Tree Accuracy:      {dt_acc:.2%}")
print(f"3. Neural Network Accuracy:     {mlp_acc:.2%}")
print("="*40)

# Extract Decision Tree Rules for Interpretability
print("\nInterpretation (Decision Tree Logic):")
rules = export_text(dt_model, feature_names=list(X.columns))
# Show only the first few rules, truncating on line boundaries so no rule is
# cut in half, and print the ellipsis only if something was actually omitted.
rule_lines = rules.splitlines()
max_rule_lines = 8
print("\n".join(rule_lines[:max_rule_lines]))
if len(rule_lines) > max_rule_lines:
    print("...")

# 6. VISUALIZATION OF CONFUSION MATRICES
# One heatmap per model, drawn side by side. confusion_matrix puts the true
# labels on the rows and the predictions on the columns, so the axes are
# labelled to match.
class_names = ['Healthy', 'Disease']  # Target encoding: 0 = Healthy, 1 = Disease
panels = [
    ('Bayesian Confusion Matrix', nb_preds, 'Blues'),
    ('Decision Tree Confusion Matrix', dt_preds, 'Greens'),
    ('Neural Network Confusion Matrix', mlp_preds, 'Oranges'),
]
fig, axes = plt.subplots(1, len(panels), figsize=(18, 5))

for ax, (title, preds, cmap) in zip(axes, panels):
    # labels=[0, 1] pins every matrix to 2x2 in a fixed class order, even if
    # a model never predicts one of the classes.
    matrix = confusion_matrix(y_test, preds, labels=[0, 1])
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        ax=ax,
        cmap=cmap,
        xticklabels=class_names,
        yticklabels=class_names,
    )
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()