In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    silhouette_score,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)

import tensorflow as tf
from tensorflow.keras import layers, models

In [8]:
# 1) Load and clean the dataset
url = "https://raw.githubusercontent.com/kb22/Heart-Disease-Prediction/master/dataset.csv"
df = pd.read_csv(url).dropna()

In [10]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [11]:
# 2) Split features (X) and target (y)
X = df.drop(columns=["target"]).values
y = df["target"].values

In [12]:
# 3) Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [13]:
# 4) Clustering with K-Means
kmeans = KMeans(n_clusters=2, random_state=42)
clusters = kmeans.fit_predict(X_scaled)
sil_score = silhouette_score(X_scaled, clusters)
print(f"Silhouette Score (2 clusters): {sil_score:.3f}")

Silhouette Score (2 clusters): 0.166


In [14]:
# 5) Prepare augmented feature set including cluster label
X_aug = np.hstack([X_scaled, clusters.reshape(-1, 1)])

In [15]:
# 6) Train/Test split for TensorFlow model (using original features)
X_train_tf, X_test_tf, y_train_tf, y_test_tf = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42, stratify=y
)

In [16]:
# 7) Build and train a TensorFlow MLP
tf_model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train_tf.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(16, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])
tf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

tf_history = tf_model.fit(
    X_train_tf, y_train_tf,
    validation_split=0.2,
    epochs=50,
    batch_size=16,
    verbose=2
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
11/11 - 1s - 135ms/step - accuracy: 0.5444 - loss: 0.7096 - val_accuracy: 0.5116 - val_loss: 0.6722
Epoch 2/50
11/11 - 0s - 9ms/step - accuracy: 0.5621 - loss: 0.6656 - val_accuracy: 0.6977 - val_loss: 0.6287
Epoch 3/50
11/11 - 0s - 9ms/step - accuracy: 0.5799 - loss: 0.6592 - val_accuracy: 0.6977 - val_loss: 0.5953
Epoch 4/50
11/11 - 0s - 10ms/step - accuracy: 0.6805 - loss: 0.5960 - val_accuracy: 0.8372 - val_loss: 0.5663
Epoch 5/50
11/11 - 0s - 9ms/step - accuracy: 0.7219 - loss: 0.5981 - val_accuracy: 0.8372 - val_loss: 0.5382
Epoch 6/50
11/11 - 0s - 14ms/step - accuracy: 0.6746 - loss: 0.5834 - val_accuracy: 0.8837 - val_loss: 0.5093
Epoch 7/50
11/11 - 0s - 9ms/step - accuracy: 0.6864 - loss: 0.5765 - val_accuracy: 0.8605 - val_loss: 0.4812
Epoch 8/50
11/11 - 0s - 10ms/step - accuracy: 0.7692 - loss: 0.5471 - val_accuracy: 0.8837 - val_loss: 0.4571
Epoch 9/50
11/11 - 0s - 9ms/step - accuracy: 0.7515 - loss: 0.5570 - val_accuracy: 0.8837 - val_loss: 0.4334
Epoch 10/50
11

In [17]:
# 8) Evaluate TensorFlow model
loss, acc = tf_model.evaluate(X_test_tf, y_test_tf, verbose=0)
print(f"\nTensorFlow MLP Test Accuracy: {acc:.3f}")


TensorFlow MLP Test Accuracy: 0.725


In [18]:
# 9) Train/Test split for scikit-learn MLP (using augmented features)
X_train_sk, X_test_sk, y_train_sk, y_test_sk = train_test_split(
    X_aug, y, test_size=0.3, random_state=42, stratify=y
)

In [19]:
# 10) Build and train a scikit-learn MLPClassifier
sk_mlp = MLPClassifier(
    hidden_layer_sizes=(50, 25),
    activation="relu",
    solver="adam",
    max_iter=200,
    random_state=42
)
sk_mlp.fit(X_train_sk, y_train_sk)
y_pred_sk = sk_mlp.predict(X_test_sk)



In [20]:
# 11) Evaluate scikit-learn model
acc_sk  = accuracy_score(y_test_sk, y_pred_sk)
prec_sk = precision_score(y_test_sk, y_pred_sk)
rec_sk  = recall_score(y_test_sk, y_pred_sk)
f1_sk   = f1_score(y_test_sk, y_pred_sk)

print("\n=== scikit-learn MLP Results ===")
print(f"Accuracy : {acc_sk:.3f}")
print(f"Precision: {prec_sk:.3f}")
print(f"Recall   : {rec_sk:.3f}")
print(f"F1 Score : {f1_sk:.3f}\n")

print("Classification Report:")
print(classification_report(y_test_sk, y_pred_sk))

print("Confusion Matrix:")
print(confusion_matrix(y_test_sk, y_pred_sk))


=== scikit-learn MLP Results ===
Accuracy : 0.747
Precision: 0.776
Recall   : 0.760
F1 Score : 0.768

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.73      0.72        41
           1       0.78      0.76      0.77        50

    accuracy                           0.75        91
   macro avg       0.74      0.75      0.75        91
weighted avg       0.75      0.75      0.75        91

Confusion Matrix:
[[30 11]
 [12 38]]
