<a href="https://www.kaggle.com/code/faryalrifaz3374/heart-disease-prediction-with-mlp?scriptVersionId=253452417" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## **Author: Faryal Rifaz**

## "**Heart Disease Prediction with MLP**"

## Stay Connected!

 If you found this notebook helpful or interesting, don't forget to **upvote** and **follow** me for more beginner-friendly, practical notebooks.
 
#### Let’s connect and grow together on this learning journey.



### Email: faryalrifaz3374@gmail.com
### [LinkedIn](https://www.linkedin.com/in/faryal-rifaz-b8a885304/)
### [Github](https://github.com/Faryalrifaz)
### [Upwork](https://www.upwork.com/freelancers/~01d7c98961375e7879?mp_source=share)

![Heart Disease Image](https://media.wltx.com/assets/WTHR/images/da073447-4545-4003-89c8-3b81b8850eb3/da073447-4545-4003-89c8-3b81b8850eb3_1920x1080.jpg)


# Step 1: Import libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

2025-07-31 08:16:19.792836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753949780.042692      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753949780.115079      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Step 2: Load dataset and check correlation

In [2]:

# Load the labelled training data from the Kaggle input directory.
train_df = pd.read_csv("/kaggle/input/heart-disease-prediction-dataquest/heart_train.csv")

# Pairwise correlation between the numeric columns. numeric_only=True skips any
# non-numeric columns, which are only encoded in a later step.
correlation = train_df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Correlation between numeric columns")
plt.show()

# Step 3: Encode categorical features

In [3]:
# Work on a copy so the raw training frame stays untouched.
train_df_encoded = train_df.copy()
cols_to_encode = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Fit a separate LabelEncoder per column and keep each fitted instance.
# Refitting one shared encoder would discard every mapping except the last
# column's, leaving no way to apply the same category -> integer codes to
# other data later on.
label_encoders = {}
for col in cols_to_encode:
    le = LabelEncoder()
    train_df_encoded[col] = le.fit_transform(train_df_encoded[col])
    label_encoders[col] = le


# Step 4: Define features (X) and target (y)

In [4]:
# Split the encoded frame into the label vector and the predictor matrix.
target_column = 'HeartDisease'
y = train_df_encoded[target_column]
X = train_df_encoded.drop(columns=[target_column])


# Step 5: Train-test split

In [5]:
# Hold out 30% of the rows for evaluation. Stratifying on y keeps the class
# ratio the same in both parts, and the fixed random_state makes the split
# repeatable.
split_options = dict(test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


# Step 6: Feature scaling

In [6]:
# Learn the per-feature mean and standard deviation from the training rows only,
# then apply that same transformation to both parts so no held-out statistics
# leak into the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



# Step 7: Build the MLP model

In [7]:
# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout and
# batch shuffling are repeatable when the notebook is run top to bottom.
set_random_seed(42)

# MLP: two hidden ReLU layers (128 and 62 units), each followed by 20% dropout,
# and a single sigmoid output unit for the binary target.
model = Sequential([
    # An explicit Input layer replaces the `input_dim` keyword on the first
    # Dense layer, which Keras warns against in Sequential models.
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(62, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-07-31 08:16:36.206441: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


# Step 8: Compile the model

In [8]:
# Adam with a learning rate of 0.0005, lower than the Keras default of 0.001.
optimizer = Adam(learning_rate=0.0005)

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric during training.
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Step 9: Train the model

In [9]:
# Stop once the validation loss has not improved for 10 consecutive epochs and
# roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# 20% of the training rows are set aside as the validation set that early
# stopping monitors; verbose=0 suppresses the per-epoch log.
fit_options = {
    'epochs': 100,
    'batch_size': 16,
    'validation_split': 0.2,
    'callbacks': [early_stop],
    'verbose': 0,
}
history = model.fit(X_train, y_train, **fit_options)


# Step 10: Predict and evaluate

In [10]:
# Sigmoid outputs for the held-out rows, thresholded at 0.5 into hard 0/1 labels.
y_pred = model.predict(X_test)
y_pred_class = np.where(y_pred > 0.5, 1, 0).astype("int32")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


# Step 11: Results

In [11]:
# Score the thresholded predictions against the held-out labels.
acc = accuracy_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
report = classification_report(y_test, y_pred_class)

# Emit the summary lines in one call; sep="\n" puts each item on its own line.
print(
    f"Accuracy: {acc:.4f}",
    f"F1 Score: {f1:.4f}",
    "\nClassification Report:",
    report,
    sep="\n",
)

Accuracy: 0.8552
F1 Score: 0.8667

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84        99
           1       0.88      0.85      0.87       122

    accuracy                           0.86       221
   macro avg       0.85      0.86      0.85       221
weighted avg       0.86      0.86      0.86       221



# Step 12: Load the test dataset (no HeartDisease column), predict, and write the submission file

In [12]:

# Load the unlabelled test data from the Kaggle input directory.
test_df = pd.read_csv("/kaggle/input/heart-disease-prediction-dataquest/heart_test.csv")

# Keep the row identifiers for the submission; fall back to positional ids
# when the file has no "id" column.
if "id" in test_df.columns:
    ids = test_df["id"]
else:
    ids = pd.Series(range(len(test_df)))

# Encode the categorical columns with the SAME category -> integer mapping the
# model was trained on. The mapping is derived from the training column and
# only applied to the test column. Refitting on the test data could assign
# different integers whenever the set of categories present differs.
# transform() raises ValueError on a category that never occurred in training,
# instead of silently mis-encoding it.
test_df_encoded = test_df.copy()
for col in cols_to_encode:
    col_encoder = LabelEncoder().fit(train_df[col])
    test_df_encoded[col] = col_encoder.transform(test_df_encoded[col])

# Drop the identifier (if present) and apply the scaler fitted on the training data.
X_final_test = test_df_encoded.drop(columns=["id"], errors="ignore")
X_final_test = scaler.transform(X_final_test)

# Predict with the trained MLP and threshold the sigmoid outputs at 0.5;
# ravel() flattens the (n, 1) output into a 1-D label vector.
test_pred = model.predict(X_final_test)
test_pred_class = (test_pred > 0.5).astype("int32").ravel()

# Assemble the submission table.
submission = pd.DataFrame({
    "id": ids,
    "HeartDisease": test_pred_class
})

# Write it to the working directory.
submission.to_csv("submission.csv", index=False)
print("✅ Submission file created: submission.csv")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
✅ Submission file created: submission.csv
