<a href="https://www.kaggle.com/code/faryalrifaz3374/heart-disease-prediction-5-ml-models?scriptVersionId=269192690" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## **Author: Faryal Rifaz**

---

# **Heart Disease Prediction using machine learning models**

---

## Stay Connected!

 If you found this notebook helpful or interesting, don't forget to **upvote** and **follow** me for more beginner-friendly, practical notebooks.
 
#### Let’s connect and grow together on this learning journey.



### Email: faryalrifaz3374@gmail.com
### [LinkedIn](https://www.linkedin.com/in/faryal-rifaz-b8a885304/)
### [Github](https://github.com/Faryalrifaz)


![Heart Disease Image](https://media.wltx.com/assets/WTHR/images/da073447-4545-4003-89c8-3b81b8850eb3/da073447-4545-4003-89c8-3b81b8850eb3_1920x1080.jpg)


#### Let's get started. First of all, we import all the necessary libraries.

# Step 1: Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve
import matplotlib.pyplot as plt

# Step 2: Load dataset

In [None]:

# Read the training and test CSVs from the Kaggle input directory
# (training file first, then the test file).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/heart-disease-prediction-dataquest/heart_train.csv",
        "/kaggle/input/heart-disease-prediction-dataquest/heart_test.csv",
    )
)

# Last expression of the cell, so the notebook renders the top rows of `train`.
train.head()

# Step 3: Descriptive Statistics

In [None]:
# Print the structural summary of the training frame (columns, non-null
# counts, dtypes) to spot missing values and non-numeric columns.
train.info()

In [None]:
# Show pandas' descriptive statistics for the training frame; as the last
# expression of the cell, the resulting table is rendered by the notebook.
train.describe()

# Step 4: Encode categorical features

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode on a copy so the raw `train` frame stays exactly as loaded.
train_df = train.copy()
cols_to_encode = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
le = LabelEncoder()

# One encoder object is refit for each column in turn, so after the loop `le`
# only holds the classes of the last column in `cols_to_encode`.
for col in cols_to_encode:
    le.fit(train_df[col])
    train_df[col] = le.transform(train_df[col])

# Rendered by the notebook: the top rows after encoding.
train_df.head()


# Step 4b: Define features (X) and target (y)

In [None]:
# Target is the HeartDisease column; the features are every remaining column
# of the encoded training frame.
y = train_df['HeartDisease']
X = train_df.drop(columns=['HeartDisease'])


# Step 5: Train-test split

In [None]:
# Hold out 30% of the labelled rows for evaluation. Stratifying on y keeps the
# class proportions aligned between the two parts, and the fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)


# Step 6: Feature scaling

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the hold-out data never influences it.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)



# Step 7: Build the MLP model

In [None]:
# Keras building blocks used by this cell. The Step 1 import cell does not
# bring them in, so without these lines the cell fails with a NameError.
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# Feed-forward classifier: two ReLU hidden layers (128 and 62 units), each
# followed by 20% dropout, and a single sigmoid output unit.
model = Sequential()
# Declare the input width explicitly (one value per scaled feature column)
# instead of passing the legacy `input_dim` argument to the first Dense layer.
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(62, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))


# Step 8: Compile the model

In [None]:
from tensorflow.keras.optimizers import Adam

# Lower learning rate
optimizer = Adam(learning_rate=0.0005)

# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as a metric during training.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Step 9: Train the model

In [None]:
# EarlyStopping is not imported by the Step 1 import cell; import it here so
# this cell does not fail with a NameError.
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has gone 10 epochs without improving,
# and roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train for at most 100 epochs in mini-batches of 16. 20% of the training
# split is held out (validation_split) to produce the `val_loss` that the
# callback monitors. verbose=0 silences the per-epoch log; the per-epoch
# metrics remain available through `history`.
history = model.fit(
    X_train, y_train, epochs=100, batch_size=16,
    validation_split=0.2, callbacks=[early_stop], verbose=0
)


# Step 10: Predict and evaluate

In [None]:
# Raw sigmoid outputs for the hold-out split.
y_pred = model.predict(X_test)

# Turn each output into a hard label: 1 when it exceeds 0.5, otherwise 0.
y_pred_class = np.where(y_pred > 0.5, 1, 0).astype("int32")

# Step 11: Results

In [None]:
# Score the thresholded hold-out predictions against the true labels.
acc = accuracy_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
report = classification_report(y_test, y_pred_class)

# Emit the summary in one call; each item lands on its own line.
print(
    f"Accuracy: {acc:.4f}",
    f"F1 Score: {f1:.4f}",
    "\nClassification Report:",
    report,
    sep="\n",
)

# Step 12: Load the test dataset (without HeartDisease column), predict, and build the submission

In [None]:

test_df = pd.read_csv("/kaggle/input/heart-disease-prediction-dataquest/heart_test.csv")

# Store ID column if it exists; otherwise fall back to a 0..n-1 row counter.
if "id" in test_df.columns:
    ids = test_df["id"]
else:
    ids = pd.Series(range(len(test_df)))

# Encode categorical columns with the mapping learned from the TRAINING data.
# Refitting an encoder on the test set (as `le.fit_transform` would) can assign
# different integer codes whenever the test set's categories differ from the
# training set's, silently corrupting the model's inputs. Fitting on the raw
# `train` column reproduces the codes used when the model was trained; a
# category that never appeared in training now raises a ValueError instead of
# being mis-encoded.
test_df_encoded = test_df.copy()
for col in cols_to_encode:
    train_encoder = LabelEncoder().fit(train[col])
    test_df_encoded[col] = train_encoder.transform(test_df_encoded[col])

# Select exactly the columns the model was trained on, in training order.
# This drops an "id" column (or any other extra column) that is not a training
# feature and guarantees the layout the fitted scaler expects.
X_final_test = scaler.transform(test_df_encoded[X.columns])

# Predict using trained MLP model; flatten the (n, 1) output to a 1-D label
# vector so it can be used as a DataFrame column.
test_pred = model.predict(X_final_test)
test_pred_class = (test_pred > 0.5).astype("int32").ravel()

# Create submission DataFrame
submission = pd.DataFrame({
    "id": ids,
    "HeartDisease": test_pred_class
})

