# Heart Attach Prediction 

# Import Libraries 


In [467]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, classification_report
)

# Load Data 

In [468]:
df = pd.read_csv("/kaggle/input/heart-attack-prediction/data.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


# Data Analysis and Cleaning 

In [469]:
df.shape

(294, 14)

In [470]:
df.isnull().sum()

age           0
sex           0
cp            0
trestbps      0
chol          0
fbs           0
restecg       0
thalach       0
exang         0
oldpeak       0
slope         0
ca            0
thal          0
num           0
dtype: int64

In [471]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294 entries, 0 to 293
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   age         294 non-null    int64  
 1   sex         294 non-null    int64  
 2   cp          294 non-null    int64  
 3   trestbps    294 non-null    object 
 4   chol        294 non-null    object 
 5   fbs         294 non-null    object 
 6   restecg     294 non-null    object 
 7   thalach     294 non-null    object 
 8   exang       294 non-null    object 
 9   oldpeak     294 non-null    float64
 10  slope       294 non-null    object 
 11  ca          294 non-null    object 
 12  thal        294 non-null    object 
 13  num         294 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 32.3+ KB


In [472]:
df.describe()

Unnamed: 0,age,sex,cp,oldpeak,num
count,294.0,294.0,294.0,294.0,294.0
mean,47.826531,0.72449,2.982993,0.586054,0.360544
std,7.811812,0.447533,0.965117,0.908648,0.480977
min,28.0,0.0,1.0,0.0,0.0
25%,42.0,0.0,2.0,0.0,0.0
50%,49.0,1.0,3.0,0.0,0.0
75%,54.0,1.0,4.0,1.0,1.0
max,66.0,1.0,4.0,5.0,1.0


In [473]:
df.duplicated().sum()

1

In [474]:
df = df.drop_duplicates(df)

In [475]:
df.duplicated().sum()

0

In [476]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num       '],
      dtype='object')

In [477]:
df["slope"].value_counts()

slope
?    189
2     91
1     12
3      1
Name: count, dtype: int64

In [478]:
df["ca"].value_counts()

ca
?    290
0      3
Name: count, dtype: int64

In [479]:
df["thal"].value_counts()

thal
?    265
7     11
6     10
3      7
Name: count, dtype: int64

In [480]:
df["thal"].value_counts()

thal
?    265
7     11
6     10
3      7
Name: count, dtype: int64

In [481]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num       '],
      dtype='object')

In [482]:
cols = ['age', 'sex', 'cp', 'trestbps', 'fbs', 'restecg',
        'thalach', 'exang', 'oldpeak', 'slope', 'thal','chol']

df[cols] = df[cols].replace("?", 0)
df[cols] = df[cols].replace("?", 0).astype(float)  # or int if all values are integers


In [483]:
df['num'] = df['num       ']

In [484]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num       ', 'num'],
      dtype='object')

In [485]:
df = df.drop(["num       ",'ca',],axis=1)

In [486]:
df['num'].value_counts()

num
0    187
1    106
Name: count, dtype: int64

In [487]:
df.dtypes

age         float64
sex         float64
cp          float64
trestbps    float64
chol        float64
fbs         float64
restecg     float64
thalach     float64
exang       float64
oldpeak     float64
slope       float64
thal        float64
num           int64
dtype: object

In [488]:
df.dtypes

age         float64
sex         float64
cp          float64
trestbps    float64
chol        float64
fbs         float64
restecg     float64
thalach     float64
exang       float64
oldpeak     float64
slope       float64
thal        float64
num           int64
dtype: object

In [489]:
X = df.drop("num",axis=1)
y = df["num"]

In [490]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

# Model Implementation 

In [491]:
model = XGBClassifier(
    objective='binary:logistic',  # for binary classification
    eval_metric='logloss',        # evaluation metric
    learning_rate=0.1,
    max_depth=6,
    n_estimators=200
)

# Train the model
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation

In [492]:
def evaluate_model(model, X_test, y_test):
    # Predictions
    y_pred = model.predict(X_test)

    # --- Basic Metrics ---
    metrics = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision (Macro)": precision_score(y_test, y_pred, average='macro'),
        "Recall (Macro)": recall_score(y_test, y_pred, average='macro'),
        "F1-score (Macro)": f1_score(y_test, y_pred, average='macro'),

        "Precision (Micro)": precision_score(y_test, y_pred, average='micro'),
        "Recall (Micro)": recall_score(y_test, y_pred, average='micro'),
        "F1-score (Micro)": f1_score(y_test, y_pred, average='micro'),

        "Precision (Weighted)": precision_score(y_test, y_pred, average='weighted'),
        "Recall (Weighted)": recall_score(y_test, y_pred, average='weighted'),
        "F1-score (Weighted)": f1_score(y_test, y_pred, average='weighted'),
    }

    # --- Confusion Matrix ---
    conf_matrix = confusion_matrix(y_test, y_pred)

    # --- ROC-AUC (Multi-class ready) ---
    try:
        y_true_bin = pd.get_dummies(y_test).to_numpy()
        y_pred_bin = pd.get_dummies(y_pred).to_numpy()
        roc_auc_macro = roc_auc_score(y_true_bin, y_pred_bin, average="macro", multi_class="ovr")
        metrics["ROC-AUC (Macro)"] = roc_auc_macro
    except ValueError:
        metrics["ROC-AUC (Macro)"] = None

    # --- Classification Report ---
    class_report = classification_report(y_test, y_pred)

    # --- Print Metrics ---
    print("📊 Evaluation Metrics:")
    for k, v in metrics.items():
        print(f"{k}: {v:.4f}" if v is not None else f"{k}: Not applicable")

    print("\n📌 Confusion Matrix:")
    print(conf_matrix)

    print("\n📌 Classification Report:")
    print(class_report)

    return metrics, conf_matrix, class_report

# ---- Usage ----
metrics, conf_matrix, report = evaluate_model(model, X_test, y_test)


📊 Evaluation Metrics:
Accuracy: 0.8305
Precision (Macro): 0.8004
Recall (Macro): 0.8313
F1-score (Macro): 0.8109
Precision (Micro): 0.8305
Recall (Micro): 0.8305
F1-score (Micro): 0.8305
Precision (Weighted): 0.8466
Recall (Weighted): 0.8305
F1-score (Weighted): 0.8346
ROC-AUC (Macro): 0.8313

📌 Confusion Matrix:
[[34  7]
 [ 3 15]]

📌 Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.83      0.87        41
           1       0.68      0.83      0.75        18

    accuracy                           0.83        59
   macro avg       0.80      0.83      0.81        59
weighted avg       0.85      0.83      0.83        59

