In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
import torch
import torch.nn as nn
import torch.optim as optim

df = pd.read_csv("heart_attack_prediction_dataset.csv")
df.drop(columns=['Patient ID'], inplace=True)

bp_split = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic BP'] = pd.to_numeric(bp_split[0], errors='coerce')
df['Diastolic BP'] = pd.to_numeric(bp_split[1], errors='coerce')
df.drop(columns=['Blood Pressure'], inplace=True)

categorical_cols = ['Sex', 'Country', 'Continent', 'Hemisphere', 'Diet']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
df.dropna(inplace=True)

X = df.drop(columns=['Heart Attack Risk'])
y = df['Heart Attack Risk']

for col in X.columns:
    if X[col].dtype == 'object':
        print(f"Column {col} has non-numeric values: {X[col].unique()}")
        X = X[pd.to_numeric(X[col], errors='coerce').notna()]
        y = y[X.index] 
        X[col] = X[col].astype(float)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

class HeartNet(nn.Module):
    def __init__(self, input_dim):
        super(HeartNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

model = HeartNet(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/100], Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_pred_probs = model(X_test_tensor).numpy()
    y_pred = (y_pred_probs > 0.5).astype(int)

print("\nMetrics:")
print("ROC AUC:", roc_auc_score(y_test, y_pred_probs))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

importances = np.abs(model.fc1.weight.detach().numpy()[0])
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)
print("\nTop Features:")
print(feature_importance.head(10))


Epoch [10/100], Loss: 0.6823
Epoch [20/100], Loss: 0.6656
Epoch [30/100], Loss: 0.6552
Epoch [40/100], Loss: 0.6494
Epoch [50/100], Loss: 0.6462
Epoch [60/100], Loss: 0.6441
Epoch [70/100], Loss: 0.6425
Epoch [80/100], Loss: 0.6410
Epoch [90/100], Loss: 0.6396
Epoch [100/100], Loss: 0.6382

Metrics:
ROC AUC: 0.4860240622788393
Accuracy: 0.6423274386765545
F1 Score: 0.003179650238473768
Precision: 1.0
Recall: 0.0015923566878980893

Top Features:
Age                        0.119232
Diet_Healthy               0.118180
Country_France             0.116157
Country_South Africa       0.111995
Country_United Kingdom     0.111778
Exercise Hours Per Week    0.105841
Country_India              0.104074
BMI                        0.101074
Continent_Australia        0.094332
Alcohol Consumption        0.085883
dtype: float32


In [8]:
df = pd.read_csv("heart_attack_prediction_dataset.csv")
df.drop(columns=['Patient ID'], inplace=True)

bp_split = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic BP'] = pd.to_numeric(bp_split[0], errors='coerce')
df['Diastolic BP'] = pd.to_numeric(bp_split[1], errors='coerce')
df.drop(columns=['Blood Pressure'], inplace=True)

categorical_cols = ['Sex', 'Country', 'Continent', 'Hemisphere', 'Diet']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
df.dropna(inplace=True)

top_features = [
    'Age',
    'Diet_Healthy',
    'Country_France',
    'Country_South Africa',
    'Country_United Kingdom',
    'Exercise Hours Per Week',
    'Country_India',
    'BMI',
    'Continent_Australia',
    'Alcohol Consumption'
]

X = df[top_features]
y = df['Heart Attack Risk']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

class HeartNet(nn.Module):
    def __init__(self, input_dim):
        super(HeartNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

model = HeartNet(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/100], Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_pred_probs = model(X_test_tensor).numpy()
    y_pred = (y_pred_probs > 0.5).astype(int)

print("\nMetrics:")
print("ROC AUC:", roc_auc_score(y_test, y_pred_probs))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

Epoch [10/100], Loss: 0.6579
Epoch [20/100], Loss: 0.6543
Epoch [30/100], Loss: 0.6531
Epoch [40/100], Loss: 0.6524
Epoch [50/100], Loss: 0.6519
Epoch [60/100], Loss: 0.6516
Epoch [70/100], Loss: 0.6514
Epoch [80/100], Loss: 0.6512
Epoch [90/100], Loss: 0.6510
Epoch [100/100], Loss: 0.6508

Metrics:
ROC AUC: 0.48269780608634116
Accuracy: 0.6417569880205363
F1 Score: 0.0
Precision: 0.0
Recall: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
