<a href="https://colab.research.google.com/github/Plutobi/Former/blob/main/Predicting_Heart_Disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

y = train['Heart Disease'].map({'Absence':0,'Presence':1}).values
X = train.drop(columns=['Heart Disease', 'id']) # Drop 'id' from X as well

# One-hot categorical
X = pd.get_dummies(X, columns=['Thallium','Chest pain type'], drop_first=False)
test = pd.get_dummies(test, columns=['Thallium','Chest pain type'], drop_first=False)

# Align columns
X, test = X.align(test, join='left', axis=1, fill_value=0)

# Scale
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test = scaler.transform(test) # Removed .drop(columns=['id']) as 'id' was already removed by align

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

class MLPPeriodic(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.freq = nn.Parameter(torch.randn(d))
        self.fc1 = nn.Linear(d*3, 256)
        self.fc2 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 1)

    def forward(self, x):
        periodic = torch.cat([torch.sin(x*self.freq),
                              torch.cos(x*self.freq)], dim=1)
        x = torch.cat([x, periodic], dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.out(x)

model = MLPPeriodic(X.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCEWithLogitsLoss()

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

for epoch in range(20):
    optimizer.zero_grad()
    logits = model(X_train)
    loss = loss_fn(logits, y_train)
    loss.backward()
    optimizer.step()
    print(epoch, loss.item())

0 0.7047319412231445
1 0.6721367239952087
2 0.6426302790641785
3 0.6146001815795898
4 0.5868286490440369
5 0.5585232377052307
6 0.5293790102005005
7 0.49951493740081787
8 0.4693659543991089
9 0.4395825266838074
10 0.410946786403656
11 0.3842642605304718
12 0.36027398705482483
13 0.33957311511039734
14 0.3225889205932617
15 0.30952635407447815
16 0.3003339469432831
17 0.2946780323982239
18 0.29194122552871704
19 0.2912975549697876


In [7]:
from sklearn.metrics import roc_auc_score

# Put model in evaluation mode
model.eval()

# Convert X_val to tensor
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)

# Get predictions on validation set
with torch.no_grad():
    logits_val = model(X_val_tensor)
    probabilities_val = torch.sigmoid(logits_val).cpu().numpy()

# Calculate AUC score
auc_score = roc_auc_score(y_val, probabilities_val)

print(f"AUC Score on Validation Set: {auc_score:.4f}")

AUC Score on Validation Set: 0.9492


In [8]:
test_original = pd.read_csv('test.csv') # Load original test data to get 'id'

# Put model in evaluation mode
model.eval()

# Convert X_test (numpy array) to tensor
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Get predictions on the test set
with torch.no_grad():
    logits_test = model(X_test_tensor)
    probabilities_test = torch.sigmoid(logits_test).cpu().numpy()

# Create submission DataFrame
submission = pd.DataFrame({
    'id': test_original['id'],
    'Heart Disease': probabilities_test.flatten()
})

# Save submission file
submission.to_csv('submission.csv', index=False)

print('Submission file created successfully: submission.csv')

Submission file created successfully: submission.csv
