In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


# Load the semicolon-separated dataset and discard the row identifier column.
df = pd.read_csv("cardio_train.csv", sep=';').drop(columns=['id'])

# 'cardio' is the target; every remaining column is used as a feature.
y = df['cardio']
X = df.drop(columns=['cardio'])

# Stratified 70 / 15 / 15 split: hold out 30% first, then halve that hold-out
# into validation and test parts.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# Fit the scaler on the training split only, then apply the same statistics
# to all three splits so validation/test data never influence the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_valid, X_test = [
    scaler.transform(split) for split in (X_train, X_valid, X_test)
]

class CardioDataset(Dataset):
    """Map-style dataset holding features and labels as float32 tensors.

    Args:
        X: Feature matrix. A ``pandas.DataFrame`` is unwrapped to its
            underlying array; anything else is handed to ``torch.tensor`` as is.
        y: Labels. A ``pandas.Series`` is unwrapped to its underlying array;
            anything else is handed to ``torch.tensor`` as is.
    """

    def __init__(self, X, y):
        # Unwrap pandas containers so torch.tensor receives plain arrays.
        features = X.values if isinstance(X, pd.DataFrame) else X
        labels = y.values if isinstance(y, pd.Series) else y

        self.X = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target


# Mini-batch loaders for each split; only the training loader shuffles.
train_loader = DataLoader(
    dataset=CardioDataset(X_train, y_train),
    batch_size=64,
    shuffle=True,
)
valid_loader = DataLoader(
    dataset=CardioDataset(X_valid, y_valid),
    batch_size=64,
)
test_loader = DataLoader(
    dataset=CardioDataset(X_test, y_test),
    batch_size=64,
)

In [7]:
class CardioModel(nn.Module):
    """Fully connected binary classifier: ``input_dim -> 64 -> 32 -> 1``.

    Each hidden linear layer is followed by a ReLU and the single output unit
    is passed through a sigmoid, so ``forward`` yields values in ``(0, 1)``.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()

        # Assemble the stack in order: (Linear, ReLU) per hidden width, then
        # the one-unit output layer and its sigmoid.
        layers = []
        in_features = input_dim
        for out_features in (64, 32):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            in_features = out_features
        layers.append(nn.Linear(in_features, 1))
        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid outputs."""
        probabilities = self.model(x)
        return probabilities

# Seed PyTorch's global RNG before any weights are created so that layer
# initialisation (and, when the cells run top to bottom, the shuffled training
# batches drawn afterwards) are repeatable across Restart & Run All. The value
# mirrors the random_state used for the train/valid/test splits.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The network's input width is the number of feature columns in X_train.
model = CardioModel(X_train.shape[1]).to(device)

# Binary cross-entropy on probabilities (the model already ends in a sigmoid).
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train_epoch(loader):
    """Run one optimisation pass over ``loader`` and report the mean loss.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Args:
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.

    Returns:
        float: Loss averaged over all samples seen in this epoch (each batch
        loss is weighted by its batch size, which assumes ``criterion``
        returns a per-batch mean). ``nan`` if ``loader`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    n_samples = 0
    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        # Add a trailing dimension so targets line up with the model's
        # one-column output.
        y_batch = y_batch.to(device).unsqueeze(1)

        optimizer.zero_grad()
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()

        # Accumulate as a detached tensor so the device is synchronised only
        # once, when the epoch total is converted to a float below.
        batch_size = y_batch.size(0)
        running_loss = running_loss + loss.detach() * batch_size
        n_samples += batch_size

    if n_samples == 0:
        return float("nan")
    return float(running_loss) / n_samples

In [8]:
def evaluate(loader, threshold=0.5):
    """Score the module-level ``model`` on every batch of ``loader``.

    Args:
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        threshold: Predicted probabilities strictly greater than this value
            are counted as the positive class. Defaults to 0.5.

    Returns:
        dict: ``roc_auc`` computed from the raw probabilities, plus
        ``accuracy``, ``f1``, ``precision`` and ``recall`` computed from the
        thresholded predictions.
    """
    model.eval()
    label_chunks, prob_chunks = [], []
    with torch.no_grad():
        for X_batch, y_batch in loader:
            probs = model(X_batch.to(device))
            # Flatten to 1-D so the metric functions receive plain
            # (n_samples,) vectors instead of relying on implicit coercion
            # of a column vector.
            prob_chunks.append(probs.reshape(-1).cpu().numpy())
            label_chunks.append(y_batch.reshape(-1).cpu().numpy())

    y_true = np.concatenate(label_chunks)
    y_prob = np.concatenate(prob_chunks)
    y_pred_binary = (y_prob > threshold).astype(int)

    return {
        "roc_auc": roc_auc_score(y_true, y_prob),
        "accuracy": accuracy_score(y_true, y_pred_binary),
        "f1": f1_score(y_true, y_pred_binary),
        "precision": precision_score(y_true, y_pred_binary),
        "recall": recall_score(y_true, y_pred_binary)
    }

# Train for ten epochs, printing a progress line after every second one.
for epoch in range(10):
    train_epoch(train_loader)
    if epoch % 2 == 1:
        print(f"Epoch {epoch+1}")

# Report the final metrics on each split in turn.
for split_label, split_loader in (
    ("Train Metrics:", train_loader),
    ("Valid Metrics:", valid_loader),
    ("Test Metrics:", test_loader),
):
    print(split_label, evaluate(split_loader))


Epoch 2
Epoch 4
Epoch 6
Epoch 8
Epoch 10
Train Metrics: {'roc_auc': 0.8043330053726383, 'accuracy': 0.7371020408163266, 'f1': 0.7318707851136458, 'precision': 0.7462540854874995, 'recall': 0.7180314478251991}
Valid Metrics: {'roc_auc': 0.7962435978663902, 'accuracy': 0.7284761904761905, 'f1': 0.7243013248235181, 'precision': 0.7351786415390655, 'recall': 0.7137411854392987}
Test Metrics: {'roc_auc': 0.7946786812556239, 'accuracy': 0.73, 'f1': 0.7245700961818712, 'precision': 0.739001189060642, 'recall': 0.710691823899371}
