In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('..\datasets\cardio_train.csv', sep=';')

  data = pd.read_csv('..\datasets\cardio_train.csv', sep=';')


In [3]:
df = data.copy()

In [4]:
df = df[(df['ap_hi'] > df['ap_lo']) & df['ap_hi'].between(80, 250) & df['ap_lo'].between(40, 150)]

In [5]:
df

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,22431,1,163,72.0,135,80,1,2,0,0,0,1


In [7]:
def preprocessing(data, scaler=None, fit_scaler=True):
    df = data.copy()
    
    df['age_yr'] = df['age'] / 365.25

    df['bmi'] = df['weight'] / (df['height']/100)**2

    df['map'] = (2/3 * df['ap_lo']) + (1/3 * df['ap_hi'])

    df['pp'] = df['ap_hi'] - df['ap_lo']

    df['lifestyle'] = df['active'] - (df['smoke'] + df['alco'])

    df['x_syndrome'] = (
        (df['cholesterol'] > 1) &
        (df['gluc'] > 1) &
        ((df['ap_hi'] > 130) | (df['ap_lo'] > 85))
    ).astype(int)

    num_features = ['age_yr', 'height', 'weight', 'ap_hi', 'ap_lo', 
                    'bmi', 'pp', 'lifestyle', 'map']

    if fit_scaler:
        scaler = StandardScaler()
        df[num_features] = scaler.fit_transform(df[num_features])
    else:
        df[num_features] = scaler.transform(df[num_features])

    df = df.drop(columns=['id', 'age'])

    return df, scaler




In [9]:
from sklearn.model_selection import train_test_split

y = df['cardio']
X = df.drop('cardio', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X_train_prep, scaler = preprocessing(X_train, fit_scaler=True)
X_test_prep, _ = preprocessing(X_test, scaler = scaler, fit_scaler=False)


In [10]:
class CVSDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.long)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.labels[index]
    
train_data = CVSDataset(X_train_prep, y_train)
test_data = CVSDataset(X_test_prep, y_test)


In [27]:
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


In [34]:
class SimpleANN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleANN, self).__init__()

        self.hidden1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(64)
        self.hidden2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.hidden3 = nn.Linear(32, 16)
        self.dropout = nn.Dropout(0.3)
        self.output = nn.Linear(16, output_size)
        # self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.hidden1(x)))
        x = self.relu(self.bn2(self.hidden2(x)))
        x = self.dropout(self.relu(self.hidden3(x)))
        x = self.output(x)
        return x

    
input_size = X_train_prep.shape[1]
hidden_size = 64
output_size = 2

model = SimpleANN(input_size, hidden_size, output_size)

In [35]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)

In [36]:
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [37]:
# Training loop
epochs = 100
best_val_loss = float('inf')
patience, trigger = 10, 0

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    scheduler.step()
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {running_loss/len(train_loader):.4f} | Val Loss: {val_loss/len(test_loader):.4f}")
    
    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger = 0
        best_model_state = model.state_dict()
    else:
        trigger += 1
        if trigger >= patience:
            print("Early stopping triggered!")
            break

# Load best model
model.load_state_dict(best_model_state)

Epoch [1/100] - Train Loss: 0.5643 | Val Loss: 0.5467
Epoch [2/100] - Train Loss: 0.5557 | Val Loss: 0.5442
Epoch [3/100] - Train Loss: 0.5542 | Val Loss: 0.5448
Epoch [4/100] - Train Loss: 0.5523 | Val Loss: 0.5452
Epoch [5/100] - Train Loss: 0.5522 | Val Loss: 0.5440
Epoch [6/100] - Train Loss: 0.5511 | Val Loss: 0.5433
Epoch [7/100] - Train Loss: 0.5519 | Val Loss: 0.5436
Epoch [8/100] - Train Loss: 0.5492 | Val Loss: 0.5426
Epoch [9/100] - Train Loss: 0.5511 | Val Loss: 0.5447
Epoch [10/100] - Train Loss: 0.5500 | Val Loss: 0.5430
Epoch [11/100] - Train Loss: 0.5501 | Val Loss: 0.5428
Epoch [12/100] - Train Loss: 0.5486 | Val Loss: 0.5416
Epoch [13/100] - Train Loss: 0.5497 | Val Loss: 0.5426
Epoch [14/100] - Train Loss: 0.5488 | Val Loss: 0.5424
Epoch [15/100] - Train Loss: 0.5479 | Val Loss: 0.5435
Epoch [16/100] - Train Loss: 0.5480 | Val Loss: 0.5453
Epoch [17/100] - Train Loss: 0.5480 | Val Loss: 0.5441
Epoch [18/100] - Train Loss: 0.5478 | Val Loss: 0.5430
Epoch [19/100] - Tr

<All keys matched successfully>

In [38]:
model.eval()
all_predicted_labels = []
all_test_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs) 

        _, predicted = torch.max(outputs, 1) 

        all_predicted_labels.extend(predicted.numpy())
        all_test_labels.extend(labels.numpy())

predicted_labels_np = np.array(all_predicted_labels)
test_labels_np = np.array(all_test_labels)

# Compute prediction error as a percentage
prediction_error_test = np.sum(np.abs(predicted_labels_np - test_labels_np)/len(test_labels_np))*100 # Compute the average absolute error percentage
print("Prediction error on testing set:", prediction_error_test)



Prediction error on testing set: 26.541906356950406


In [39]:
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")


Accuracy: 73.46%


In [40]:
torch.save(model.state_dict(), "cardio_ann_model_extended.pth")
