In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

In [7]:
data = pd.read_csv('..\datasets\cardio_train.csv', sep=';')

  data = pd.read_csv('..\datasets\cardio_train.csv', sep=';')


In [11]:
data

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,22431,1,163,72.0,135,80,1,2,0,0,0,1


In [12]:
data.columns

Index(['id', 'age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
       'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio'],
      dtype='object')

In [13]:
data.isnull().sum()

id             0
age            0
gender         0
height         0
weight         0
ap_hi          0
ap_lo          0
cholesterol    0
gluc           0
smoke          0
alco           0
active         0
cardio         0
dtype: int64

In [14]:
def preprocessing(data, scaler=None, fit_scaler=True):
    df = data.copy()
    
    df['age_yr'] = df['age'] / 365.25

    df['bmi'] = df['weight'] / (df['height']/100)**2

    df['map'] = (2/3 * df['ap_lo']) + (1/3 * df['ap_hi'])

    df['pp'] = df['ap_hi'] - df['ap_lo']

    df['lifestyle'] = df['active'] - (df['smoke'] + df['alco'])

    df['x_syndrome'] = (
        (df['cholesterol'] > 1) &
        (df['gluc'] > 1) &
        ((df['ap_hi'] > 130) | (df['ap_lo'] > 85))
    ).astype(int)

    num_features = ['age_yr', 'height', 'weight', 'ap_hi', 'ap_lo', 
                    'bmi', 'pp', 'lifestyle', 'map']

    if fit_scaler:
        scaler = StandardScaler()
        df[num_features] = scaler.fit_transform(df[num_features])
    else:
        df[num_features] = scaler.transform(df[num_features])

    df = df.drop(columns=['id', 'age'])

    return df, scaler




In [None]:
from sklearn.model_selection import train_test_split

y = data['cardio']
X = data.drop('cardio', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

 scaler = preprocessing(X_train, fit_scaler=True)
X_test_prep, _ = preprocessing(X_test, scaler = scaler, fit_scaler=False)


In [24]:
class CVSDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.long)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.labels[index]
    
train_data = CVSDataset(X_train_prep, y_train)
test_data = CVSDataset(X_test_prep, y_test)


In [25]:
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)


In [29]:
class SimpleANN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleANN, self).__init__()

        self.hidden = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        x = self.softmax(x)
        return x

    
input_size = X_train_prep.shape[1]
hidden_size = 8
output_size = 2

model = SimpleANN(input_size, hidden_size, output_size)

In [30]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [32]:
epochs = 1000

for epoch in range(epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()


    print(f"Epoch:{epoch}, Loss: {loss.item():.4f}")

Epoch:0, Loss: 0.4864
Epoch:1, Loss: 0.4623
Epoch:2, Loss: 0.5731
Epoch:3, Loss: 0.5895
Epoch:4, Loss: 0.5353
Epoch:5, Loss: 0.4700
Epoch:6, Loss: 0.6479
Epoch:7, Loss: 0.6662
Epoch:8, Loss: 0.7251
Epoch:9, Loss: 0.4928
Epoch:10, Loss: 0.5547
Epoch:11, Loss: 0.5723
Epoch:12, Loss: 0.4729
Epoch:13, Loss: 0.7011
Epoch:14, Loss: 0.4248
Epoch:15, Loss: 0.7052
Epoch:16, Loss: 0.5684
Epoch:17, Loss: 0.6155
Epoch:18, Loss: 0.5072
Epoch:19, Loss: 0.6735
Epoch:20, Loss: 0.6758
Epoch:21, Loss: 0.5964
Epoch:22, Loss: 0.6243
Epoch:23, Loss: 0.5638
Epoch:24, Loss: 0.4777
Epoch:25, Loss: 0.5670
Epoch:26, Loss: 0.6397
Epoch:27, Loss: 0.5598
Epoch:28, Loss: 0.6699
Epoch:29, Loss: 0.4997
Epoch:30, Loss: 0.5151
Epoch:31, Loss: 0.5772
Epoch:32, Loss: 0.3742
Epoch:33, Loss: 0.6631
Epoch:34, Loss: 0.5207
Epoch:35, Loss: 0.5698
Epoch:36, Loss: 0.6362
Epoch:37, Loss: 0.4658
Epoch:38, Loss: 0.4630
Epoch:39, Loss: 0.5290
Epoch:40, Loss: 0.5805
Epoch:41, Loss: 0.4431
Epoch:42, Loss: 0.6334
Epoch:43, Loss: 0.668

In [None]:
    if (epoch + 1) % 50 == 0:
        print(f"Epoch: [{epoch}/{epochs}], Loss: {loss.item():.4f}")

In [33]:
model.eval()
all_predicted_labels = []
all_test_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs) 

        _, predicted = torch.max(outputs, 1) 

        all_predicted_labels.extend(predicted.numpy())
        all_test_labels.extend(labels.numpy())

predicted_labels_np = np.array(all_predicted_labels)
test_labels_np = np.array(all_test_labels)

# Compute prediction error as a percentage
prediction_error_test = np.sum(np.abs(predicted_labels_np - test_labels_np)/len(test_labels_np))*100 # Compute the average absolute error percentage
print("Prediction error on testing set:", prediction_error_test)



Prediction error on testing set: 26.728571428571435


In [34]:
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")


Accuracy: 73.27%


In [35]:
torch.save(model.state_dict(), "cardio_ann_model.pth")


In [36]:
test = pd.read_csv('../datasets/test.csv')

In [41]:
id_no = test.id

In [42]:
id_no

0        26681
1        58585
2        54339
3        17273
4        25420
         ...  
13995    34457
13996    40980
13997    83726
13998    11086
13999    26267
Name: id, Length: 14000, dtype: int64

In [38]:
test_prep, _ = preprocessing(test, scaler = scaler, fit_scaler=False)

In [39]:
test_prep

Unnamed: 0,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,age_yr,bmi,map,pp,lifestyle,x_syndrome
0,1,-1.135796,-1.023103,-0.055968,-0.060952,1,1,0,0,1,-0.032755,-0.455301,-0.077394,0.011015,0.593478,0
1,1,-0.528034,-1.057850,0.005932,-0.035121,1,1,0,0,1,0.655774,-0.733935,-0.029968,0.030994,0.593478,0
2,2,1.295251,0.957492,-0.055968,-0.086783,2,1,0,0,1,-1.761997,0.188011,-0.101106,0.030994,0.593478,0
3,2,1.538356,-0.849366,-0.055968,-0.035121,1,1,0,0,1,-0.276888,-1.261851,-0.053681,-0.008965,0.593478,0
4,1,0.322832,0.471030,-0.055968,-0.086783,1,1,0,0,1,-0.526302,0.238099,-0.101106,0.030994,0.593478,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13995,1,0.201280,0.957492,0.191632,0.016541,1,3,0,0,1,0.659430,0.706747,0.088595,0.110912,0.593478,0
13996,2,1.416804,2.138899,0.067832,-0.035121,1,1,0,0,1,1.208628,1.024810,-0.006256,0.070953,0.593478,0
13997,1,-0.649587,0.887997,0.191632,5.182743,1,1,0,0,1,0.976681,1.108446,4.831123,-3.884974,0.593478,0
13998,1,0.079728,-0.084926,-0.055968,-0.086783,1,1,0,0,1,-0.469838,-0.123383,-0.101106,0.030994,0.593478,0


In [43]:
class CVSDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data.values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]


model.eval()

test_data = CVSDataset(test_prep)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# ===============================
# 4. Predict on Test Data
# ===============================
all_preds = []

with torch.no_grad():
    for inputs in test_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())

# ===============================
# 5. Save Predictions to CSV
# ===============================
submission = pd.DataFrame({
    "Id": id_no,  # or your actual test IDs if available
    "Predicted": all_preds
})

submission.to_csv("submission.csv", index=False)
print("✅ Predictions saved to 'submission.csv'")

✅ Predictions saved to 'submission.csv'
