In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer
import math

# Training hyper-parameters and runtime settings shared by the later cells.
CONFIG = {
    'batch_size': 1024,
    'lr': 1e-3,
    'weight_decay': 1e-4,
    'epochs': 50,
    'patience': 10,  # epochs without a validation-loss improvement before early stopping
    'seed': 42,      # seed applied to the NumPy and PyTorch global RNGs below
    'device': torch.device("cuda" if torch.cuda.is_available() else "cpu")
}

# Column names grouped by how they are fed to the model, plus the target column.
FEATURES = {
    'continuous': ['Age', 'BP', 'Cholesterol', 'Max HR', 'ST depression'],
    'ordinal': ['Chest pain type', 'EKG results', 'Slope of ST', 'Number of vessels fluro', 'Thallium'],
    'binary': ['Sex', 'FBS over 120', 'Exercise angina'],
    'target': 'Heart Disease'
}

# Seed the global RNGs so that weight initialisation, dropout masks and
# DataLoader shuffling repeat on every "Restart & Run All".
# NOTE: some GPU kernels can still be non-deterministic; this does not force
# deterministic algorithms.
np.random.seed(CONFIG['seed'])
torch.manual_seed(CONFIG['seed'])

print(f"Using device: {CONFIG['device']}")

Using device: cuda


In [None]:
class PeriodicEmbedding(nn.Module):
    """
    Sinusoidal embedding of a single scalar feature.

    The input is scaled by 2*pi and by ``frequency_num`` trainable
    coefficients; the sine and cosine of the result are concatenated
    (2 * frequency_num values) and passed through Linear + ReLU to produce
    ``output_dim`` values per row.

    Args:
        frequency_num: number of trainable frequency coefficients.
        output_dim: width of the returned embedding.
        sigma: standard deviation used to initialise the coefficients.
    """
    def __init__(self, frequency_num=16, output_dim=8, sigma=0.1):
        super().__init__()
        self.k = frequency_num

        # Coefficients start as N(0, 1) samples scaled by sigma and are trained.
        initial_coeffs = sigma * torch.randn(frequency_num)
        self.c = nn.Parameter(initial_coeffs)

        self.linear = nn.Linear(2 * frequency_num, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """
        Embed ``x``.

        ``x`` needs at least two dimensions because the sine and cosine halves
        are concatenated along dim 1; a (batch, 1) column broadcasts against
        the (frequency_num,) coefficients to (batch, frequency_num).
        """
        scaled_coeffs = (2 * math.pi) * self.c
        phase = scaled_coeffs * x

        periodic = torch.cat((torch.sin(phase), torch.cos(phase)), dim=1)

        return self.relu(self.linear(periodic))

class PiecewiseLinearEmbedding(nn.Module):
    """
    Piecewise-linear encoding of a single scalar feature, followed by a
    linear projection.

    For edges ``e_0 .. e_n`` the encoding has ``n`` components; component
    ``j`` is ``clamp((x - e_j) / (e_{j+1} - e_j + 1e-6), 0, 1)``. A bin is
    therefore 0 below its lower edge, approximately 1 above its upper edge,
    and linear in between.

    Args:
        bin_edges: 1-D sequence of bin edges (tensor, NumPy array or list).
            It is stored as a float32 buffer named ``bin_edges``.
        output_dim: width of the returned embedding.
    """

    def __init__(self, bin_edges, output_dim=4):
        super().__init__()
        # register_buffer only accepts tensors, and a float64 buffer would
        # produce a float64 encoding that the float32 linear layer rejects.
        # A float32 tensor passes through as_tensor unchanged.
        edges = torch.as_tensor(bin_edges, dtype=torch.float32)
        self.register_buffer('bin_edges', edges)

        num_bins = len(edges) - 1
        self.linear = nn.Linear(num_bins, output_dim)

    def forward(self, x):
        """
        Encode ``x`` and project it.

        A (batch, 1) column broadcasts against the (num_bins,) lower edges,
        giving a (batch, num_bins) encoding and a (batch, output_dim) result.
        """
        edges = self.bin_edges

        lower = edges[:-1]
        widths = edges[1:] - lower

        # Fractional position inside each bin; the epsilon guards against a
        # zero-width bin, and the clamp saturates values outside the bin.
        encoding = (x - lower) / (widths + 1e-6)
        encoding = torch.clamp(encoding, 0.0, 1.0)

        return self.linear(encoding)

In [None]:
class HeartDataset(Dataset):
    """
    Map-style dataset that serves one DataFrame row as a dict of tensors.

    Args:
        df: frame containing every column named in ``feature_groups``; the
            target column is optional.
        feature_groups: dict with the keys ``'continuous'``, ``'ordinal'``
            and ``'binary'`` (lists of column names) and ``'target'`` (a
            column name).

    Each item is a dict with the keys ``'cont'``, ``'ord'``, ``'bin'`` and
    ``'label'``. All values are float32 tensors; ``'label'`` has shape (1,).
    """
    def __init__(self, df, feature_groups):
        self.df = df
        self.feats = feature_groups

        # Materialise each feature group once as a float32 matrix.
        self.cont_data = df[self.feats['continuous']].values.astype(np.float32)
        self.ord_data = df[self.feats['ordinal']].values.astype(np.float32)
        self.bin_data = df[self.feats['binary']].values.astype(np.float32)

        target = self.feats['target']
        if target in df.columns:
            self.labels = df[target].values.astype(np.float32).reshape(-1, 1)
        else:
            # Unlabeled frame: serve zero placeholders. np.zeros defaults to
            # float64, so pin float32 to match the labeled branch.
            self.labels = np.zeros((len(df), 1), dtype=np.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return {
            'cont': torch.tensor(self.cont_data[idx]),
            'ord': torch.tensor(self.ord_data[idx]),
            'bin': torch.tensor(self.bin_data[idx]),
            'label': torch.tensor(self.labels[idx])
        }

def prepare_data(train_path=None, val_size=0.2, seed=42, n_bins=8):
    """
    Load the training CSV, encode the target, split it, and compute the bin
    edges for every ordinal feature.

    Args:
        train_path: path of the training CSV. ``None`` falls back to the
            original hard-coded location.
        val_size: fraction of rows held out for validation.
        seed: random state of the stratified split.
        n_bins: number of quantile bins requested per ordinal feature
            (``n_bins + 1`` edges before duplicates are removed).

    Returns:
        ``(train_df, val_df, ordinal_edges)``, where ``ordinal_edges`` maps
        each ordinal column name to a 1-D float32 tensor of strictly
        increasing edges computed on the training split only.

    Raises:
        ValueError: if the target column holds a value other than
            ``'Absence'`` or ``'Presence'``.
    """
    if train_path is None:
        train_path = 'C:\\Users\\Saswat Balyan\\dev\\Predicting-Heart-Disease-Playground-Series-S6ep2\\playground-series-s6e2\\train.csv'

    target = FEATURES['target']

    # The test CSV is not read here: nothing in this function used it.
    train_full = pd.read_csv(train_path)

    encoded = train_full[target].map({'Absence': 0, 'Presence': 1})
    unmapped = encoded.isna()
    if unmapped.any():
        # Series.map turns unknown labels into NaN; fail here with the
        # offending values instead of passing NaN labels downstream.
        unexpected = train_full.loc[unmapped, target].unique().tolist()
        raise ValueError(f"Unexpected values in target column {target!r}: {unexpected}")
    train_full[target] = encoded

    train_df, val_df = train_test_split(
        train_full,
        test_size=val_size,
        random_state=seed,
        stratify=train_full[target],
    )

    quantile_levels = np.linspace(0, 1, n_bins + 1)
    ordinal_edges = {}
    for col in FEATURES['ordinal']:
        edges = np.quantile(train_df[col].dropna(), quantile_levels)

        # Few-valued columns repeat quantiles; np.unique collapses them and
        # returns already-distinct (sorted) edges unchanged.
        edges = np.unique(edges)

        ordinal_edges[col] = torch.tensor(edges, dtype=torch.float32)

    return train_df, val_df, ordinal_edges

# Load and split the training data, and get the per-column ordinal bin edges.
train_df, val_df, ordinal_edges = prepare_data()

# Wrap each split in the tensor-serving dataset (training first, then validation).
train_dataset, val_dataset = (
    HeartDataset(split_df, FEATURES) for split_df in (train_df, val_df)
)

# Only the training loader shuffles; validation keeps the frame order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=CONFIG['batch_size'])
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=CONFIG['batch_size'])

In [None]:
class TabularHeartModel(nn.Module):
    """
    MLP classifier over per-feature embeddings.

    Every continuous feature gets its own ``PeriodicEmbedding`` and every
    ordinal feature its own ``PiecewiseLinearEmbedding``; binary features are
    passed through unchanged. The concatenated vector goes through a
    256-256-128-1 MLP that returns one raw logit per row (no sigmoid).

    Args:
        ordinal_edges_dict: maps each ordinal feature name to its bin edges.
        feature_groups: dict with ``'continuous'``, ``'ordinal'`` and
            ``'binary'`` lists of column names. ``None`` uses the
            module-level ``FEATURES``.
        cont_embed_dim: embedding width per continuous feature.
        ord_embed_dim: embedding width per ordinal feature.
    """
    def __init__(self, ordinal_edges_dict, feature_groups=None, cont_embed_dim=8, ord_embed_dim=4):
        super().__init__()

        self.feats = FEATURES if feature_groups is None else feature_groups

        self.cont_embeddings = nn.ModuleDict()
        for feat in self.feats['continuous']:
            self.cont_embeddings[feat] = PeriodicEmbedding(
                frequency_num=16, output_dim=cont_embed_dim, sigma=0.1
            )

        self.ord_embeddings = nn.ModuleDict()
        for feat in self.feats['ordinal']:
            self.ord_embeddings[feat] = PiecewiseLinearEmbedding(
                bin_edges=ordinal_edges_dict[feat], output_dim=ord_embed_dim
            )

        # Derived from the same widths that were passed to the embeddings
        # above, so the two can no longer drift apart.
        input_dim = (len(self.feats['continuous']) * cont_embed_dim) + \
                    (len(self.feats['ordinal']) * ord_embed_dim) + \
                    len(self.feats['binary'])

        # Layer order is part of the checkpoint format (state-dict keys are
        # mlp.0, mlp.3, ...); keep it stable.
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(256, 128),
            nn.ReLU(),

            nn.Linear(128, 1)
        )

    def forward(self, x_cont, x_ord, x_bin):
        """
        Return logits of shape (batch, 1).

        Column ``i`` of ``x_cont`` / ``x_ord`` must correspond to the i-th
        name in the matching feature list. Values reach the embeddings as
        given; no scaling happens inside the model.
        """
        embeddings = []

        for i, feat_name in enumerate(self.feats['continuous']):
            val = x_cont[:, i:i+1]  # keep the column dimension: (batch, 1)
            embeddings.append(self.cont_embeddings[feat_name](val))

        for i, feat_name in enumerate(self.feats['ordinal']):
            val = x_ord[:, i:i+1]
            embeddings.append(self.ord_embeddings[feat_name](val))

        embeddings.append(x_bin)

        x = torch.cat(embeddings, dim=1)

        return self.mlp(x)

In [None]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """
    Run one pass over ``loader`` and return the mean loss per sample.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are
    disabled.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    device = CONFIG['device']

    loss_sum = 0.0
    n_samples = 0
    with torch.set_grad_enabled(training):
        for batch in loader:
            b_cont = batch['cont'].to(device)
            b_ord = batch['ord'].to(device)
            b_bin = batch['bin'].to(device)
            labels = batch['label'].to(device)

            logits = model(b_cont, b_ord, b_bin)
            loss = criterion(logits, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # The criterion returns a per-batch mean; weight it by the batch
            # size so a smaller final batch does not skew the epoch average.
            batch_rows = labels.size(0)
            loss_sum += loss.item() * batch_rows
            n_samples += batch_rows

    if n_samples == 0:
        raise ValueError("loader produced no samples")
    return loss_sum / n_samples


model = TabularHeartModel(ordinal_edges).to(CONFIG['device'])

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])

best_val_loss = float('inf')
patience_counter = 0

print("Starting Training...")

for epoch in range(CONFIG['epochs']):

    avg_train_loss = _run_epoch(model, train_loader, criterion, optimizer)
    avg_val_loss = _run_epoch(model, val_loader, criterion)

    print(f"Epoch {epoch+1}/{CONFIG['epochs']} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    if avg_val_loss < best_val_loss:
        # New best: checkpoint the weights and reset the patience window.
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print("Early stopping triggered.")
            break

print("Training Complete.")

Starting Training...
Epoch 1/50 | Train Loss: 0.3038 | Val Loss: 0.2795
Epoch 2/50 | Train Loss: 0.2819 | Val Loss: 0.2781
Epoch 3/50 | Train Loss: 0.2794 | Val Loss: 0.2748
Epoch 4/50 | Train Loss: 0.2781 | Val Loss: 0.2739
Epoch 5/50 | Train Loss: 0.2773 | Val Loss: 0.2785
Epoch 6/50 | Train Loss: 0.2774 | Val Loss: 0.2739
Epoch 7/50 | Train Loss: 0.2770 | Val Loss: 0.2734
Epoch 8/50 | Train Loss: 0.2765 | Val Loss: 0.2722
Epoch 9/50 | Train Loss: 0.2764 | Val Loss: 0.2737
Epoch 10/50 | Train Loss: 0.2763 | Val Loss: 0.2733
Epoch 11/50 | Train Loss: 0.2765 | Val Loss: 0.2842
Epoch 12/50 | Train Loss: 0.2760 | Val Loss: 0.2842
Epoch 13/50 | Train Loss: 0.2764 | Val Loss: 0.2716
Epoch 14/50 | Train Loss: 0.2759 | Val Loss: 0.2776
Epoch 15/50 | Train Loss: 0.2756 | Val Loss: 0.2719
Epoch 16/50 | Train Loss: 0.2763 | Val Loss: 0.2732
Epoch 17/50 | Train Loss: 0.2755 | Val Loss: 0.2716
Epoch 18/50 | Train Loss: 0.2752 | Val Loss: 0.2705
Epoch 19/50 | Train Loss: 0.2754 | Val Loss: 0.2763


In [None]:
# Score the test set with the best checkpoint and write the submission file.
test_df = pd.read_csv('C:\\Users\\Saswat Balyan\\dev\\Predicting-Heart-Disease-Playground-Series-S6ep2\\playground-series-s6e2\\test.csv')

# No target column is required: HeartDataset serves placeholder labels,
# which are not read below.
test_dataset = HeartDataset(test_df, FEATURES)
test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'], shuffle=False)

model_inference = TabularHeartModel(ordinal_edges).to(CONFIG['device'])
# map_location lets a checkpoint saved on CUDA load on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict holds.
best_state = torch.load('best_model.pth', map_location=CONFIG['device'], weights_only=True)
model_inference.load_state_dict(best_state)
model_inference.eval()

all_preds = []
all_ids = test_df['id'].values

print("Starting Inference on Test Set...")
with torch.no_grad():
    for batch in test_loader:
        b_cont = batch['cont'].to(CONFIG['device'])
        b_ord = batch['ord'].to(CONFIG['device'])
        b_bin = batch['bin'].to(CONFIG['device'])

        logits = model_inference(b_cont, b_ord, b_bin)

        # The model returns logits; the submission holds probabilities.
        probs = torch.sigmoid(logits)

        all_preds.extend(probs.cpu().numpy().flatten())

# shuffle=False above keeps predictions aligned with the id column.
submission = pd.DataFrame({
    'id': all_ids,
    'Heart Disease': all_preds
})

submission.to_csv('submission.csv', index=False)
print("Success! 'submission.csv' generated.")
print(submission.head())

  model_inference.load_state_dict(torch.load('best_model.pth'))


Starting Inference on Test Set...
Success! 'submission.csv' generated.
       id  Heart Disease
0  630000       0.951658
1  630001       0.005138
2  630002       0.983272
3  630003       0.005745
4  630004       0.227253
