In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import math
from catboost import CatBoostClassifier

CONFIG = {
    'batch_size': 1024,
    'lr': 1e-3,
    'weight_decay': 1e-4,
    'epochs': 50,
    'patience': 10,
    'device': torch.device("cuda" if torch.cuda.is_available() else "cpu")
}
print(f"Using device: {CONFIG['device']}")
FEATURES = {
    'continuous': ['Age', 'BP', 'Cholesterol', 'Max HR', 'ST depression'],
    'ordinal': ['Chest pain type', 'EKG results', 'Slope of ST', 'Number of vessels fluro', 'Thallium'],
    'binary': ['Sex', 'FBS over 120', 'Exercise angina'],
    'target': 'Heart Disease'
}

Using device: cuda


In [2]:
class PeriodicEmbedding(nn.Module):
    def __init__(self, frequency_num=16, output_dim=8, sigma=0.1):
        super().__init__()
        self.k = frequency_num
        self.c = nn.Parameter(torch.randn(frequency_num) * sigma)
        self.linear = nn.Linear(frequency_num * 2, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        v = 2 * math.pi * self.c * x
        out = torch.cat([torch.sin(v), torch.cos(v)], dim=1) 
        out = self.linear(out)
        out = self.relu(out)
        return out

class PiecewiseLinearEmbedding(nn.Module):
    def __init__(self, bin_edges, output_dim=4):
        super().__init__()
        self.register_buffer('bin_edges', bin_edges)
        num_bins = len(bin_edges) - 1
        self.linear = nn.Linear(num_bins, output_dim)
        
    def forward(self, x):
        edges = self.bin_edges
        widths = edges[1:] - edges[:-1]
        lower = edges[:-1]
        x_expanded = x - lower
        encoding = x_expanded / (widths + 1e-6)
        encoding = torch.clamp(encoding, 0.0, 1.0)
        out = self.linear(encoding)
        return out

In [3]:
class HeartDataset(Dataset):
    def __init__(self, df, feature_groups):
        self.df = df
        self.feats = feature_groups
        
        self.cont_data = df[self.feats['continuous']].values.astype(np.float32)
        self.ord_data = df[self.feats['ordinal']].values.astype(np.float32)
        self.bin_data = df[self.feats['binary']].values.astype(np.float32)
        
        if self.feats['target'] in df.columns:
            self.labels = df[self.feats['target']].values.astype(np.float32).reshape(-1, 1)
        else:
            self.labels = np.zeros((len(df), 1))

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return {
            'cont': torch.tensor(self.cont_data[idx]),
            'ord': torch.tensor(self.ord_data[idx]),
            'bin': torch.tensor(self.bin_data[idx]),
            'label': torch.tensor(self.labels[idx])
        }

def prepare_data():
    train_full = pd.read_csv(r'C:\Users\Saswat Balyan\dev\Predicting-Heart-Disease-Playground-Series-S6ep2\playground-series-s6e2\train.csv')
    test_df = pd.read_csv(r'C:\Users\Saswat Balyan\dev\Predicting-Heart-Disease-Playground-Series-S6ep2\playground-series-s6e2\test.csv') 
    
    train_full['Heart Disease'] = train_full['Heart Disease'].map({'Absence': 0, 'Presence': 1})
    
    train_df, val_df = train_test_split(train_full, test_size=0.2, random_state=42, stratify=train_full['Heart Disease'])
    
    ordinal_edges = {}
    for col in FEATURES['ordinal']:
        edges = np.quantile(train_df[col].dropna(), np.linspace(0, 1, 9))
        if len(np.unique(edges)) < len(edges):
            edges = np.unique(edges)
        ordinal_edges[col] = torch.tensor(edges, dtype=torch.float32)
        
    return train_df, val_df, test_df, ordinal_edges

train_df, val_df, test_df, ordinal_edges = prepare_data()

train_dataset = HeartDataset(train_df, FEATURES)
val_dataset = HeartDataset(val_df, FEATURES)
test_dataset = HeartDataset(test_df, FEATURES)

train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'], shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'], shuffle=False)

In [4]:
class TabularHeartModel(nn.Module):
    def __init__(self, ordinal_edges_dict):
        super().__init__()
        
        self.cont_embeddings = nn.ModuleDict()
        for feat in FEATURES['continuous']:
            self.cont_embeddings[feat] = PeriodicEmbedding(frequency_num=16, output_dim=8, sigma=0.1)
            
        self.ord_embeddings = nn.ModuleDict()
        for feat in FEATURES['ordinal']:
            edges = ordinal_edges_dict[feat]
            self.ord_embeddings[feat] = PiecewiseLinearEmbedding(bin_edges=edges, output_dim=4)
            
        input_dim = (len(FEATURES['continuous']) * 8) + \
                    (len(FEATURES['ordinal']) * 4) + \
                    len(FEATURES['binary'])
        
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 1) 
        )

    def forward(self, x_cont, x_ord, x_bin):
        embeddings = []
        for i, feat_name in enumerate(FEATURES['continuous']):
            val = x_cont[:, i:i+1]
            emb = self.cont_embeddings[feat_name](val)
            embeddings.append(emb)
            
        for i, feat_name in enumerate(FEATURES['ordinal']):
            val = x_ord[:, i:i+1]
            emb = self.ord_embeddings[feat_name](val)
            embeddings.append(emb)
            
        embeddings.append(x_bin)
        x = torch.cat(embeddings, dim=1)
        return x, self.mlp(x)

In [5]:
model = TabularHeartModel(ordinal_edges).to(CONFIG['device'])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])

best_val_loss = float('inf')
patience_counter = 0

print("Training Embeddings...")

for epoch in range(CONFIG['epochs']):
    model.train()
    for batch in train_loader:
        b_cont = batch['cont'].to(CONFIG['device'])
        b_ord = batch['ord'].to(CONFIG['device'])
        b_bin = batch['bin'].to(CONFIG['device'])
        labels = batch['label'].to(CONFIG['device'])
        
        optimizer.zero_grad()
        _, logits = model(b_cont, b_ord, b_bin)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            b_cont = batch['cont'].to(CONFIG['device'])
            b_ord = batch['ord'].to(CONFIG['device'])
            b_bin = batch['bin'].to(CONFIG['device'])
            labels = batch['label'].to(CONFIG['device'])
            _, logits = model(b_cont, b_ord, b_bin)
            val_loss += criterion(logits, labels).item()
            
    avg_val_loss = val_loss / len(val_loader)
    
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model_cat.pth')
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            break
            
model.load_state_dict(torch.load('best_model_cat.pth'))
print("Embedding Training Complete.")

Training Embeddings...
Embedding Training Complete.


  model.load_state_dict(torch.load('best_model_cat.pth'))


In [6]:
def extract_embeddings(loader, model, device):
    model.eval()
    embeddings_list = []
    labels_list = []
    
    with torch.no_grad():
        for batch in loader:
            b_cont = batch['cont'].to(device)
            b_ord = batch['ord'].to(device)
            b_bin = batch['bin'].to(device)
            
            features, _ = model(b_cont, b_ord, b_bin)
            embeddings_list.append(features.cpu().numpy())
            labels_list.append(batch['label'].numpy())
            
    return np.vstack(embeddings_list), np.vstack(labels_list).ravel()

X_train_emb, y_train_emb = extract_embeddings(train_loader, model, CONFIG['device'])
X_val_emb, y_val_emb = extract_embeddings(val_loader, model, CONFIG['device'])
X_test_emb, _ = extract_embeddings(test_loader, model, CONFIG['device'])

cat_model = CatBoostClassifier(
    iterations=6000,
    learning_rate=0.015,
    depth=6,
    l2_leaf_reg=6,
    loss_function='Logloss',
    eval_metric='AUC',
    random_seed=42,
    verbose=200
)

cat_model.fit(
    X_train_emb, y_train_emb,
    eval_set=(X_val_emb, y_val_emb),
    early_stopping_rounds=200
)

0:	test: 0.9382752	best: 0.9382752 (0)	total: 154ms	remaining: 15m 24s
200:	test: 0.9536859	best: 0.9536859 (200)	total: 7.31s	remaining: 3m 30s
400:	test: 0.9549827	best: 0.9549827 (400)	total: 14.5s	remaining: 3m 22s
600:	test: 0.9554874	best: 0.9554874 (600)	total: 21.7s	remaining: 3m 14s
800:	test: 0.9557629	best: 0.9557629 (800)	total: 29.1s	remaining: 3m 8s
1000:	test: 0.9559760	best: 0.9559760 (1000)	total: 36.7s	remaining: 3m 3s
1200:	test: 0.9560985	best: 0.9560985 (1200)	total: 44.2s	remaining: 2m 56s
1400:	test: 0.9561843	best: 0.9561843 (1400)	total: 51.3s	remaining: 2m 48s
1600:	test: 0.9562213	best: 0.9562213 (1600)	total: 59.2s	remaining: 2m 42s
1800:	test: 0.9562461	best: 0.9562463 (1797)	total: 1m 6s	remaining: 2m 34s
2000:	test: 0.9562592	best: 0.9562600 (1985)	total: 1m 13s	remaining: 2m 26s
2200:	test: 0.9562688	best: 0.9562688 (2199)	total: 1m 20s	remaining: 2m 18s
2400:	test: 0.9562768	best: 0.9562769 (2376)	total: 1m 27s	remaining: 2m 10s
2600:	test: 0.9562841	be

<catboost.core.CatBoostClassifier at 0x1a8b289f250>

In [7]:
preds = cat_model.predict_proba(X_test_emb)[:, 1]

submission = pd.DataFrame({
    'id': test_df['id'],
    'Heart Disease': preds
})

submission.to_csv('submission_catboost.csv', index=False)
print("CatBoost Submission Saved!")
print(submission.head())

CatBoost Submission Saved!
       id  Heart Disease
0  630000       0.952576
1  630001       0.007733
2  630002       0.990484
3  630003       0.003623
4  630004       0.190172


Single optimization 

In [None]:
import optuna
from catboost import CatBoostClassifier

def objective(trial):
    params = {
        "iterations": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
        "bootstrap_type": trial.suggest_categorical("bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]),
        "random_strength": trial.suggest_float("random_strength", 1e-8, 10.0, log=True),
        "eval_metric": "AUC",
        "loss_function": "Logloss",
        "od_type": "Iter",
        "od_wait": 50,
        "verbose": False,
        "random_seed": 42
    }

    if params["bootstrap_type"] == "Bayesian":
        params["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0.0, 10.0)
    else:
        params["subsample"] = trial.suggest_float("subsample", 0.1, 1.0)

    model = CatBoostClassifier(**params)
    model.fit(
        X_train_emb,
        y_train_emb,
        eval_set=(X_val_emb, y_val_emb),
        use_best_model=True
    )

    evals = model.get_evals_result()["validation"]["AUC"]
    for step, auc in enumerate(evals):
        trial.report(auc, step)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return max(evals)


study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(
        multivariate=True,
        gamma=lambda n: int(0.13 * n),
        n_startup_trials=75
    ),
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=20,
        n_warmup_steps=50
    )
)

study.optimize(objective, n_trials=100)

print(study.best_value)
print(study.best_params)


Double optimization

In [14]:
import optuna
from catboost import CatBoostClassifier

def objective(trial, stage_params=None):
    if stage_params is None:
        params = {
            "iterations": 1000,
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "depth": trial.suggest_int("depth", 4, 10),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
            "bootstrap_type": trial.suggest_categorical("bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]),
            "random_strength": trial.suggest_float("random_strength", 1e-8, 10.0, log=True),
        }
        if params["bootstrap_type"] == "Bayesian":
            params["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0.0, 10.0)
        else:
            params["subsample"] = trial.suggest_float("subsample", 0.1, 1.0)
    else:
        params = {
            "iterations": 1000,
            "learning_rate": trial.suggest_float("learning_rate", max(1e-3, stage_params["learning_rate"] * 0.7), min(0.1, stage_params["learning_rate"] * 1.3), log=True),
            "depth": trial.suggest_int("depth", max(4, stage_params["depth"] - 1), min(10, stage_params["depth"] + 1)),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", max(1.0, stage_params["l2_leaf_reg"] * 0.8), min(20.0, stage_params["l2_leaf_reg"] * 1.2)),
            "bootstrap_type": stage_params["bootstrap_type"], 
            "random_strength": trial.suggest_float("random_strength", max(1e-9, stage_params["random_strength"] * 0.7), min(10.0, stage_params["random_strength"] * 1.3), log=True),
        }
        if params["bootstrap_type"] == "Bayesian":
            params["bagging_temperature"] = trial.suggest_float("bagging_temperature", max(0.0, stage_params["bagging_temperature"] * 0.8), min(10.0, stage_params["bagging_temperature"] * 1.2))
        else:
            params["subsample"] = trial.suggest_float("subsample", max(0.1, stage_params["subsample"] * 0.9), min(1.0, stage_params["subsample"] * 1.1))

    params.update({"eval_metric": "AUC", "loss_function": "Logloss", "od_type": "Iter", "od_wait": 50, "verbose": False, "random_seed": 42})

    model = CatBoostClassifier(**params)
    model.fit(X_train_emb, y_train_emb, eval_set=(X_val_emb, y_val_emb), use_best_model=True)

    evals = model.get_evals_result()["validation"]["AUC"]
    for step, auc in enumerate(evals):
        trial.report(auc, step)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return max(evals)

study_stage1 = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(multivariate=True, n_startup_trials=60, gamma=lambda n: int(0.25 * n)),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=50)
)
study_stage1.optimize(lambda t: objective(t), n_trials=125)

best_p1 = study_stage1.best_params
study_stage2 = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(multivariate=True, n_startup_trials=20, gamma=lambda n: int(0.08 * n)),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=10, n_warmup_steps=25)
)

study_stage2.add_trials(study_stage1.trials)
study_stage2.optimize(lambda t: objective(t, stage_params=best_p1), n_trials=100)

print(f"Final Best Score: {study_stage2.best_value}")
print(f"Final Best Params: {study_stage2.best_params}")

  optuna_warn(
[32m[I 2026-02-10 15:36:54,416][0m A new study created in memory with name: no-name-f21834b0-b28f-447e-a71c-66a336b60f72[0m
[32m[I 2026-02-10 15:37:25,531][0m Trial 0 finished with value: 0.9556436749772993 and parameters: {'learning_rate': 0.011456942031850988, 'depth': 6, 'l2_leaf_reg': 1.9994384937859613, 'bootstrap_type': 'Bernoulli', 'random_strength': 0.0008309048938772426, 'subsample': 0.2520420885463719}. Best is trial 0 with value: 0.9556436749772993.[0m
[32m[I 2026-02-10 15:39:14,573][0m Trial 1 finished with value: 0.9542218896678077 and parameters: {'learning_rate': 0.002008860113065908, 'depth': 10, 'l2_leaf_reg': 5.069562319549918, 'bootstrap_type': 'MVS', 'random_strength': 1.324937259705839e-06, 'subsample': 0.7649932196537125}. Best is trial 0 with value: 0.9556436749772993.[0m
[32m[I 2026-02-10 15:40:01,496][0m Trial 2 finished with value: 0.9549213056959043 and parameters: {'learning_rate': 0.006550709714998807, 'depth': 5, 'l2_leaf_reg': 3.

Final Best Score: 0.9563314851331536
Final Best Params: {'learning_rate': 0.09230069920687667, 'depth': 4, 'l2_leaf_reg': 5.391661726658544, 'random_strength': 0.0003914743928618374, 'bagging_temperature': 0.02629817006787384}


In [15]:
final_params = study_stage2.best_params.copy()
final_params.update({
    "iterations": 8000, 
    "verbose": 500,
    "eval_metric": "AUC",
    "loss_function": "Logloss",
    "random_seed": 42
})

final_cat_model = CatBoostClassifier(**final_params)

final_cat_model.fit(
    X_train_emb, y_train_emb,
    eval_set=(X_val_emb, y_val_emb),
    early_stopping_rounds=300,
    use_best_model=True
)

final_preds = final_cat_model.predict_proba(X_test_emb)[:, 1]
submission_bayesian = pd.DataFrame({
    'id': test_df['id'],
    'Heart Disease': final_preds
})
submission_bayesian.to_csv('submission_cat_bayesian.csv', index=False)
print("\n Submission Saved")

0:	test: 0.9215424	best: 0.9215424 (0)	total: 30.3ms	remaining: 4m 2s
500:	test: 0.9562714	best: 0.9562714 (500)	total: 14.6s	remaining: 3m 38s
1000:	test: 0.9563151	best: 0.9563190 (944)	total: 30.4s	remaining: 3m 32s
Stopped by overfitting detector  (300 iterations wait)

bestTest = 0.9563189897
bestIteration = 944

Shrink model to first 945 iterations.

 Submission Saved
