# TabularResNet for TDE Classification

This notebook implements a ResNet-style neural network for tabular data.

**Architecture:**
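- Input Layer: Linear (input_dim -> hidden_dim), followed by BatchNorm, PReLU, and Dropout
- ResNet Blocks: 1-3 residual blocks (the count is tuned), each made of two Linear + BatchNorm + PReLU + Dropout stages wrapped in a skip connection
- Output Layer: Linear (hidden_dim -> 1), producing one logit per sample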

**Training:**
- Optuna hyperparameter tuning with MedianPruner
- BCEWithLogitsLoss with pos_weight for class imbalance
- Bagging: 5 models per fold with different seeds
- Saves out-of-fold and test predictions, and generates a standalone submission file for this model

In [None]:
# Environment sanity check: show the installed PyTorch build and whether CUDA can be used.
import torch
print(torch.__version__)           # e.g. '2.5.1+cu124' for a CUDA-enabled build
print(torch.cuda.is_available())   # True when a CUDA GPU is usable; if False, the notebook falls back to CPU

In [None]:
import os
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import optuna
from sklearn.metrics import precision_recall_curve
import warnings

# Keep notebook output short: suppress Python warnings and restrict Optuna logging to WARNING and above.
# NOTE(review): the blanket 'ignore' filter also hides warnings raised by the libraries used below.
warnings.filterwarnings('ignore')
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Use the GPU when CUDA is available, otherwise run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

In [None]:
# Configuration
N_OPTUNA_TRIALS = 30  # Number of Optuna trials in the hyperparameter search
N_EPOCHS_TUNING = 50  # Training epochs per fold inside each Optuna trial
N_EPOCHS_FINAL = 100  # Training epochs per model when fitting the final bagged models
N_BAGS = 5  # Models per fold for bagging
BATCH_SIZE = 64  # Mini-batch size for every training DataLoader
MODEL_NAME = 'resnet'  # Tag embedded in the names of all output files

# Paths (relative to the notebook's working directory)
DATA_DIR = os.path.join('..', 'data', 'processed')
MODEL_DIR = os.path.join('..', 'models')
SUBMISSION_DIR = os.path.join('..', 'submissions')
TRAIN_PATH = os.path.join(DATA_DIR, 'train_processed_nn_further.parquet')
TEST_PATH = os.path.join(DATA_DIR, 'test_processed_nn_further.parquet')
FOLDS_PATH = os.path.join(DATA_DIR, 'train_folds.csv')

# Create the output directories up front so later saves cannot fail on a missing folder.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(SUBMISSION_DIR, exist_ok=True)

In [None]:
# Load data
print("Loading NN-processed data...")
train = pd.read_parquet(TRAIN_PATH)
test = pd.read_parquet(TEST_PATH)
folds = pd.read_csv(FOLDS_PATH)

# Merge folds: attach each training object's fold id by object_id
n_rows_before_merge = len(train)
train = train.merge(folds[['object_id', 'kfold']], on='object_id', how='left')

# A left merge never fails, so surface the two ways it can go wrong silently.
# Rows without a fold id match no validation fold: they would be trained on
# in every fold and keep a zero out-of-fold prediction.
n_missing_folds = int(train['kfold'].isna().sum())
if n_missing_folds:
    print(f"WARNING: {n_missing_folds} training rows have no fold assignment after the merge")
# Duplicate object_id entries in the folds file would duplicate training rows.
if len(train) != n_rows_before_merge:
    print(f"WARNING: merging folds changed the row count from {n_rows_before_merge} to {len(train)}")

print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")
print(f"Class distribution: {train['target'].value_counts().to_dict()}")

In [None]:
# Prepare features
# Every column except these is used as a model input.
drop_cols = ['object_id', 'target', 'split', 'SpecType', 'kfold']
feature_cols = [name for name in train.columns if name not in drop_cols]
INPUT_DIM = len(feature_cols)

# Training arrays (float32 for PyTorch) and the fold id of each training row
X = train[feature_cols].values.astype(np.float32)
y = train['target'].values.astype(np.float32)
kfold = train['kfold'].values

# Test inputs use the same feature columns in the same order
X_test = test[feature_cols].values.astype(np.float32)
object_ids_test = test['object_id']

# Negative-to-positive count ratio, passed to the loss as pos_weight to offset class imbalance
pos_weight = np.sum(y == 0) / np.sum(y == 1)
print(f"Feature count: {INPUT_DIM}")
print(f"pos_weight: {pos_weight:.2f}")

In [None]:
class ResNetBlock(nn.Module):
    """Residual block for tabular features.

    Applies Linear -> BatchNorm1d -> PReLU -> Dropout twice at a constant
    width of ``hidden_dim`` and adds the block's input to the result.

    Args:
        hidden_dim: Width of the block's input and output.
        dropout: Dropout probability used after each activation.
    """

    def __init__(self, hidden_dim, dropout):
        super().__init__()
        stages = []
        for _ in range(2):
            stages.extend([
                nn.Linear(hidden_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.PReLU(),
                nn.Dropout(dropout),
            ])
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        # Skip connection: the transformed features are added to the input
        residual = self.block(x)
        return x + residual


class TabularResNet(nn.Module):
    """Feed-forward network with residual blocks that outputs one logit per row.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by the input projection and every block.
        n_blocks: Number of ``ResNetBlock`` modules applied in sequence.
        dropout: Dropout probability for the input projection and the blocks.
    """

    def __init__(self, input_dim, hidden_dim, n_blocks, dropout):
        super().__init__()

        # Project the raw features to the working width
        self.input_layer = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.PReLU(),
            nn.Dropout(dropout),
        )

        # Stack of residual blocks
        residual_blocks = []
        for _ in range(n_blocks):
            residual_blocks.append(ResNetBlock(hidden_dim, dropout))
        self.blocks = nn.ModuleList(residual_blocks)

        # Single-logit head
        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        hidden = self.input_layer(x)
        for residual_block in self.blocks:
            hidden = residual_block(hidden)
        logits = self.output_layer(hidden)
        # Drop the trailing size-1 dimension of the head's output
        return logits.squeeze(-1)

In [None]:
def train_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean training loss.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(features, targets)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        The loss averaged over samples, as a float. Each batch loss is
        weighted by its batch size so a short final batch is not
        over-represented (this assumes ``criterion`` returns a per-batch
        mean). Returns 0.0 when ``loader`` yields no batches.
    """
    model.train()

    # Move batches to wherever the model's parameters live instead of relying
    # on a module-level device variable.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum = 0.0
    n_samples = 0
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(target_device), y_batch.to(target_device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        batch_size = y_batch.size(0)
        loss_sum += loss.item() * batch_size
        n_samples += batch_size

    # Guard against an empty loader rather than dividing by zero
    return loss_sum / n_samples if n_samples else 0.0


def evaluate(model, X_val, y_val):
    """Score ``model`` on a validation set.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        X_val: Validation features.
        y_val: Validation labels.

    Returns:
        Tuple ``(best_f1, preds)``: the highest F1 found along the
        precision-recall curve, and the sigmoid probabilities for ``X_val``
        as a NumPy array.
    """
    model.eval()
    features = torch.FloatTensor(X_val).to(device)
    with torch.no_grad():
        logits = model(features)
        preds = torch.sigmoid(logits).cpu().numpy()

    precision, recall, _ = precision_recall_curve(y_val, preds)
    # The small epsilon keeps the ratio finite where precision + recall is zero
    f1_curve = 2 * (precision * recall) / (precision + recall + 1e-9)
    return np.max(f1_curve), preds

In [None]:
def objective(trial):
    """Optuna objective: cross-validated F1 at a single global threshold.

    Samples the network width, depth, dropout, learning rate and weight
    decay, trains a fresh model on each of the 5 folds for
    ``N_EPOCHS_TUNING`` epochs, and collects out-of-fold predictions.

    Args:
        trial: Optuna trial used to sample hyperparameters and report
            per-fold scores.

    Returns:
        The best F1 over all thresholds of the precision-recall curve built
        from the out-of-fold predictions of all folds together.

    Raises:
        optuna.TrialPruned: When the pruner stops the trial after a fold.
    """
    # Hyperparameters to tune
    hidden_dim = trial.suggest_categorical('hidden_dim', [64, 128, 256, 512])
    n_blocks = trial.suggest_int('n_blocks', 1, 3)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    # The loss does not depend on the fold, so build it once. The explicit
    # float32 dtype keeps the weight in the same precision as the inputs.
    criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([pos_weight], dtype=torch.float32, device=device)
    )

    # Global OOF evaluation: every fold fills in its own validation rows
    oof_preds = np.zeros(len(y))

    for fold in range(5):
        train_idx = kfold != fold
        val_idx = kfold == fold

        X_tr, X_val = X[train_idx], X[val_idx]
        y_tr, y_val = y[train_idx], y[val_idx]

        train_dataset = TensorDataset(
            torch.FloatTensor(X_tr),
            torch.FloatTensor(y_tr)
        )
        # BatchNorm1d cannot normalise a single-sample batch in training
        # mode, so drop the trailing batch only when it would hold one row.
        train_loader = DataLoader(
            train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=len(train_dataset) % BATCH_SIZE == 1,
        )

        model = TabularResNet(INPUT_DIM, hidden_dim, n_blocks, dropout).to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

        for _ in range(N_EPOCHS_TUNING):
            train_epoch(model, train_loader, criterion, optimizer)

        # evaluate() already returns this fold's best F1; it is used for
        # pruning only, while the trial's score comes from the pooled
        # predictions below.
        fold_f1, preds = evaluate(model, X_val, y_val)
        oof_preds[val_idx] = preds

        trial.report(fold_f1, fold)
        if trial.should_prune():
            raise optuna.TrialPruned()

    # Global metric: pick the best threshold over all OOF predictions at once
    prec, rec, _ = precision_recall_curve(y, oof_preds)
    f1_scores = 2 * (prec * rec) / (prec + rec + 1e-9)
    best_f1 = np.max(f1_scores)

    return best_f1

In [None]:
# Run Optuna optimization
# The study maximizes the value returned by objective() (global OOF F1).
print(f"Running Optuna with {N_OPTUNA_TRIALS} trials...")
# MedianPruner compares the per-fold F1 values that objective() reports (step = fold index).
# NOTE(review): n_startup_trials / n_warmup_steps delay pruning for the first trials and the
# first steps of each trial - confirm the exact cut-offs against the Optuna documentation.
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=2)
study = optuna.create_study(direction='maximize', pruner=pruner)
study.optimize(objective, n_trials=N_OPTUNA_TRIALS, show_progress_bar=True)

# best_params is consumed by the final bagged training below
print(f"\nBest F1 Score: {study.best_value:.4f}")
print(f"Best params: {study.best_params}")

In [None]:
# Train final models with bagging
# For every fold, N_BAGS models that differ only in their random seed are
# trained with the best hyperparameters; their probabilities are averaged
# into the fold's OOF predictions and into the test predictions.
print(f"\nTraining final models with bagging ({N_BAGS} models per fold)...")

# Single source of truth for the fold count: it drives both the loop and the
# test-prediction average, which must stay in sync.
n_folds = 5

best_params = study.best_params
oof_preds = np.zeros(len(y))
test_preds = np.zeros(len(X_test))

# Loop-invariant objects are built once instead of once per bag.
criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor([pos_weight], dtype=torch.float32, device=device)
)
X_test_t = torch.FloatTensor(X_test).to(device)

for fold in range(n_folds):
    train_idx = kfold != fold
    val_idx = kfold == fold

    X_tr, X_val = X[train_idx], X[val_idx]
    y_tr, y_val = y[train_idx], y[val_idx]

    # The training tensors are the same for every bag of this fold
    train_dataset = TensorDataset(
        torch.FloatTensor(X_tr),
        torch.FloatTensor(y_tr)
    )
    # BatchNorm1d cannot normalise a single-sample batch in training mode,
    # so drop the trailing batch only when it would hold exactly one row.
    drop_last = len(train_dataset) % BATCH_SIZE == 1

    fold_val_preds = np.zeros(len(X_val))
    fold_test_preds = np.zeros(len(X_test))

    for bag in range(N_BAGS):
        # Seed before building the loader and the model so each bag gets its
        # own shuffling and weight initialisation
        seed = 15 + bag * 100
        torch.manual_seed(seed)
        np.random.seed(seed)

        train_loader = DataLoader(
            train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=drop_last,
        )

        # Create model
        model = TabularResNet(
            INPUT_DIM,
            best_params['hidden_dim'],
            best_params['n_blocks'],
            best_params['dropout']
        ).to(device)

        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=best_params['lr'],
            weight_decay=best_params['weight_decay']
        )

        # Train
        for _ in range(N_EPOCHS_FINAL):
            train_epoch(model, train_loader, criterion, optimizer)

        # Predict: accumulate this bag's share of the fold average
        _, val_pred = evaluate(model, X_val, y_val)
        fold_val_preds += val_pred / N_BAGS

        model.eval()
        with torch.no_grad():
            test_pred = torch.sigmoid(model(X_test_t)).cpu().numpy()
        fold_test_preds += test_pred / N_BAGS

    oof_preds[val_idx] = fold_val_preds
    test_preds += fold_test_preds / n_folds

    print(f"Fold {fold} complete.")

# Calculate final OOF F1
# precision/recall carry one more entry than thresholds; dropping the last
# point keeps the F1 values aligned with the thresholds.
prec, rec, thresh = precision_recall_curve(y, oof_preds)
f1 = 2 * (prec[:-1] * rec[:-1]) / (prec[:-1] + rec[:-1] + 1e-9)
best_thresh = thresh[np.argmax(f1)]
print(f"\nOOF F1 Score: {np.max(f1):.4f} at threshold {best_thresh:.4f}")

In [None]:
# Save predictions
# Both files hold raw probabilities (not thresholded labels).
oof_path = os.path.join(MODEL_DIR, f'oof_{MODEL_NAME}.csv')
test_pred_path = os.path.join(MODEL_DIR, f'preds_{MODEL_NAME}.csv')

oof_df = pd.DataFrame({
    'object_id': train['object_id'],
    'target': y,
    f'pred_{MODEL_NAME}': oof_preds
})
oof_df.to_csv(oof_path, index=False)

test_df = pd.DataFrame({
    'object_id': object_ids_test,
    f'pred_{MODEL_NAME}': test_preds
})
test_df.to_csv(test_pred_path, index=False)

# Report the paths actually written (MODEL_DIR is relative to the working
# directory), matching how the submission path is reported.
print(f"\nSaved OOF predictions to: {oof_path}")
print(f"Saved test predictions to: {test_pred_path}")

In [None]:
# Create submission file for this model
# Apply optimal threshold from OOF: probabilities at or above it become class 1
test_binary = (test_preds >= best_thresh).astype(int)

# Create submission dataframe
submission = pd.DataFrame({
    'object_id': object_ids_test,
    'target': test_binary
})

submission_path = os.path.join(SUBMISSION_DIR, f'submission_{MODEL_NAME}.csv')
submission.to_csv(submission_path, index=False)

print(f"\n=== {MODEL_NAME.upper()} Submission ===")
print(f"Threshold: {best_thresh:.4f}")
# minlength=2 always prints both class counts, even when no positives are predicted
print(f"Prediction distribution: {np.bincount(test_binary, minlength=2)}")
print(f"Positive rate: {test_binary.mean():.4f}")
print(f"Saved to: {submission_path}")