In [None]:
import warnings

warnings.simplefilter("ignore")  # suppress warnings while the imports below run
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn.functional as F
from torch import Tensor
import copy
from torch.utils.data import Dataset, DataLoader

from rtdl_revisiting_models import FTTransformer

# Clear the "ignore" filter installed above so later cells report warnings again.
warnings.resetwarnings()

# Module-level device shared by the cells below: GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import delu
# Fixed seed for reproducible runs; the cuDNN flags request deterministic kernels
# and turn off cuDNN's benchmark-based algorithm selection.
delu.random.seed(999)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
from dataloader import load_data

Data directory: d:\DSS5104\data


In [3]:
def build_model(n_cont_features: int, cat_cardinalities: list, d_out: int):
    """
    Construct an FT-Transformer with the library's default hyperparameters.

    Args:
        n_cont_features: how many continuous (numeric) features the model receives.
        cat_cardinalities: cardinality of each categorical feature (empty list if none).
        d_out: size of the model output (e.g. number of classes, or 1).

    Returns:
        The FTTransformer instance, already moved to the module-level `device`.
    """
    # Hyperparameters come from FTTransformer.get_default_kwargs(), not from this notebook.
    default_kwargs = FTTransformer.get_default_kwargs()
    network = FTTransformer(
        n_cont_features=n_cont_features,
        cat_cardinalities=cat_cardinalities,
        d_out=d_out,
        **default_kwargs,
    )
    return network.to(device)

In [None]:
class TabularDataset(Dataset):
    """
    In-memory tabular dataset yielding (x_cont, x_cat, y) samples.

    Categorical columns are integer-encoded and continuous columns are standardized.
    A training dataset (is_train=True) learns the category lists and fits the scaler;
    a validation/test dataset should be given the training dataset's `scaler` and
    `cat_categories` so both splits share one encoding.

    Categorical encoding:
        Known categories map to 0..n-1. Missing values and categories that are not in
        the category list map to the extra index n, so every code is a valid embedding
        index. The reported cardinality is therefore n + 1 per feature.

    Args:
        X_df: feature DataFrame (copied, never mutated).
        y: targets (anything np.array accepts, e.g. a Series or list).
        cat_cols: names of categorical columns (may be empty).
        cont_cols: names of continuous columns (may be empty).
        task_type: "classification" or anything else (treated as regression).
        is_train: when True, learn categories and fit a new StandardScaler.
        scaler: fitted scaler to apply when is_train is False; required if cont_cols is non-empty.
        cat_categories: mapping column -> category list learned on the training data.

    Raises:
        ValueError: if is_train is False, cont_cols is non-empty and no scaler is given.
    """

    def __init__(self, X_df, y, cat_cols, cont_cols, task_type, is_train=False, scaler=None, cat_categories=None):

        self.cat_cols = cat_cols
        self.cont_cols = cont_cols
        self.task_type = task_type

        X = X_df.copy()

        # Categorical features: convert to categorical dtype and integer-encode.
        self.cat_cardinalities = []      # embedding size per categorical feature (n categories + 1 unknown slot)
        self.cat_categories = {}         # category list actually used to encode each column
        if self.cat_cols:
            for col in self.cat_cols:
                if not is_train and cat_categories is not None and col in cat_categories:
                    # Validation/test: reuse the training categories so codes agree across splits.
                    X[col] = pd.Categorical(X[col], categories=cat_categories[col])
                else:
                    # Training (or no reference categories supplied): derive categories from this frame.
                    X[col] = X[col].astype('category')

                categories = X[col].cat.categories
                n_categories = len(categories)
                self.cat_categories[col] = categories
                # Use the length of the category list, not nunique(): a column that is already
                # categorical may carry categories absent from this split, and their codes
                # would otherwise exceed the reported cardinality.
                self.cat_cardinalities.append(n_categories + 1)

                # pandas encodes missing/unknown values as -1, which is not a valid embedding
                # index; route them to the reserved trailing index instead.
                codes = X[col].cat.codes.astype('int64')
                X[col] = codes.where(codes >= 0, n_categories)

        # Continuous features: cast to float32 and standardize.
        self.scaler = None
        if self.cont_cols:
            if not is_train and scaler is None:
                raise ValueError(
                    "A fitted scaler must be provided when is_train=False and cont_cols is non-empty."
                )
            X[self.cont_cols] = X[self.cont_cols].astype('float32')
            if is_train:
                # Fit the scaler on the training data and apply it.
                self.scaler = StandardScaler()
                X[self.cont_cols] = self.scaler.fit_transform(X[self.cont_cols])
            else:
                # Apply the training-set scaler; keep a reference to what was used.
                X[self.cont_cols] = scaler.transform(X[self.cont_cols])
                self.scaler = scaler

        # Store processed features as tensors; an absent feature group is kept as None.
        if self.cont_cols:
            self.X_cont = torch.tensor(X[self.cont_cols].values, dtype=torch.float32)
        else:
            self.X_cont = None
        if self.cat_cols:
            self.X_cat = torch.tensor(X[self.cat_cols].values, dtype=torch.long)
        else:
            self.X_cat = None

        # Targets: regression always uses float32. Classification keeps float32 when the
        # labels already arrive as floats (binary targets prepared for a BCE-style loss)
        # and uses long class indices otherwise.
        y_array = np.array(y)
        if task_type == "classification":
            target_dtype = torch.float32 if str(y_array.dtype).startswith('float') else torch.long
            self.y = torch.tensor(y_array, dtype=target_dtype)
        else:
            self.y = torch.tensor(y_array, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return (x_cont, x_cat, y) for sample `idx`; a missing feature group is None."""
        x_cont = self.X_cont[idx] if self.X_cont is not None else None
        x_cat = self.X_cat[idx] if self.X_cat is not None else None
        y = self.y[idx]
        return x_cont, x_cat, y


def collate_fn(batch):
    """
    Merge a list of (x_cont, x_cat, y) samples into one batch.

    Each feature group is stacked along a new leading dimension. A group for which no
    sample supplies a tensor (every entry is None) is returned as None instead of a
    tensor. Targets are always stacked.

    Returns:
        (x_cont_batch, x_cat_batch, y_batch)
    """
    samples = list(batch)

    # Collect the non-None entries of each field across the batch.
    cont_rows = [x_cont for x_cont, _, _ in samples if x_cont is not None]
    target_rows = [y for _, _, y in samples]
    cat_rows = [x_cat for _, x_cat, _ in samples if x_cat is not None]

    x_cont_batch = torch.stack(cont_rows) if cont_rows else None
    y_batch = torch.stack(target_rows)
    x_cat_batch = torch.stack(cat_rows) if cat_rows else None

    return x_cont_batch, x_cat_batch, y_batch


def prepare_data(dataset: str, batch_size: int = 256):
    """
    Load a named dataset and wrap its train/validation splits in DataLoaders.

    The dataset is chosen by the case-insensitive prefix of `dataset`: adult, california,
    higgs, churn, creditcard, poker, bank, wine or covtype. Binary targets stored as
    labels are converted to 0/1. Columns of dtype category/object are treated as
    categorical and numeric columns as continuous.

    Args:
        dataset: dataset name (only its prefix is matched).
        batch_size: batch size used for both loaders.

    Returns:
        (data_loaders, n_cont_features, cat_cardinalities, d_out, task_type), where
        data_loaders is {"train": DataLoader, "val": DataLoader}, d_out is 1 for
        regression and two-class problems and the number of distinct training labels
        otherwise, and task_type is "classification" or "regression".

    Raises:
        ValueError: if `dataset` matches none of the supported names.
    """
    dataset = dataset.lower()

    # (name prefix, key passed to load_data, label mapped to class 1 or None, task type)
    split_specs = (
        ("california", "california", None, "regression"),
        ("higgs", "higgs", 1, "classification"),
        ("churn", "churn", 'Yes', "classification"),
        ("creditcard", "credit", 1, "classification"),
        ("poker", "poker", None, "classification"),
        ("bank", "bank", 'yes', "classification"),
        ("wine", "wine", None, "regression"),
        ("covtype", "covtype", None, "classification"),
    )

    if dataset.startswith("adult"):
        # adult is delivered as two full frames with the target in the 'income' column.
        data_train, data_test = load_data("adult")
        X_train = data_train.drop(columns=['income'])
        X_val = data_test.drop(columns=['income'])
        y_train = (data_train['income'] == '>50K').astype(int)
        y_val = (data_test['income'] == '>50K').astype(int)
        task_type = "classification"
    else:
        spec = next((entry for entry in split_specs if dataset.startswith(entry[0])), None)
        if spec is None:
            raise ValueError(f"Unsupported dataset: {dataset}")
        _, loader_key, positive_label, task_type = spec
        X_train, X_val, y_train, y_val = load_data(loader_key)
        if positive_label is not None:
            # Binarize the target: 1 where it equals the positive label, else 0.
            y_train = (y_train == positive_label).astype(int)
            y_val = (y_val == positive_label).astype(int)

    # Column roles are inferred from the training frame's dtypes.
    cat_cols = X_train.select_dtypes(include=['category', 'object']).columns.tolist()
    cont_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()

    if task_type == "classification":
        y_train = y_train.astype('int64')
        y_val = y_val.astype('int64')
        n_classes = pd.Series(y_train).nunique()
        # Two classes use a single logit; otherwise one output per class.
        d_out = n_classes if n_classes != 2 else 1
        float_targets = d_out == 1
    else:
        d_out = 1
        float_targets = True

    if float_targets:
        # Single-output models (regression, single-logit classification) train on float targets.
        y_train = y_train.astype('float32')
        y_val = y_val.astype('float32')

    # The validation set reuses the scaler and category lists learned on the training set.
    train_dataset = TabularDataset(X_train, y_train, cat_cols, cont_cols, task_type, is_train=True)
    val_dataset = TabularDataset(
        X_val, y_val, cat_cols, cont_cols, task_type, is_train=False,
        scaler=train_dataset.scaler, cat_categories=train_dataset.cat_categories,
    )

    data_loaders = {
        "train": DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn),
        "val": DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn),
    }

    n_cont_features = len(cont_cols)
    cat_cardinalities = train_dataset.cat_cardinalities
    return data_loaders, n_cont_features, cat_cardinalities, d_out, task_type


In [None]:
def _move_batch(batch, target_device):
    """Move one (x_cont, x_cat, y) batch to `target_device`, keeping absent (None) feature groups as None."""
    x_cont, x_cat, y = batch
    x_cont = x_cont.to(target_device) if x_cont is not None else None
    x_cat = x_cat.to(target_device) if x_cat is not None else None
    return x_cont, x_cat, y.to(target_device)


def _evaluate_batch(model, batch, loss_fn, task_type, d_out):
    """Forward one batch and return (loss tensor, batch size, number of correct predictions)."""
    x_cont, x_cat, y = batch
    outputs = model(x_cont, x_cat)

    # Single-output heads produce shape (batch, 1); drop the last dim to match the targets.
    if (task_type == "classification" and d_out == 1) or task_type == "regression":
        outputs = outputs.squeeze(-1)

    loss = loss_fn(outputs, y)

    n_correct = 0
    if task_type == "classification":
        with torch.no_grad():
            if d_out == 1:
                # A positive logit corresponds to a predicted probability above 0.5.
                preds = (outputs > 0).long()
                targets = y.long()
            else:
                preds = torch.argmax(outputs, dim=1)
                targets = y
            n_correct = (preds == targets).sum().item()

    return loss, y.size(0), n_correct


def train_model(model, train_loader, val_loader, task_type, d_out,
                n_epochs=100, batch_size=256, patience=10, lr=3e-4, weight_decay=1e-5):
    """
    Train `model` with Adam and early stopping, then restore the best weights.

    The loss is binary cross-entropy with logits (classification, d_out == 1),
    cross-entropy (classification, d_out > 1) or MSE (any other task_type). The model is
    selected on validation accuracy for classification and on validation MSE otherwise.
    Training stops once `patience` consecutive epochs bring no strict improvement.

    Args:
        model: callable as model(x_cont, x_cat); either argument may be None.
        train_loader, val_loader: iterables yielding (x_cont, x_cat, y) batches.
        task_type: "classification" or "regression".
        d_out: model output dimension.
        n_epochs: maximum number of epochs; 0 skips training entirely.
        batch_size: accepted for interface compatibility; not used here (the loaders
            already define the batch size).
        patience: number of non-improving epochs tolerated before stopping.
        lr: Adam learning rate.
        weight_decay: accepted for interface compatibility; NOTE(review): currently
            ignored -- the optimizer is created with weight_decay=0.

    Returns:
        {"best_epoch": int, "best_val_acc": float | None} for classification, or
        {"best_epoch": int, "best_val_loss": float | None} otherwise. best_epoch is -1
        and the metric is None when no epoch was run.
    """
    if task_type == "classification":
        loss_fn = F.binary_cross_entropy_with_logits if d_out == 1 else F.cross_entropy
    else:
        loss_fn = F.mse_loss

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0)

    # Send batches to wherever the model lives, so this function does not depend on a
    # notebook-level `device` variable agreeing with the model's placement.
    run_device = next(model.parameters()).device

    # Scores are kept "higher is better": accuracy, or the negated validation MSE.
    best_val_score = None
    best_epoch = -1
    best_state = None
    patience_counter = 0

    for epoch in range(1, n_epochs + 1):
        # ---- training pass ----
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_samples = 0

        for batch in train_loader:
            batch = _move_batch(batch, run_device)
            loss, n_batch, n_correct = _evaluate_batch(model, batch, loss_fn, task_type, d_out)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch average is per-sample.
            total_loss += loss.item() * n_batch
            total_samples += n_batch
            total_correct += n_correct

        avg_train_loss = total_loss / total_samples
        train_acc = total_correct / total_samples if task_type == "classification" else None

        # ---- validation pass ----
        model.eval()
        val_loss_total = 0.0
        val_correct = 0
        total_val_samples = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = _move_batch(batch, run_device)
                val_loss, n_batch, n_correct = _evaluate_batch(model, batch, loss_fn, task_type, d_out)
                val_loss_total += val_loss.item() * n_batch
                total_val_samples += n_batch
                val_correct += n_correct

        avg_val_loss = val_loss_total / total_val_samples
        val_acc = val_correct / total_val_samples if task_type == "classification" else None

        current_val_score = val_acc if task_type == "classification" else -avg_val_loss

        # ---- model selection / early stopping ----
        if best_val_score is None or current_val_score > best_val_score:
            best_val_score = current_val_score
            best_epoch = epoch
            # Deep copy: state_dict() returns references to the live parameter tensors.
            best_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1

        if task_type == "classification":
            print(f"Epoch {epoch:03d}: Train Loss = {avg_train_loss:.4f}, Train Acc = {train_acc:.4f}, "
                  f"Val Loss = {avg_val_loss:.4f}, Val Acc = {val_acc:.4f}")
        else:
            train_rmse = np.sqrt(avg_train_loss)
            val_rmse = np.sqrt(avg_val_loss)
            print(f"Epoch {epoch:03d}: Train MSE = {avg_train_loss:.4f} (RMSE={train_rmse:.4f}), "
                  f"Val MSE = {avg_val_loss:.4f} (RMSE={val_rmse:.4f})")

        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

    if best_state is not None:
        model.load_state_dict(best_state)

    # Report from the tracked best score rather than loop-local variables, which do not
    # exist when no epoch ran.
    results = {"best_epoch": best_epoch}
    if task_type == "classification":
        results["best_val_acc"] = best_val_score
    else:
        results["best_val_loss"] = -best_val_score if best_val_score is not None else None
    return results

In [14]:
# Churn dataset (classification). Uses lr=1e-4, lower than the 3e-4 used for the other datasets.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("churn", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=1e-4)
print("Churn Results:", results)

binary classification
(5625, 19) (5625,)
(1407, 19) (1407,)
Epoch 001: Train Loss = 0.5010, Train Acc = 0.7502, Val Loss = 0.4384, Val Acc = 0.7939
Epoch 002: Train Loss = 0.4318, Train Acc = 0.7979, Val Loss = 0.4261, Val Acc = 0.7946
Epoch 003: Train Loss = 0.4206, Train Acc = 0.8052, Val Loss = 0.4238, Val Acc = 0.7939
Epoch 004: Train Loss = 0.4224, Train Acc = 0.7986, Val Loss = 0.4218, Val Acc = 0.8067
Epoch 005: Train Loss = 0.4204, Train Acc = 0.7988, Val Loss = 0.4273, Val Acc = 0.7982
Epoch 006: Train Loss = 0.4169, Train Acc = 0.8012, Val Loss = 0.4205, Val Acc = 0.8095
Epoch 007: Train Loss = 0.4130, Train Acc = 0.8069, Val Loss = 0.4188, Val Acc = 0.8031
Epoch 008: Train Loss = 0.4124, Train Acc = 0.8071, Val Loss = 0.4233, Val Acc = 0.7967
Epoch 009: Train Loss = 0.4136, Train Acc = 0.8028, Val Loss = 0.4242, Val Acc = 0.8003
Epoch 010: Train Loss = 0.4143, Train Acc = 0.8027, Val Loss = 0.4327, Val Acc = 0.7996
Epoch 011: Train Loss = 0.4136, Train Acc = 0.8036, Val Loss

In [18]:
# Adult dataset (classification; target is income == '>50K'). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("adult", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("adult Results:", results)

binary classification
(30162, 15) (30162,)
(15060, 15) (15060,)
Epoch 001: Train Loss = 0.3496, Train Acc = 0.8333, Val Loss = 0.3197, Val Acc = 0.8474
Epoch 002: Train Loss = 0.3180, Train Acc = 0.8525, Val Loss = 0.3152, Val Acc = 0.8490
Epoch 003: Train Loss = 0.3122, Train Acc = 0.8558, Val Loss = 0.3117, Val Acc = 0.8553
Epoch 004: Train Loss = 0.3111, Train Acc = 0.8546, Val Loss = 0.3113, Val Acc = 0.8553
Epoch 005: Train Loss = 0.3085, Train Acc = 0.8553, Val Loss = 0.3103, Val Acc = 0.8542
Epoch 006: Train Loss = 0.3076, Train Acc = 0.8555, Val Loss = 0.3105, Val Acc = 0.8552
Epoch 007: Train Loss = 0.3053, Train Acc = 0.8596, Val Loss = 0.3285, Val Acc = 0.8548
Epoch 008: Train Loss = 0.3047, Train Acc = 0.8586, Val Loss = 0.3098, Val Acc = 0.8535
Epoch 009: Train Loss = 0.3025, Train Acc = 0.8589, Val Loss = 0.3143, Val Acc = 0.8550
Epoch 010: Train Loss = 0.3012, Train Acc = 0.8606, Val Loss = 0.3129, Val Acc = 0.8535
Epoch 011: Train Loss = 0.3039, Train Acc = 0.8571, Val 

In [12]:
# California dataset (regression, trained and selected on MSE). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("california", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("California Results:", results)

regression
(16512, 8) (16512,)
(4128, 8) (4128,)
Epoch 001: Train MSE = 0.8081 (RMSE=0.8989), Val MSE = 0.4542 (RMSE=0.6739)
Epoch 002: Train MSE = 0.3877 (RMSE=0.6226), Val MSE = 0.3440 (RMSE=0.5865)
Epoch 003: Train MSE = 0.3489 (RMSE=0.5907), Val MSE = 0.3382 (RMSE=0.5815)
Epoch 004: Train MSE = 0.3472 (RMSE=0.5893), Val MSE = 0.3228 (RMSE=0.5681)
Epoch 005: Train MSE = 0.3206 (RMSE=0.5662), Val MSE = 0.3031 (RMSE=0.5505)
Epoch 006: Train MSE = 0.3149 (RMSE=0.5612), Val MSE = 0.3015 (RMSE=0.5491)
Epoch 007: Train MSE = 0.3031 (RMSE=0.5506), Val MSE = 0.3060 (RMSE=0.5532)
Epoch 008: Train MSE = 0.2965 (RMSE=0.5445), Val MSE = 0.3153 (RMSE=0.5615)
Epoch 009: Train MSE = 0.3047 (RMSE=0.5520), Val MSE = 0.2907 (RMSE=0.5392)
Epoch 010: Train MSE = 0.2931 (RMSE=0.5414), Val MSE = 0.2905 (RMSE=0.5390)
Epoch 011: Train MSE = 0.2822 (RMSE=0.5312), Val MSE = 0.2951 (RMSE=0.5433)
Epoch 012: Train MSE = 0.2826 (RMSE=0.5316), Val MSE = 0.2915 (RMSE=0.5399)
Epoch 013: Train MSE = 0.2831 (RMSE=0.5

In [5]:
# Higgs dataset (classification; target is label == 1). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("higgs", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("Higgs Results:", results)

binary classification
(8800000, 28) (8800000,)
(2200000, 28) (2200000,)
Epoch 001: Train Loss = 0.5091, Train Acc = 0.7430, Val Loss = 0.4903, Val Acc = 0.7570
Epoch 002: Train Loss = 0.4881, Train Acc = 0.7576, Val Loss = 0.4774, Val Acc = 0.7648
Epoch 003: Train Loss = 0.4801, Train Acc = 0.7631, Val Loss = 0.4729, Val Acc = 0.7681
Epoch 004: Train Loss = 0.4752, Train Acc = 0.7663, Val Loss = 0.4680, Val Acc = 0.7711
Epoch 005: Train Loss = 0.4715, Train Acc = 0.7687, Val Loss = 0.4658, Val Acc = 0.7725
Epoch 006: Train Loss = 0.4681, Train Acc = 0.7711, Val Loss = 0.4619, Val Acc = 0.7755
Epoch 007: Train Loss = 0.4655, Train Acc = 0.7727, Val Loss = 0.4613, Val Acc = 0.7758
Epoch 008: Train Loss = 0.4634, Train Acc = 0.7739, Val Loss = 0.4586, Val Acc = 0.7777
Epoch 009: Train Loss = 0.4618, Train Acc = 0.7750, Val Loss = 0.4587, Val Acc = 0.7779
Epoch 010: Train Loss = 0.4603, Train Acc = 0.7760, Val Loss = 0.4558, Val Acc = 0.7792
Epoch 011: Train Loss = 0.4591, Train Acc = 0.77

In [14]:
# Credit card dataset (classification; target is label == 1). prepare_data loads it under the key "credit".
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("creditcard", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("Credit Card Results:", results)

binary classification
(227845, 30) (227845,)
(56962, 30) (56962,)
Epoch 001: Train Loss = 0.0066, Train Acc = 0.9992, Val Loss = 0.0030, Val Acc = 0.9995
Epoch 002: Train Loss = 0.0035, Train Acc = 0.9994, Val Loss = 0.0031, Val Acc = 0.9994
Epoch 003: Train Loss = 0.0034, Train Acc = 0.9993, Val Loss = 0.0030, Val Acc = 0.9994
Epoch 004: Train Loss = 0.0033, Train Acc = 0.9994, Val Loss = 0.0043, Val Acc = 0.9990
Epoch 005: Train Loss = 0.0032, Train Acc = 0.9994, Val Loss = 0.0026, Val Acc = 0.9995
Epoch 006: Train Loss = 0.0031, Train Acc = 0.9993, Val Loss = 0.0030, Val Acc = 0.9992
Epoch 007: Train Loss = 0.0031, Train Acc = 0.9994, Val Loss = 0.0028, Val Acc = 0.9994
Epoch 008: Train Loss = 0.0031, Train Acc = 0.9994, Val Loss = 0.0025, Val Acc = 0.9995
Epoch 009: Train Loss = 0.0030, Train Acc = 0.9994, Val Loss = 0.0026, Val Acc = 0.9995
Epoch 010: Train Loss = 0.0029, Train Acc = 0.9994, Val Loss = 0.0024, Val Acc = 0.9995
Epoch 011: Train Loss = 0.0030, Train Acc = 0.9994, Va

In [15]:
# Poker dataset (classification; labels are used as loaded, no binarization). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("poker", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("Poker Results:", results)

multi-class classification
(25010, 10) (25010,)
(1000000, 10) (1000000,)
Epoch 001: Train Loss = 1.0224, Train Acc = 0.4844, Val Loss = 0.9874, Val Acc = 0.5012
Epoch 002: Train Loss = 0.9891, Train Acc = 0.4974, Val Loss = 0.9864, Val Acc = 0.4886
Epoch 003: Train Loss = 0.9866, Train Acc = 0.4980, Val Loss = 0.9847, Val Acc = 0.4970
Epoch 004: Train Loss = 0.9865, Train Acc = 0.4922, Val Loss = 0.9833, Val Acc = 0.5012
Epoch 005: Train Loss = 0.9800, Train Acc = 0.5020, Val Loss = 0.9673, Val Acc = 0.5268
Epoch 006: Train Loss = 0.9391, Train Acc = 0.5569, Val Loss = 0.8887, Val Acc = 0.5923
Epoch 007: Train Loss = 0.8736, Train Acc = 0.6044, Val Loss = 0.8074, Val Acc = 0.6492
Epoch 008: Train Loss = 0.7811, Train Acc = 0.6592, Val Loss = 0.7066, Val Acc = 0.6964
Epoch 009: Train Loss = 0.6794, Train Acc = 0.7137, Val Loss = 0.5553, Val Acc = 0.7731
Epoch 010: Train Loss = 0.5499, Train Acc = 0.7757, Val Loss = 0.4349, Val Acc = 0.8253
Epoch 011: Train Loss = 0.4142, Train Acc = 0.8

In [16]:
# Wine dataset, trained as regression (MSE) by prepare_data. Allows up to 200 epochs instead of 100.
# NOTE(review): the loader's log line prints "multi-class classification" for this dataset,
# which disagrees with the regression task type set in prepare_data -- confirm which is intended.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("wine", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=200, batch_size=256, patience=10, lr=3e-4)
print("Wine Results:", results)

multi-class classification
(2558, 11) (2558,)
(640, 11) (640,)
Epoch 001: Train MSE = 8.3010 (RMSE=2.8811), Val MSE = 2.6171 (RMSE=1.6177)
Epoch 002: Train MSE = 2.2249 (RMSE=1.4916), Val MSE = 1.6618 (RMSE=1.2891)
Epoch 003: Train MSE = 1.5123 (RMSE=1.2298), Val MSE = 1.2104 (RMSE=1.1002)
Epoch 004: Train MSE = 1.1265 (RMSE=1.0614), Val MSE = 0.9403 (RMSE=0.9697)
Epoch 005: Train MSE = 0.8996 (RMSE=0.9485), Val MSE = 0.7833 (RMSE=0.8850)
Epoch 006: Train MSE = 0.7654 (RMSE=0.8749), Val MSE = 0.7030 (RMSE=0.8385)
Epoch 007: Train MSE = 0.6983 (RMSE=0.8356), Val MSE = 0.6659 (RMSE=0.8160)
Epoch 008: Train MSE = 0.6680 (RMSE=0.8173), Val MSE = 0.6520 (RMSE=0.8074)
Epoch 009: Train MSE = 0.6549 (RMSE=0.8092), Val MSE = 0.6481 (RMSE=0.8050)
Epoch 010: Train MSE = 0.6505 (RMSE=0.8065), Val MSE = 0.6469 (RMSE=0.8043)
Epoch 011: Train MSE = 0.6481 (RMSE=0.8050), Val MSE = 0.6451 (RMSE=0.8032)
Epoch 012: Train MSE = 0.6460 (RMSE=0.8037), Val MSE = 0.6417 (RMSE=0.8011)
Epoch 013: Train MSE = 0.

In [None]:
# Covtype dataset (classification; labels are used as loaded, no binarization). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("covtype", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("Covtype Results:", results)

Epoch 001: Train Loss = 0.5127, Train Acc = 0.7833, Val Loss = 0.3634, Val Acc = 0.8482
Epoch 002: Train Loss = 0.3593, Train Acc = 0.8500, Val Loss = 0.2823, Val Acc = 0.8837
Epoch 003: Train Loss = 0.3061, Train Acc = 0.8731, Val Loss = 0.2454, Val Acc = 0.8989
Epoch 004: Train Loss = 0.2756, Train Acc = 0.8861, Val Loss = 0.2247, Val Acc = 0.9078
Epoch 005: Train Loss = 0.2542, Train Acc = 0.8950, Val Loss = 0.2087, Val Acc = 0.9144
Epoch 006: Train Loss = 0.2390, Train Acc = 0.9012, Val Loss = 0.1921, Val Acc = 0.9204
Epoch 007: Train Loss = 0.2268, Train Acc = 0.9068, Val Loss = 0.1902, Val Acc = 0.9213
Epoch 008: Train Loss = 0.2163, Train Acc = 0.9108, Val Loss = 0.1756, Val Acc = 0.9282
Epoch 009: Train Loss = 0.2082, Train Acc = 0.9140, Val Loss = 0.1704, Val Acc = 0.9301
Epoch 010: Train Loss = 0.2016, Train Acc = 0.9165, Val Loss = 0.1662, Val Acc = 0.9319
Epoch 011: Train Loss = 0.1951, Train Acc = 0.9197, Val Loss = 0.1544, Val Acc = 0.9368
Epoch 012: Train Loss = 0.1889, 

In [9]:
# Bank dataset (classification; target is label == 'yes'). Up to 100 epochs, patience 10, lr=3e-4.
data_loaders, n_cont, cat_cardinalities, d_out, task_type = prepare_data("bank", batch_size=256)
model = build_model(n_cont, cat_cardinalities, d_out)
results = train_model(model, data_loaders["train"], data_loaders["val"], task_type, d_out, n_epochs=100, batch_size=256, patience=10, lr=3e-4)
print("Bank Results:", results)

binary classification
(32950, 20) (32950,)
(8238, 20) (8238,)
Epoch 001: Train Loss = 0.2292, Train Acc = 0.8971, Val Loss = 0.1893, Val Acc = 0.9124
Epoch 002: Train Loss = 0.1897, Train Acc = 0.9090, Val Loss = 0.1840, Val Acc = 0.9182
Epoch 003: Train Loss = 0.1851, Train Acc = 0.9109, Val Loss = 0.1815, Val Acc = 0.9198
Epoch 004: Train Loss = 0.1832, Train Acc = 0.9132, Val Loss = 0.1783, Val Acc = 0.9193
Epoch 005: Train Loss = 0.1810, Train Acc = 0.9135, Val Loss = 0.1756, Val Acc = 0.9207
Epoch 006: Train Loss = 0.1792, Train Acc = 0.9143, Val Loss = 0.1743, Val Acc = 0.9212
Epoch 007: Train Loss = 0.1780, Train Acc = 0.9149, Val Loss = 0.1784, Val Acc = 0.9202
Epoch 008: Train Loss = 0.1771, Train Acc = 0.9158, Val Loss = 0.1727, Val Acc = 0.9216
Epoch 009: Train Loss = 0.1756, Train Acc = 0.9162, Val Loss = 0.1757, Val Acc = 0.9216
Epoch 010: Train Loss = 0.1757, Train Acc = 0.9167, Val Loss = 0.1726, Val Acc = 0.9210
Epoch 011: Train Loss = 0.1742, Train Acc = 0.9173, Val Lo