In [1]:
#TabNet implementation

In [5]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
   ---------------------------------------- 0.0/44.5 kB ? eta -:--:--
   ------------------------------------ --- 41.0/44.5 kB 1.9 MB/s eta 0:00:01
   ---------------------------------------- 44.5/44.5 kB 543.5 kB/s eta 0:00:00
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0


In [7]:
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from pytorch_tabnet.tab_model import TabNetClassifier

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load synthetic tabular data
n_samples = 10000
n_numerical = 5
n_categorical = 3
num_classes = 2

X_num = np.random.randn(n_samples, n_numerical)
X_cat = np.random.randint(0, 10, size=(n_samples, n_categorical))
y = np.random.randint(0, num_classes, size=(n_samples,))

# Encode categorical features
encoders = [LabelEncoder() for _ in range(n_categorical)]
X_cat = np.column_stack([enc.fit_transform(X_cat[:, i]) for i, enc in enumerate(encoders)])

# Normalize numerical features
scaler = StandardScaler()
X_num = scaler.fit_transform(X_num)

# Convert to DataFrame
columns = [f"num_{i}" for i in range(n_numerical)] + [f"cat_{i}" for i in range(n_categorical)]
df = pd.DataFrame(np.hstack([X_num, X_cat]), columns=columns)
df["label"] = y

# Split data
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# TabNet Model
cat_idxs = [i for i in range(n_numerical, n_numerical + n_categorical)]
cat_dims = [10] * n_categorical

clf = TabNetClassifier(cat_idxs=cat_idxs, cat_dims=cat_dims, device_name=device.type)
X_train, X_test = train_data.drop(columns=["label"]).values, test_data.drop(columns=["label"]).values
y_train, y_test = train_data["label"].values, test_data["label"].values

clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], max_epochs=10, patience=5)


Using device: cpu




epoch 0  | loss: 0.96895 | val_0_auc: 0.50486 |  0:00:01s
epoch 1  | loss: 0.7332  | val_0_auc: 0.50562 |  0:00:02s
epoch 2  | loss: 0.70577 | val_0_auc: 0.52279 |  0:00:03s
epoch 3  | loss: 0.69675 | val_0_auc: 0.51015 |  0:00:04s
epoch 4  | loss: 0.69485 | val_0_auc: 0.48361 |  0:00:04s
epoch 5  | loss: 0.69471 | val_0_auc: 0.49412 |  0:00:05s
epoch 6  | loss: 0.69349 | val_0_auc: 0.49138 |  0:00:06s
epoch 7  | loss: 0.69322 | val_0_auc: 0.47962 |  0:00:06s

Early stopping occurred at epoch 7 with best_epoch = 2 and best_val_0_auc = 0.52279


