In [None]:
# Example from: https://aravindkolli.medium.com/mastering-tabular-data-with-tabtransformer-a-comprehensive-guide-119f6dbf5a79

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define the TabTransformer model
class TabTransformer(nn.Module):
    """Transformer-encoder classifier for rows of numeric tabular features.

    The whole feature vector of a row is projected to ``dim_embedding`` with a
    single linear layer, passed through a stack of ``nn.TransformerEncoderLayer``
    blocks, pooled over the sequence axis and mapped to class scores.

    Note: for a ``(batch, num_features)`` input, ``forward`` inserts a sequence
    axis of length one, so the encoder sees one token per row and the mean
    pooling averages over that single position.

    Args:
        num_features: Size of the last dimension of the input.
        num_classes: Number of output scores per row.
        dim_embedding: Model width (``d_model``) used by the encoder.
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, num_features, num_classes, dim_embedding=64, num_heads=4, num_layers=4):
        super().__init__()
        # Submodules are created in the same order as before (embedding, encoder,
        # classifier) so that seeded parameter initialisation is unchanged.
        self.embedding = nn.Linear(in_features=num_features, out_features=dim_embedding)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=dim_embedding, nhead=num_heads, batch_first=True),
            num_layers=num_layers,
        )
        self.classifier = nn.Linear(in_features=dim_embedding, out_features=num_classes)

    def forward(self, x):
        """Return unnormalised class scores for ``x``.

        Args:
            x: Float tensor whose last dimension is ``num_features``.

        Returns:
            Tensor of class scores with ``num_classes`` entries per row.
        """
        # Embed, then add a sequence axis at position 1 (batch_first layout).
        tokens = torch.unsqueeze(self.embedding(x), 1)
        encoded = self.transformer(tokens)
        # Collapse the sequence axis by averaging.
        pooled = encoded.mean(dim=1)
        return self.classifier(pooled)

# Load the dataset (';'-separated CSV; the 'quality' column is the target)
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Splitting the dataset into features and target variable, adjust labels to be zero-indexed
X = data.drop('quality', axis=1)
y = data['quality'] - 3  # Shift labels by 3 so the class ids passed to CrossEntropyLoss start at 0

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features: statistics are fitted on the training split only
# and then re-used for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model parameters
num_features = X_train_scaled.shape[1]
num_classes = 6  # Adjusted based on unique labels
# CrossEntropyLoss requires every label to lie in [0, num_classes).
assert y.between(0, num_classes - 1).all(), "labels fall outside [0, num_classes)"

# Pick the device once and use it for the model AND every tensor fed to it;
# previously only the model was moved, which raises a device-mismatch error
# as soon as CUDA is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch so weight initialisation (and dropout) is repeatable across runs.
torch.manual_seed(42)

# Initialize the model, loss, and optimizer
model = TabTransformer(num_features, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Converting data to tensors (float32 features, int64 labels) on the model's device
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long, device=device)

# Training loop: full-batch gradient descent, one optimizer step per epoch
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Evaluation
model.eval()  # disables dropout inside the encoder layers
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long, device=device)

with torch.no_grad():
    predictions = model(X_test_tensor)
    # Index of the highest score per row is the predicted (zero-indexed) class.
    predicted_classes = predictions.argmax(dim=1)
    accuracy = (predicted_classes == y_test_tensor).float().mean()
    print(f'Test Accuracy: {accuracy.item()}')


Epoch 0, Loss: 2.0862367153167725
Epoch 10, Loss: 1.000978946685791
Epoch 20, Loss: 0.9669484496116638
Epoch 30, Loss: 0.9404944777488708
Epoch 40, Loss: 0.9218689799308777
Epoch 50, Loss: 0.8944188356399536
Epoch 60, Loss: 0.8558730483055115
Epoch 70, Loss: 0.8351965546607971
Epoch 80, Loss: 0.7923721671104431
Epoch 90, Loss: 0.7510438561439514
Test Accuracy: 0.6000000238418579


In [2]:
# Display the DataFrame read from the CSV (pandas elides the middle rows in the output).
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [3]:
# Per-column summary statistics (count, mean, std, min, quartiles, max), including the 'quality' target.
data.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [4]:
# Print the module tree of the TabTransformer instance built in the training cell.
model

TabTransformer(
  (embedding): Linear(in_features=11, out_features=64, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (classifier): Linear(in_features=64, out_features=6, bias=True)
)

In [5]:
# Training features after StandardScaler.fit_transform (a NumPy array).
X_train_scaled

array([[ 0.21833164,  0.88971201,  0.19209222, ...,  1.09349989,
         0.45822284,  1.12317723],
       [-1.29016623, -1.78878251,  0.65275338, ..., -0.40043872,
        -0.40119696,  1.40827174],
       [ 1.49475291, -0.78434707,  1.01104539, ..., -0.07566946,
         0.51551749, -0.58738978],
       ...,
       [-0.65195559,  0.49909822, -1.08752211, ...,  1.28836145,
        -0.68767023, -0.87248428],
       [-0.24582155, -1.84458448,  0.39683051, ...,  0.05423824,
         0.80199076,  1.40827174],
       [-1.46422367, -1.34236676, -0.06383064, ...,  0.50891521,
        -0.68767023,  2.92877575]])

In [6]:
# The same scaled training features, converted to a torch float tensor for the model.
X_train_tensor

tensor([[ 0.2183,  0.8897,  0.1921,  ...,  1.0935,  0.4582,  1.1232],
        [-1.2902, -1.7888,  0.6528,  ..., -0.4004, -0.4012,  1.4083],
        [ 1.4948, -0.7843,  1.0110,  ..., -0.0757,  0.5155, -0.5874],
        ...,
        [-0.6520,  0.4991, -1.0875,  ...,  1.2884, -0.6877, -0.8725],
        [-0.2458, -1.8446,  0.3968,  ...,  0.0542,  0.8020,  1.4083],
        [-1.4642, -1.3424, -0.0638,  ...,  0.5089, -0.6877,  2.9288]])

In [7]:
# Unnormalised class scores produced by the model for the test set (one row per sample, one column per class).
predictions

tensor([[-3.5352, -2.1309,  2.7195,  2.0722, -1.6192, -2.8583],
        [-3.4552, -1.6313,  3.0280,  1.8754, -1.8905, -2.8920],
        [-3.2193, -2.8564,  0.3702,  2.9369,  0.2616, -2.9042],
        ...,
        [-3.6126, -2.4807,  2.1837,  2.4224, -1.2016, -3.0023],
        [-3.0412, -2.7418, -0.7384,  1.2761,  2.8579, -0.0498],
        [-1.4970,  0.1459,  3.0048,  1.6038, -2.2168, -4.0860]])

In [8]:
# Index of the highest score in each row of `predictions`; labels are zero-indexed (quality - 3).
predicted_classes

tensor([2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 4, 2, 3, 2, 3, 4, 2, 3, 4, 2, 2, 3, 2, 3,
        3, 3, 3, 2, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 4, 3,
        2, 3, 2, 2, 3, 4, 2, 2, 3, 2, 3, 2, 2, 3, 2, 2, 4, 2, 4, 2, 4, 3, 3, 2,
        3, 3, 3, 2, 4, 2, 3, 4, 2, 4, 2, 2, 3, 3, 2, 3, 3, 2, 3, 1, 2, 2, 2, 3,
        2, 3, 1, 3, 2, 2, 3, 1, 3, 3, 3, 2, 1, 2, 3, 2, 4, 2, 2, 4, 3, 2, 2, 3,
        4, 3, 3, 2, 3, 3, 4, 3, 2, 3, 3, 2, 2, 4, 3, 2, 2, 2, 2, 3, 3, 3, 4, 3,
        3, 2, 3, 1, 3, 2, 3, 4, 3, 3, 3, 3, 2, 3, 3, 3, 4, 3, 4, 2, 2, 3, 2, 2,
        2, 2, 2, 2, 4, 3, 4, 3, 4, 2, 3, 2, 2, 4, 3, 4, 2, 4, 3, 3, 4, 3, 3, 3,
        2, 3, 3, 2, 2, 3, 2, 3, 3, 2, 2, 2, 4, 3, 2, 4, 3, 4, 4, 2, 3, 3, 4, 3,
        3, 2, 3, 3, 2, 3, 3, 3, 2, 2, 2, 4, 2, 2, 2, 2, 4, 3, 3, 3, 2, 3, 3, 2,
        3, 2, 3, 3, 2, 3, 3, 2, 4, 2, 3, 2, 2, 3, 1, 2, 2, 3, 3, 4, 3, 1, 2, 4,
        3, 4, 2, 2, 4, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 2, 3, 2, 3,
        2, 3, 3, 4, 3, 3, 2, 3, 2, 3, 4,

In [9]:
# Training labels as a LongTensor, zero-indexed (quality - 3).
y_train_tensor

tensor([3, 3, 3,  ..., 2, 4, 3])

In [None]:
# Recompute test accuracy: the fraction of test rows whose predicted class equals the label.
# Relies on `predicted_classes` and `y_test_tensor` created in the training/evaluation cell.
accuracy = (predicted_classes == y_test_tensor).float().mean()
print(f'Test Accuracy: {accuracy.item()}')

In [10]:
# Element-wise comparison: True where the predicted class matches the test label.
(predicted_classes == y_test_tensor)

tensor([False,  True,  True, False,  True,  True,  True,  True, False,  True,
         True, False, False,  True,  True,  True,  True, False, False,  True,
         True,  True,  True,  True,  True,  True, False, False,  True,  True,
         True, False,  True,  True,  True,  True, False, False, False,  True,
        False,  True, False,  True, False,  True,  True,  True,  True,  True,
         True,  True, False,  True, False, False,  True,  True, False,  True,
         True, False,  True, False, False,  True, False,  True, False, False,
         True, False,  True,  True,  True,  True, False,  True,  True, False,
         True, False,  True, False,  True, False,  True,  True, False, False,
        False, False,  True,  True,  True,  True,  True,  True, False,  True,
         True,  True, False, False,  True, False,  True,  True, False,  True,
        False,  True, False,  True, False,  True,  True, False,  True,  True,
        False,  True,  True, False,  True,  True,  True,  True, 

In [11]:
# Cast the boolean mask to floats (1.0 = correct, 0.0 = wrong) so that its mean is the accuracy.
(predicted_classes == y_test_tensor).float()

tensor([0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0.,
        0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1.,
        0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
        0., 0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0.,
        1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 0.,
        0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
        0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1.,
        1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 0.,
        0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.,
        1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0.,
        0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1.,
        1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0.,
        1., 1., 1., 1., 0., 1., 1., 0., 

In [12]:
# Accuracy as a zero-dimensional torch tensor.
accuracy

tensor(0.6000)

In [14]:
# Convert the zero-dimensional tensor to a plain Python float; the trailing digits reflect float32 precision.
accuracy.item()


0.6000000238418579