In [1]:
# Imports
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


Podemos encontrar el dataset en kaggle, consultar la siguiente dirección --> <a href="https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009"> Red Wine Quality </a>

Debido al fracaso del algoritmo TabPFN. ahora vamos a probar con el algoritmo transformer TabTransformer

In [2]:
# Load the Dataset
dir_dataset = "Data/winequality-red.csv"
data = pd.read_csv(dir_dataset, sep=';')

In [3]:
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [4]:
X = data.drop("quality", axis=1)
y = data["quality"] - 3  # Adjusting labels to be zero-indexed

In [5]:
# Preprocessing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)

# Standarize the features (X)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Aquí definimos el modelo TabTransformer de Aravind Koli en <a href="medium.com"> Medium </a>

In [6]:
# Now we define the TabTransformer Model
# I copied it from 'Aravind Koli in medium.com' 
class TabTransformer(nn.Module):
    def __init__(self, num_features, num_classes, dim_embedding=64, num_heads=4, num_layers=4):
        super(TabTransformer, self).__init__()
        self.embedding = nn.Linear(num_features, dim_embedding)
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim_embedding, nhead=num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Linear(dim_embedding, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)  # Adding a sequence length dimension
        x = self.transformer(x)
        x = torch.mean(x, dim=1)  # Pooling
        x = self.classifier(x)
        return x

In [14]:
# Now we initialize the model, loss and optimizer
model = TabTransformer(num_features=X_train.shape[1], num_classes=6).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=.001)

# Converting data to tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor(y_train.values)

# Training loop 
for epoch in range(100):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = loss_fn(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item():.4f}")

Epoch: 0, Loss: 2.0660
Epoch: 10, Loss: 1.0087
Epoch: 20, Loss: 0.9643
Epoch: 30, Loss: 0.9317
Epoch: 40, Loss: 0.9115
Epoch: 50, Loss: 0.8859
Epoch: 60, Loss: 0.8597
Epoch: 70, Loss: 0.8356
Epoch: 80, Loss: 0.7909
Epoch: 90, Loss: 0.7386


In [15]:
# Evaluation
model.eval()
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.LongTensor(y_test.values)

with torch.no_grad():
    predictions = model(X_test_tensor)
    _, predicted_classes = torch.max(predictions, 1)
    accuracy = (predicted_classes == y_test_tensor).float().mean()
    print(f'Test Accuracy: {accuracy.item()}')

Test Accuracy: 0.6229166388511658


Después de haber ejecutado 100 épocas no ha sido relevante y hemos tenido una accuracy de apenas 64%, menos de un punto por encima de la baseline planteada en 63%.

Voy a probar con más épocas a ver que ocurre.

In [18]:
# Now we initialize the model, loss and optimizer
model = TabTransformer(num_features=X_train.shape[1], num_classes=6).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=.001)

# Converting data to tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor(y_train.values)

# Training loop 
for epoch in range(1000):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = loss_fn(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item():.4f}")
        
        # Evaluation
model.eval()
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.LongTensor(y_test.values)

with torch.no_grad():
    predictions = model(X_test_tensor)
    _, predicted_classes = torch.max(predictions, 1)
    accuracy = (predicted_classes == y_test_tensor).float().mean()
    print(f'Test Accuracy: {accuracy.item()}')

Epoch: 0, Loss: 1.8221
Epoch: 100, Loss: 0.6431
Epoch: 200, Loss: 0.1934
Epoch: 300, Loss: 0.0434
Epoch: 400, Loss: 0.0326
Epoch: 500, Loss: 0.0219
Epoch: 600, Loss: 0.0215
Epoch: 700, Loss: 0.0154
Epoch: 800, Loss: 0.0287
Epoch: 900, Loss: 0.0116
Test Accuracy: 0.6333333253860474


He probado desde las 100 a las 1000 épocas y nada, para este dataset pueden ocurrir dos cosas:
- (1) El uso de algoritmos transformer no está siendo el más indicado y deberíamos afrontar el problema de otra manera
- (2) Debería probar más algoritmos transformer por si únicamente estos dos fallan

Opto por la primera por que los algoritmos más simples son los que he usado y el resto serían una GrowNet que para este tamaño no va a resulta últil o un FTTransformer que tampoco va a ser resolutivo ya que no tenemos columnas que transformar de la manera en que lo hace.