# Importing Libraries and Formatting Data

In [1]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch
from torch import nn
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from transformers import AutoModel

df = pd.read_csv('/kaggle/input/lollol/HIGGS.csv/HIGGS.csv')
print(len(df))
class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

# Split the dataframe into training and testing datasets
train_df = df.iloc[:1100000]
test_df = df.iloc[-100000:]  # The last 100k rows

train_features = train_df.iloc[:, 1:22].values
train_targets = train_df.iloc[:,0].values

test_features = test_df.iloc[:, 1:22].values
test_targets = test_df.iloc[:,0].values

train_dataset = CustomDataset(torch.from_numpy(train_features).float(), torch.from_numpy(train_targets).long())
test_dataset = CustomDataset(torch.from_numpy(test_features).float(), torch.from_numpy(test_targets).long())

batch_size=64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


10999999


# Transformer autoencoder for classification

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score

class TransformerAutoencoder(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers):
        super(TransformerAutoencoder, self).__init__()
        self.input_layer = nn.Linear(input_dim, d_model)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_encoder_layers)
        
        # Define the Transformer decoder layer
        self.decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, batch_first=True)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_decoder_layers)
        
        self.decoder = nn.Sequential(
            nn.Linear(d_model,20),
            nn.ReLU(),
            nn.Linear(20,1),
            nn.Sigmoid()  # Sigmoid activation for pixel values between 0 and 1
        )

    def forward(self, x):
        src = self.input_layer(x)
        memory = self.transformer_encoder(x)
        decoded = self.transformer_decoder(src,memory)
        return self.decoder(decoded)


# Hyperparameters
input_dim = 21
d_model = 21
0
nhead =7
num_encoder_layers = 1
num_decoder_layers = 1
num_epochs = 50
learning_rate = 0.001
batch_size = 64

# Initialize the model and move it to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TransformerAutoencoder(input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers).to(device)
model.use_nested_tensor = True

# Loss and optimizer
criterion =nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)



# Training loop
for epoch in range(num_epochs):
    epoch_loss=0.0
    for i, (data, labels) in enumerate(train_loader):
        data = data.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(data)

        loss = criterion(outputs[:,0], labels.float())
        epoch_loss += loss.item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item()}')


# Evaluation
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for data, labels in test_loader:
        data = data.to(device)
        labels = labels.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

print('ROC-AUC score: ', roc_auc_score(y_true, y_pred))


Epoch [1/50], Step [17188/17188], Loss: 0.62662672996521
Epoch [2/50], Step [17188/17188], Loss: 0.6776449680328369
Epoch [3/50], Step [17188/17188], Loss: 0.6665347814559937
Epoch [4/50], Step [17188/17188], Loss: 0.6695046424865723
Epoch [5/50], Step [17188/17188], Loss: 0.629170298576355
Epoch [6/50], Step [17188/17188], Loss: 0.6439266800880432
Epoch [7/50], Step [17188/17188], Loss: 0.618904173374176
Epoch [8/50], Step [17188/17188], Loss: 0.7013390064239502
Epoch [9/50], Step [17188/17188], Loss: 0.6393381357192993
Epoch [10/50], Step [17188/17188], Loss: 0.5353981852531433
Epoch [11/50], Step [17188/17188], Loss: 0.5754103660583496
Epoch [12/50], Step [17188/17188], Loss: 0.6596124172210693
Epoch [13/50], Step [17188/17188], Loss: 0.6680445671081543
Epoch [14/50], Step [17188/17188], Loss: 0.6128355860710144
Epoch [15/50], Step [17188/17188], Loss: 0.6691184043884277
Epoch [16/50], Step [17188/17188], Loss: 0.6345705986022949
Epoch [17/50], Step [17188/17188], Loss: 0.6168990731

In [16]:
torch.save(model.state_dict(), 'model_weights.pth')


I tried several auto encoder architectures including Transformer decoder, simple linear hidden unit decoders and single hidden layer decoders. The transformer decoder proved to be the best.