In [6]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertModel, BertConfig

# Read the three CSV inputs: training rows, test rows, and the test labels.
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')
test_labels = pd.read_csv('/content/test_label.csv')

# Min-max scaling: the scaler learns its per-column range from the training
# frame only, and that same range is then applied to both splits.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(train_data)
X_train = scaler.transform(train_data)
X_test = scaler.transform(test_data)
y_test = test_labels.values  # every column of the label file is kept, not just the first

# Wrap the arrays as float32 tensors. The training set carries features only,
# while each test item pairs a feature row with its label row.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32))
test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.float32),
)

# Mini-batches of 32: training batches are reshuffled, test order is preserved.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [7]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TransformerAutoencoder(nn.Module):
    """Autoencoder that reconstructs feature vectors through a Transformer encoder.

    Pipeline: linear input projection -> ``num_layers`` Transformer encoder
    layers -> linear decoder back to ``input_dim`` features.

    The encoder layers are built with ``batch_first=True`` so that a tensor
    laid out as ``(batch, seq, features)`` is interpreted that way. With the
    PyTorch default (``batch_first=False``) the same tensor is read as
    ``(seq, batch, features)``, which makes the rows of a mini-batch attend
    to one another and couples every reconstruction to the batch contents.

    Args:
        input_dim: Number of features per input row (and per output row).
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads per encoder layer.
        dim_feedforward: Hidden width of each layer's feed-forward block.
        d_model: Internal width of the encoder. Defaults to ``input_dim``
            rounded up to the nearest multiple of ``num_heads`` (i.e. exactly
            ``input_dim`` whenever that is already divisible).

    Raises:
        ValueError: If an explicit ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_dim, num_layers=1, num_heads=2, dim_feedforward=128, d_model=None):
        super().__init__()
        if d_model is None:
            # Multi-head attention requires d_model % num_heads == 0, so pick
            # the smallest valid width that can still hold every feature.
            d_model = -(-input_dim // num_heads) * num_heads
        elif d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )

        self.input_projection = nn.Linear(input_dim, d_model)
        encoder_layers = TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, input_dim)

    def forward(self, x):
        """Reconstruct ``x``.

        Args:
            x: Tensor of shape ``(batch, input_dim)`` or
                ``(batch, seq, input_dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        x = self.input_projection(x)
        # A flat (batch, features) input gets a length-1 sequence axis for the
        # encoder; an input that already has a sequence axis is passed through.
        is_flat = x.dim() == 2
        if is_flat:
            x = x.unsqueeze(1)  # (batch, 1, d_model)
        encoded = self.transformer_encoder(x)
        if is_flat:
            encoded = encoded.squeeze(1)
        return self.decoder(encoded)

# Build the autoencoder with one input feature per column of train_data.
autoencoder = TransformerAutoencoder(input_dim=train_data.shape[1])




In [8]:
class Discriminator(nn.Module):
    """Two-layer MLP that maps a feature row to a single score in (0, 1).

    Architecture: Linear(input_dim, hidden_dim) -> ReLU ->
    Linear(hidden_dim, 1) -> Sigmoid.

    Args:
        input_dim: Number of features per input row. When omitted, the width
            is taken from the notebook-level ``train_data`` frame, which keeps
            ``Discriminator()`` working exactly as before.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim=None, hidden_dim=128):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: one input per column of train_data.
            input_dim = train_data.shape[1]
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return the sigmoid score for each row of ``x`` (last axis has size 1)."""
        return self.model(x)

# Instantiate the discriminator; with no arguments its input width is read from train_data.
discriminator = Discriminator()


In [9]:
def contrastive_loss(features, batch_size, temperature=0.05):
    """Two-view NT-Xent (InfoNCE) contrastive loss.

    ``features`` stacks two views of the same ``batch_size`` samples: rows
    ``i`` and ``i + batch_size`` are views of sample ``i``. For every row the
    other view of the same sample is the positive and all rows belonging to
    other samples are negatives; a row's similarity with itself is excluded.

    Args:
        features: Tensor of shape ``(2 * batch_size, dim)``.
        batch_size: Number of distinct samples (half the number of rows).
        temperature: Divisor applied to the cosine similarities before the
            softmax cross-entropy.

    Returns:
        Scalar tensor: mean cross-entropy over all ``2 * batch_size`` anchors.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``features`` does
            not have shape ``(2 * batch_size, dim)``.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    n_rows = 2 * batch_size
    if features.dim() != 2 or features.shape[0] != n_rows:
        raise ValueError(
            f"features must have shape (2 * batch_size, dim) = ({n_rows}, dim), "
            f"got {tuple(features.shape)}"
        )
    # Build every helper tensor on the features' device so the loss also
    # works when the features are not on the CPU.
    device = features.device

    # sample_ids[r] is the index of the sample that row r is a view of.
    sample_ids = torch.cat(
        [torch.arange(batch_size, device=device) for _ in range(2)], dim=0
    )
    same_sample = sample_ids.unsqueeze(0) == sample_ids.unsqueeze(1)

    features = nn.functional.normalize(features, dim=1)
    similarity_matrix = torch.matmul(features, features.T)

    # Remove the diagonal: a row's similarity with itself is always 1 after
    # normalisation and carries no training signal.
    row_ids = torch.arange(n_rows, device=device)
    off_diagonal = row_ids.unsqueeze(0) != row_ids.unsqueeze(1)
    same_sample = same_sample[off_diagonal].reshape(n_rows, n_rows - 1)
    similarity_matrix = similarity_matrix[off_diagonal].reshape(n_rows, n_rows - 1)

    # Exactly one positive (the other view) and n_rows - 2 negatives per row.
    positives = similarity_matrix[same_sample].reshape(n_rows, 1)
    negatives = similarity_matrix[~same_sample].reshape(n_rows, n_rows - 2)

    # The positive sits in column 0, so the target class is 0 for every row.
    logits = torch.cat([positives, negatives], dim=1)
    labels = torch.zeros(n_rows, dtype=torch.long, device=device)

    return nn.CrossEntropyLoss()(logits / temperature, labels)


In [None]:
optimizer_gen = torch.optim.Adam(autoencoder.parameters(), lr=1e-4)
optimizer_disc = torch.optim.Adam(discriminator.parameters(), lr=1e-4)

# The loss modules hold no state, so build them once instead of on every batch.
bce_loss = nn.BCELoss()
mse_loss = nn.MSELoss()

epochs = 50
for epoch in range(epochs):
    for data in train_loader:
        # The loader yields 1-tuples; the batch is already (batch_size, n_features).
        # It must stay 2-D: the autoencoder adds its own length-1 axis, and the
        # discriminator output has to match the (batch_size, 1) BCE targets.
        real_data = data[0]
        n_rows = real_data.size(0)
        real_target = torch.ones(n_rows, 1, device=real_data.device)
        fake_target = torch.zeros(n_rows, 1, device=real_data.device)

        # Train discriminator: real rows -> 1, reconstructed rows -> 0.
        optimizer_disc.zero_grad()
        # detach() keeps the discriminator loss from back-propagating into the autoencoder.
        fake_data = autoencoder(real_data).detach()
        real_loss = bce_loss(discriminator(real_data), real_target)
        fake_loss = bce_loss(discriminator(fake_data), fake_target)
        disc_loss = (real_loss + fake_loss) / 2
        disc_loss.backward()
        optimizer_disc.step()

        # Train generator (autoencoder) on reconstruction error.
        # NOTE(review): the discriminator's output is not part of gen_loss, so the
        # autoencoder is trained on reconstruction only -- confirm whether an
        # adversarial term was intended here.
        optimizer_gen.zero_grad()
        reconstructed_data = autoencoder(real_data)
        gen_loss = mse_loss(reconstructed_data, real_data)
        gen_loss.backward()
        optimizer_gen.step()

    # Reports the losses of the last batch of the epoch.
    print(f'Epoch {epoch+1}, Loss D: {disc_loss.item()}, Loss G: {gen_loss.item()}')

# Add testing and evaluation as needed
