In [8]:
import torch
from torch import nn
from transformers import BertTokenizer, BertModel
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import random
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast

# Load the GoEmotions dataset
dataset = load_dataset("go_emotions")
train_data = dataset['train']
val_data = dataset['validation']

# Initialize BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Tokenize and save datasets for reuse
def tokenize_function(examples):
    return tokenizer(
        examples['text'],
        padding='max_length',
        truncation=True,
        max_length=128
    )

if not torch.cuda.is_available():  # Tokenize only if data isn't pre-saved
    train_data = train_data.map(tokenize_function, batched=True)
    val_data = val_data.map(tokenize_function, batched=True)

    # Set format to PyTorch tensors
    train_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    val_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Save tokenized data
    torch.save(train_data, "train_data_tokenized.pt")
    torch.save(val_data, "val_data_tokenized.pt")
else:  # Load pre-tokenized data
    train_data = torch.load("train_data_tokenized.pt")
    val_data = torch.load("val_data_tokenized.pt")

# Define a contrastive loss function
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        distance = nn.functional.pairwise_distance(output1, output2)
        loss = torch.mean((1 - label) * torch.pow(distance, 2) +
                          label * torch.pow(torch.clamp(self.margin - distance, min=0.0), 2))
        return loss

# Create the Contrastive Model
class ContrastiveModel(nn.Module):
    def __init__(self):
        super(ContrastiveModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.pooler_output

# Create a simple Contrastive Dataset
class ContrastiveDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        current_labels = self.dataset[idx]['labels']
        label = torch.argmax(current_labels).item()

        is_positive = random.choice([True, False])

        if is_positive:
            positive_idx = idx
            while positive_idx == idx:
                positive_idx = random.randint(0, len(self.dataset) - 1)
            return {
                'input_ids': self.dataset[idx]['input_ids'],
                'attention_mask': self.dataset[idx]['attention_mask'],
                'labels': torch.tensor(1, dtype=torch.float),
                'input_ids_2': self.dataset[positive_idx]['input_ids'],
                'attention_mask_2': self.dataset[positive_idx]['attention_mask']
            }
        else:
            negative_idx = idx
            while negative_idx == idx or torch.argmax(self.dataset[negative_idx]['labels']).item() == label:
                negative_idx = random.randint(0, len(self.dataset) - 1)
            return {
                'input_ids': self.dataset[idx]['input_ids'],
                'attention_mask': self.dataset[idx]['attention_mask'],
                'labels': torch.tensor(0, dtype=torch.float),
                'input_ids_2': self.dataset[negative_idx]['input_ids'],
                'attention_mask_2': self.dataset[negative_idx]['attention_mask']
            }

# Use the ContrastiveDataset
train_dataset = ContrastiveDataset(train_data)
val_dataset = ContrastiveDataset(val_data)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, num_workers=0, pin_memory=True)

# Initialize model, optimizer, and scaler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ContrastiveModel().to(device)
loss_fn = ContrastiveLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scaler = GradScaler()

# Training loop with mixed precision and gradient accumulation
accumulation_steps = 4

def train_with_optimizations(model, dataloader, loss_fn, optimizer, scaler, accumulation_steps):
    model.train()
    optimizer.zero_grad()

    for batch_idx, batch in enumerate(tqdm(dataloader, desc="Training", leave=False)):
        input_ids_1 = batch['input_ids'].to(device)
        attention_mask_1 = batch['attention_mask'].to(device)
        input_ids_2 = batch['input_ids_2'].to(device)
        attention_mask_2 = batch['attention_mask_2'].to(device)
        labels = batch['labels'].to(device)

        with autocast():
            output1 = model(input_ids_1, attention_mask_1)
            output2 = model(input_ids_2, attention_mask_2)
            loss = loss_fn(output1, output2, labels) / accumulation_steps

        scaler.scale(loss).backward()

        if (batch_idx + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

# Evaluation loop
def evaluate(model, dataloader):
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating", leave=False):
            input_ids_1 = batch['input_ids'].to(device)
            attention_mask_1 = batch['attention_mask'].to(device)
            input_ids_2 = batch['input_ids_2'].to(device)
            attention_mask_2 = batch['attention_mask_2'].to(device)
            labels = batch['labels'].to(device)

            output1 = model(input_ids_1, attention_mask_1)
            output2 = model(input_ids_2, attention_mask_2)

            distance = nn.functional.pairwise_distance(output1, output2)
            preds = (distance < 1.0).float()

            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(true_labels, predictions)
    return accuracy

# Main training and evaluation
for epoch in range(1 ):
    print(f"Epoch {epoch + 1}/3")
    train_with_optimizations(model, train_dataloader, loss_fn, optimizer, scaler, accumulation_steps)
    val_accuracy = evaluate(model, val_dataloader)
    print(f'Validation Accuracy: {val_accuracy:.4f}')


  scaler = GradScaler()


Epoch 1/3


  with autocast():
                                                  

KeyboardInterrupt: 

In [9]:
model_save_path = "contrastive_model.pth"
model.load_state_dict(torch.load(model_save_path, map_location=device))

  model.load_state_dict(torch.load(model_save_path, map_location=device))


FileNotFoundError: [Errno 2] No such file or directory: 'contrastive_model.pth'