In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset

from Architectures.Basic_Sequence_classification import Transformer_For_Sequence_Classification2

In [4]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score

## Loading Tweet Dataset

In [5]:
dataset = load_dataset("tweet_eval", "emotion")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

NUM_LABELS = 6  # anger, joy, optimism, sadness, fear, love
MAX_LEN = 128

## Preprocess

In [6]:
def encode(examples):
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=MAX_LEN)

encoded_dataset = dataset.map(encode)

In [7]:
class TweetDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        item = {
            'input_ids': torch.tensor(self.dataset[idx]['input_ids']),
            'attention_mask': torch.tensor(self.dataset[idx]['attention_mask']),
        }
        item['labels'] = torch.tensor(self.dataset[idx]['label'])
        return item

train_dataset = TweetDataset(encoded_dataset['train'])
val_dataset = TweetDataset(encoded_dataset['validation'])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

## Model Definition and Training

In [14]:
class Config:
    def __init__(self,
                vocab_size,
                embed_dim=128,
                num_layers=10,
                num_heads=8,
                ff_dim=512,
                pre_normalization=True,
                max_position_embeddings=128,
                dropout_prob=0.1,
                num_labels=6):
        self.vocab_size = vocab_size  # Tokenizer vocab size
        self.embed_dim = embed_dim  # Embedding & input to attention
        self.num_layers = num_layers  # Number of encoder layers
        self.num_heads = num_heads  # Number of heads in Multi-Head Attention
        self.ff_dim = ff_dim  # Feed Forward hidden dimension
        self.pre_normalization = pre_normalization  # LayerNorm before or after attention/FFN
        self.max_position_embeddings = max_position_embeddings  # Max sequence length
        self.dropout_prob = dropout_prob  # Dropout probability
        self.num_labels = num_labels  # Output classes (for classification)

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Transformer_For_Sequence_Classification2(config=Config(vocab_size=tokenizer.vocab_size)).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
loss_fn = nn.CrossEntropyLoss()

In [17]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        # Get batch data
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Forward pass
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        
        # Calculate loss, backward pass, and update weights
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")
    
    # Validation
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            logits = model(input_ids, attention_mask)
            predictions = torch.argmax(logits, dim=1)
            
            preds.extend(predictions.cpu().numpy())
            targets.extend(labels.cpu().numpy())
    
    acc = accuracy_score(targets, preds)
    f1 = f1_score(targets, preds, average='macro')
    print(f"Validation Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")


Epoch 1, Loss: 0.3865
Validation Accuracy: 0.6016, F1 Score: 0.4856
Epoch 2, Loss: 0.3432
Validation Accuracy: 0.6390, F1 Score: 0.5629
Epoch 3, Loss: 0.2702
Validation Accuracy: 0.5909, F1 Score: 0.5245
Epoch 4, Loss: 0.2418
Validation Accuracy: 0.6096, F1 Score: 0.5437
Epoch 5, Loss: 0.2203
Validation Accuracy: 0.5615, F1 Score: 0.5163
Epoch 6, Loss: 0.1844
Validation Accuracy: 0.5963, F1 Score: 0.5265
Epoch 7, Loss: 0.1735
Validation Accuracy: 0.5909, F1 Score: 0.5365
Epoch 8, Loss: 0.1513
Validation Accuracy: 0.5936, F1 Score: 0.5344
Epoch 9, Loss: 0.1221
Validation Accuracy: 0.5749, F1 Score: 0.4926
Epoch 10, Loss: 0.1442
Validation Accuracy: 0.5695, F1 Score: 0.5225


In [13]:
torch.save(model.state_dict(), 'best_model.pt')
