In [None]:
from transformers import BertTokenizer, BertModel
import torch

# Pretrained 'bert-base-uncased' tokenizer and encoder, loaded once at module
# level; encode_sentence() below reads both of them as globals.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# NOTE(review): bert_model is a free-standing global and is not registered as a
# submodule of any nn.Module defined in the cells shown here, so those modules'
# .parameters() will not include it -- confirm whether BERT is meant to be
# fine-tuned or only used as a fixed feature extractor.
bert_model = BertModel.from_pretrained('bert-base-uncased')

def encode_sentence(sentence):
    """Return the BERT [CLS] embedding for ``sentence``.

    The text is tokenized with the module-level ``tokenizer`` (PyTorch tensors,
    truncation on, padding on, at most 128 tokens) and passed through the
    module-level ``bert_model``.

    Args:
        sentence: Text handed straight to ``tokenizer``.

    Returns:
        ``last_hidden_state[:, 0, :]`` of the model output, i.e. the hidden
        state at sequence position 0 for every row of the encoded batch.
    """
    tokenizer_options = {
        'return_tensors': 'pt',
        'truncation': True,
        'padding': True,
        'max_length': 128,
    }
    encoded = tokenizer(sentence, **tokenizer_options)
    hidden_states = bert_model(**encoded).last_hidden_state
    cls_embedding = hidden_states[:, 0, :]  # position 0 is the CLS token
    return cls_embedding


In [None]:
import torch.nn as nn

class WordAttention(nn.Module):
    """Attention pooling that collapses a stack of embeddings into one vector.

    A single linear layer assigns a scalar score to every row of the input;
    the scores are normalised with a softmax along dim 0 and then used as
    weights for a sum over that same dimension.

    Args:
        hidden_dim: Size of the last dimension of the embeddings to pool.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Maps each embedding row to one unnormalised attention score.
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, word_embeddings):
        """Return the attention-weighted sum of ``word_embeddings`` over dim 0."""
        scores = self.attention(word_embeddings)
        weights = scores.softmax(dim=0)
        pooled = (weights * word_embeddings).sum(dim=0)
        return pooled  # Shape: [hidden_dim] for a [rows, hidden_dim] input

class SentenceAttention(nn.Module):
    """Attention pooling over a stack of sentence embeddings.

    Every row of the input gets a scalar score from one linear layer; a
    softmax along dim 0 turns the scores into weights, and the rows are summed
    with those weights.

    Args:
        hidden_dim: Size of the last dimension of the embeddings to pool.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Maps each sentence embedding to one unnormalised attention score.
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, sentence_embeddings):
        """Return the attention-weighted sum of ``sentence_embeddings`` over dim 0."""
        raw_scores = self.attention(sentence_embeddings)
        mixing_weights = raw_scores.softmax(dim=0)
        weighted_rows = mixing_weights * sentence_embeddings
        return weighted_rows.sum(dim=0)  # Shape: [hidden_dim] for a [rows, hidden_dim] input


In [None]:
class MultiTaskHierarchicalBERT(nn.Module):
    """Hierarchical attention classifier with a sentiment and an emotion head.

    Every document is reduced to a single vector in two stages: each of its
    sentences is encoded with ``encode_sentence`` and pooled by
    ``word_attention``; the sentence vectors of that one document are then
    pooled by ``sentence_attention``. Two linear heads score the resulting
    document vectors, one row of logits per document.

    NOTE(review): ``encode_sentence`` as defined in this notebook returns only
    the hidden state at position 0 (one row per input text), so
    ``word_attention`` sees a single row per sentence and its softmax weight
    is 1. Real word-level attention would need token-level embeddings.

    Args:
        hidden_dim: Size of the encoder embeddings and of every pooled vector.
        num_classes_sentiment: Number of output logits of the sentiment head.
        num_classes_emotion: Number of output logits of the emotion head.
    """

    def __init__(self, hidden_dim=768, num_classes_sentiment=3, num_classes_emotion=5):
        super(MultiTaskHierarchicalBERT, self).__init__()
        self.word_attention = WordAttention(hidden_dim)
        self.sentence_attention = SentenceAttention(hidden_dim)

        # Task-specific classifiers
        self.sentiment_head = nn.Linear(hidden_dim, num_classes_sentiment)
        self.emotion_head = nn.Linear(hidden_dim, num_classes_emotion)

    def _embed_document(self, document):
        """Pool one document (a string or a sequence of sentence strings) into one vector."""
        # A plain string is treated as a one-sentence document.
        sentences = [document] if isinstance(document, str) else list(document)
        if not sentences:
            raise ValueError("each document must contain at least one sentence")

        sentence_embeddings = torch.stack(
            [self.word_attention(encode_sentence(sentence)) for sentence in sentences]
        )
        # Attention runs over the sentences of THIS document only.
        return self.sentence_attention(sentence_embeddings)

    def forward(self, documents):
        """Compute sentiment and emotion logits for a batch of documents.

        Args:
            documents: Sequence of documents. Each document is either a string
                (one sentence) or a sequence of sentence strings. A bare string
                is treated as a batch holding one document.

        Returns:
            Tuple ``(sentiment_output, emotion_output)`` with one row of logits
            per document: ``[len(documents), num_classes_sentiment]`` and
            ``[len(documents), num_classes_emotion]``.

        Raises:
            ValueError: If ``documents`` is empty or a document has no sentences.
        """
        if isinstance(documents, str):
            # Iterating a bare string would yield single characters.
            documents = [documents]

        document_embeddings = [self._embed_document(doc) for doc in documents]
        if not document_embeddings:
            raise ValueError("documents must contain at least one document")

        # Keep the batch dimension: pooling across documents would merge the
        # whole batch into a single prediction and mismatch per-document labels.
        document_embeddings = torch.stack(document_embeddings)  # [batch, hidden_dim]

        # Task-specific outputs
        sentiment_output = self.sentiment_head(document_embeddings)
        emotion_output = self.emotion_head(document_embeddings)

        return sentiment_output, emotion_output


In [None]:
# Relative weight of each task in the combined objective. With both weights at
# 1.0 the objective is the plain sum of the two losses.
SENTIMENT_LOSS_WEIGHT = 1.0
EMOTION_LOSS_WEIGHT = 1.0

# NOTE(review): `model`, `num_epochs` and `train_loader` are not defined in the
# cells shown here; they must exist before this cell is run.
criterion = nn.CrossEntropyLoss()  # Same loss for both tasks (classification)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Training loop
for epoch in range(num_epochs):
    for documents, sentiment_labels, emotion_labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        sentiment_output, emotion_output = model(documents)

        # Compute individual losses
        sentiment_loss = criterion(sentiment_output, sentiment_labels)
        emotion_loss = criterion(emotion_output, emotion_labels)

        # Combine losses (weighted sum) and take one optimisation step
        total_loss = (
            SENTIMENT_LOSS_WEIGHT * sentiment_loss
            + EMOTION_LOSS_WEIGHT * emotion_loss
        )
        total_loss.backward()
        optimizer.step()


In [None]:
# Alternative objective: sentiment loss weighted 0.7, emotion loss weighted 0.3.
# NOTE(review): as a standalone cell this only rebinds `total_loss` from
# whatever `sentiment_loss` / `emotion_loss` currently hold; nothing here calls
# backward() or steps an optimizer. To train with these weights the expression
# presumably belongs inside the training loop -- confirm intent.
total_loss = 0.7 * sentiment_loss + 0.3 * emotion_loss  # Weighted sum
