In [24]:
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from transformers import BertTokenizer, BertModel
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [25]:
def get_token_index(tokens, start):
    char_index = 0
    
    for i, token in enumerate(tokens):
        token_start = char_index
        token_end = char_index + len(token)
        
        if token_start <= start and start <= token_end:
            return i
        
        char_index = token_end + 1
    
    return -1

In [26]:
def preprocessing(input_file, output_file):  
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    preprocessed_data = []

    for item in data:
        sentence = item['sentence']
        tokens = sentence.split()
        aspect_terms = item['aspect_terms']

        for aspect_term in aspect_terms:
            start = int(aspect_term['from'])
            end = int(aspect_term['to'])
            index = get_token_index(tokens, start)

            preprocessed_item = {
                'tokens': tokens,
                'polarity': aspect_term['polarity'],
                'aspect_term': [aspect_term['term']],
                'index': index
            }
            preprocessed_data.append(preprocessed_item)

    with open(output_file, 'w', encoding='utf-8') as out_file:
        json.dump(preprocessed_data, out_file, indent=4, ensure_ascii=False)

In [27]:
preprocessing('train.json', 'train_task_2.json')
preprocessing('val.json', 'val_task_2.json')

In [28]:
bert_model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = BertModel.from_pretrained(bert_model_name)

def get_bert_embeddings(sentence_tokens, aspect):
    sentence = " ".join(sentence_tokens)

    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    aspect_inputs = tokenizer(aspect, return_tensors="pt", padding=True, truncation=True)

    with torch.no_grad():
        sentence_outputs = bert_model(**inputs)
        aspect_outputs = bert_model(**aspect_inputs)

    sentence_embedding = sentence_outputs.last_hidden_state[:, 0, :]  # [CLS] embedding
    aspect_embedding = aspect_outputs.last_hidden_state[:, 0, :]  # Aspect [CLS] embedding

    return sentence_embedding, aspect_embedding


In [29]:
class ABSADataset(Dataset):
    def __init__(self, data, label_map, bert_model="bert-base-uncased", max_len=35):
        """
        Args:
            data (list): List of dicts with 'tokens', 'aspect', and 'polarity'.
            label_map (dict): Mapping of sentiment labels to integers.
            bert_model (str): Pretrained BERT model name.
            max_len (int): Max sequence length for tokenization.
        """
        self.data = data
        self.label_map = label_map
        self.tokenizer = BertTokenizer.from_pretrained(bert_model)
        self.bert_model = BertModel.from_pretrained(bert_model)
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        sentence = " ".join(item["tokens"])  # Convert tokens to a full sentence
        aspect = item["aspect_term"]
        polarity = self.label_map[item["polarity"]]

        # Tokenize sentence
        sentence_inputs = self.tokenizer(sentence, padding="max_length", truncation=True,
                                         max_length=self.max_len, return_tensors="pt")
        
        # Tokenize aspect
        aspect_inputs = self.tokenizer(aspect, padding="max_length", truncation=True,
                                       max_length=self.max_len, return_tensors="pt")

        # Compute BERT embeddings (disable gradients for efficiency)
        with torch.no_grad():
            sentence_embedding = self.bert_model(**sentence_inputs).last_hidden_state.squeeze(0)  # [seq_len, emb_dim]
            aspect_embedding = self.bert_model(**aspect_inputs).last_hidden_state[:, 0, :]  # CLS token

        return sentence_embedding, aspect_embedding, torch.tensor(polarity, dtype=torch.long)


In [30]:
class AspectAwareLSTM(nn.Module):
    def __init__(self, embedding_dim=768, hidden_size=128, aspect_dim=768, num_lstm_layers=1, dropout = 0.5):
        super(AspectAwareLSTM, self).__init__()
        self.hidden_size = hidden_size

        # LSTM (Processes BERT word embeddings)
        self.lstm = nn.GRU(embedding_dim, hidden_size, num_layers=num_lstm_layers, batch_first=True)
        # Attention mechanism (Concatenating hidden states with aspect embeddings)
        self.attention = nn.Linear(hidden_size * 2 + aspect_dim, 1, bias=False)  # BiLSTM outputs 2*hidden_size

        # Transformation Layer Before Softmax
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)  
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        # Final output layer (4 sentiment classes: pos, neg, neutral, conflict)
        self.fc_output = nn.Linear(hidden_size, 4)

    def forward(self, word_embeddings, aspect_embeddings):
        """
        sentences: [batch_size, seq_len]
        aspects: [batch_size] (aspect terms as indices)
        """
        batch_size, seq_len, _ = word_embeddings.shape  

        # LSTM Processing
        lstm_out, _ = self.lstm(word_embeddings)  # Output shape: [batch_size, seq_len, hidden_size * 2]

        # Concatenate Aspect Embedding with LSTM Hidden States
        aspect_repeated = aspect_embeddings.expand(-1, seq_len, -1)
        att_input = torch.cat([lstm_out, aspect_repeated], dim=-1)  # [batch_size, seq_len, hidden_size + aspect_dim]

        # Compute Aspect-aware Attention Scores
        att_weights = torch.tanh(self.attention(att_input))  # [batch_size, seq_len, 1]
        att_weights = torch.softmax(att_weights, dim=1)  # Normalize across sequence

        # Compute Weighted Sum of LSTM Hidden States
        weighted_sum = torch.sum(lstm_out * att_weights, dim=1)  # [batch_size, hidden_size]

        # New Fully Connected Transformation Layer
        transformed_features = self.fc_hidden(weighted_sum)  # [batch_size, hidden_size // 2]
        transformed_features = self.activation(transformed_features)  # Non-linearity
        transformed_features = self.dropout(transformed_features)   
        # Sentiment Prediction (Softmax over Output)
        output = self.fc_output(transformed_features)  # [batch_size, 4] (logits for sentiment classes)

        return output

In [31]:
def train(train_dataloader, val_dataloader, model, device, num_epochs, lr):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    train_loss_list = []
    val_loss_list = []
    best_val_loss = float("inf")
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for batch in train_dataloader:
            tokens, aspect, labels = batch  # Extract batch components
            tokens, aspect, labels = tokens.to(device), aspect.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(tokens, aspect)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss

            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            # Track metrics
            total_loss += loss.item()
            _, predicted = torch.max(outputs, dim=1)  # Get predicted class
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        avg_train_loss = total_loss / len(train_dataloader)

        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for batch in val_dataloader:
                tokens, aspect, labels = batch
                tokens, aspect, labels = tokens.to(device), aspect.to(device), labels.to(device)

                outputs = model(tokens, aspect)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)

        val_acc = val_correct / val_total
        avg_val_loss = val_loss / len(val_dataloader)

        train_loss_list.append(avg_train_loss)
        val_loss_list.append(avg_val_loss)
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), 'AspectAwareLSTM.pth')

        print(f"Epoch [{epoch+1}/{num_epochs}] - "
              f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f} | "
              f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
    return train_loss_list, val_loss_list

In [32]:
with open('train_task_2.json', 'r', encoding='utf-8') as file:
    train_data = json.load(file)

polarities = []
token_lengths = []
for item in train_data:
    if(item['polarity'] not in polarities):
        polarities.append(item['polarity'])
    token_lengths.append(len(item['tokens']))
    
print("polarities:", polarities)
print("Max Sequence Length:", np.percentile(token_lengths, 95))


with open('val_task_2.json', 'r', encoding='utf-8') as file:
    val_data = json.load(file)


polarities: ['negative', 'positive', 'neutral', 'conflict']
Max Sequence Length: 35.0


In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AspectAwareLSTM()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
label_map = {"positive": 0, "negative": 1, "neutral": 2, "conflict": 3}
num_epochs = 10
learning_rate = 1e-3
train_dataset = ABSADataset(train_data, label_map)
val_dataset = ABSADataset(val_data, label_map)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)

In [34]:
train_loss_list, val_loss_list = train(train_dataloader, val_dataloader, model, device, num_epochs, learning_rate)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1120x896 and 1024x1)