#### SOTA_MODELS

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn
from transformers import BertForSequenceClassification
from transformers import BertModel

#Data
# Raw splits; each row is expected to carry "sentence", "aspect" and
# "sentiment_dict" (the fields read by extract_valid_samples below).
train_df = pd.read_parquet(path="train.parquet")
test_df = pd.read_parquet(path="test2.parquet")

def extract_valid_samples(df):
    """Build (text, label) pairs for rows whose aspect has a sentiment entry.

    For every row the sentiment is looked up in ``row["sentiment_dict"]``
    under the key ``row["aspect"]``. Rows where that lookup yields ``None``
    (key missing or value stored as ``None``) are dropped.

    Args:
        df: DataFrame with "sentence", "aspect" and "sentiment_dict" columns;
            each "sentiment_dict" cell must offer a ``.get`` method.

    Returns:
        A new DataFrame with columns "text" (the prompt
        ``"aspect: <aspect> sentence: <sentence>"``) and "label" (the
        sentiment value found for the aspect), in the original row order.
    """
    samples = []
    for _, record in df.iterrows():
        text, target, polarities = (
            record["sentence"],
            record["aspect"],
            record["sentiment_dict"],
        )
        polarity = polarities.get(target, None)
        if polarity is None:
            # No sentiment recorded for this aspect: nothing to learn from.
            continue
        samples.append((f"aspect: {target} sentence: {text}", polarity))
    return pd.DataFrame(samples, columns=["text", "label"])

# Flatten both splits into (text, label) frames.
train_data = extract_valid_samples(df=train_df)
test_data = extract_valid_samples(df=test_df)

# Persist the processed frames so the preprocessing step can reload them.
train_data.to_csv(path_or_buf="train_processed.csv", index=False)
test_data.to_csv(path_or_buf="test_processed.csv", index=False)

# Quick visual sanity check of the first rows of each split.
print(train_data.head())
print(test_data.head())



#Preprocessing
train_df = pd.read_csv("train_processed.csv")
test_df = pd.read_csv("test_processed.csv")

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Raw sentiment polarity -> contiguous class id starting at 0.
label_map = {-1: 0, 0: 1, 1: 2}


def _encode_labels(frame, split_name):
    """Return ``frame["label"]`` translated through ``label_map``.

    ``Series.map`` silently converts values that are not keys of the mapping
    into NaN, which would only surface much later as an invalid training
    target. The column is therefore validated before it is mapped.

    Args:
        frame: DataFrame holding a "label" column with raw sentiment values.
        split_name: Human-readable split name used in the error message.

    Returns:
        A Series of class ids aligned with ``frame``.

    Raises:
        ValueError: If the column holds a value (including NaN) that is not
            a key of ``label_map``.
    """
    known = frame["label"].isin(list(label_map))
    if not known.all():
        unexpected = frame.loc[~known, "label"].unique().tolist()
        raise ValueError(
            f"{split_name} split contains labels outside "
            f"{sorted(label_map)}: {unexpected}"
        )
    return frame["label"].map(label_map)


train_df["label"] = _encode_labels(train_df, "train")
test_df["label"] = _encode_labels(test_df, "test")

class ABSADataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup and adds its label.

    Args:
        texts: Indexable collection of input strings.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors='pt')``;
            its result must expose ``.items()`` with tensor values.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized sample at ``idx`` plus a ``'labels'`` tensor."""
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt',
        )
        sample = {}
        for name, tensor in encoded.items():
            # Drop dim 0 when it has size 1 (presumably the batch axis that
            # return_tensors='pt' adds for a single text).
            sample[name] = tensor.squeeze(0)
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample



#Bert_Base_Model
class Bert_Base(nn.Module):
    """Wrapper around ``BertForSequenceClassification`` returning ``(loss, logits)``.

    Args:
        num_classes: Number of target classes, forwarded as ``num_labels``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.bert = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=num_classes)
        print("Loaded Bert_Base")

    def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
        """Run the wrapped classifier.

        Args:
            input_ids: Token ids, passed through unchanged.
            attention_mask: Attention mask, passed through unchanged.
            token_type_ids: Segment ids, passed through unchanged.
            labels: Optional class ids. May be omitted for inference; the
                loss is then whatever the wrapped model reports (``None``
                per the Transformers documentation).

        Returns:
            Tuple ``(loss, logits)`` taken from the wrapped model's output.
        """
        output = self.bert(input_ids=input_ids,
                           attention_mask=attention_mask,
                           token_type_ids=token_type_ids,
                           labels=labels)
        return output.loss, output.logits
    

#Bert_LSTM_Model
class Bert_LSTM(nn.Module):
    """BERT encoder whose per-layer first-position vectors are pooled by an LSTM.

    The vector at position 0 of every hidden state except index 0 is stacked
    into a sequence (one step per layer), fed through an LSTM, and the last
    LSTM output is classified by a linear layer.

    Args:
        num_classes: Number of output classes of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
        self.lstm = nn.LSTM(input_size=768, hidden_size=128, batch_first=True)
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(128, num_classes)
        print("Loaded Bert_LSTM")

    def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
        """Classify a batch and optionally compute the cross-entropy loss.

        Args:
            input_ids: Token ids forwarded to BERT.
            attention_mask: Attention mask forwarded to BERT.
            token_type_ids: Segment ids forwarded to BERT.
            labels: Optional class ids. When omitted (inference), no loss is
                computed.

        Returns:
            Tuple ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is
            ``None``.
        """
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        # Skip index 0 (the embedding output per the Transformers docs) and
        # keep position 0 of each remaining state. Assumes each state is
        # (batch, seq, 768), so the stack is (batch, layers, 768).
        layer_cls = [state[:, 0] for state in outputs.hidden_states[1:]]
        x = torch.stack(layer_cls, dim=1)
        lstm_out, _ = self.lstm(x)
        # Use the LSTM output at the final step (i.e. after the last layer).
        out = self.dropout(lstm_out[:, -1, :])
        logits = self.fc(out)
        loss = None if labels is None else nn.CrossEntropyLoss()(logits, labels)
        return loss, logits


#Bert_Attention_Model
class Bert_Attention(nn.Module):
    """BERT encoder whose per-layer first-position vectors are attention-pooled.

    The vector at position 0 of the last 12 hidden states is stacked into a
    sequence, weighted by a learned query ``q``, projected by ``w_h`` and
    classified by a linear layer.

    Args:
        num_classes: Number of output classes of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
        self.q = nn.Parameter(torch.randn(1, 768))
        self.w_h = nn.Parameter(torch.randn(768, 256))
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(256, num_classes)
        print("Loaded Bert_Attention")

    def attention(self, h):
        """Pool ``h`` with softmax weights from ``q`` and project with ``w_h``.

        Shape comments assume ``h`` is (batch, layers, 768) with batch and
        layers as produced by ``forward``.
        """
        # Dot product of the query with every layer vector -> (batch, layers).
        scores = torch.matmul(self.q, h.transpose(-2, -1)).squeeze(1)
        weights = torch.softmax(scores, dim=-1)
        # Weighted sum over layers, as a column vector -> (batch, 768, 1).
        pooled = torch.matmul(weights.unsqueeze(1), h).transpose(-2, -1)
        # Project 768 -> 256 and drop the trailing singleton -> (batch, 256).
        return torch.matmul(self.w_h.T, pooled).squeeze(2)

    def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
        """Classify a batch and optionally compute the cross-entropy loss.

        Args:
            input_ids: Token ids forwarded to BERT.
            attention_mask: Attention mask forwarded to BERT.
            token_type_ids: Segment ids forwarded to BERT.
            labels: Optional class ids. When omitted (inference), no loss is
                computed.

        Returns:
            Tuple ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is
            ``None``.
        """
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        # Position 0 of each of the last 12 hidden states, stacked along dim 1.
        layer_cls = [state[:, 0] for state in outputs.hidden_states[-12:]]
        x = torch.stack(layer_cls, dim=1)
        attn_out = self.attention(x)
        out = self.dropout(attn_out)
        logits = self.fc(out)
        loss = None if labels is None else nn.CrossEntropyLoss()(logits, labels)
        return loss, logits

#Training and Evaluation
def train_eval_model(model, train_loader, test_loader, epochs=3):
    """Fine-tune ``model`` and print a classification report on the test set.

    Relies on the module-level ``device`` (where tensors and the model are
    placed) and ``label_map`` (inverted to report the original label values).

    Args:
        model: Module called as ``model(**batch)`` and returning
            ``(loss, logits)``.
        train_loader: Iterable of dict batches used for optimisation.
        test_loader: Iterable of dict batches (including ``'labels'``) used
            for evaluation.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress and the final report are printed.
    """
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

    # ---- training phase ----
    model.train()
    for epoch_no in range(1, epochs + 1):
        # Sum (not mean) of the per-batch losses over the epoch.
        running_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            on_device = {}
            for name, tensor in batch.items():
                on_device[name] = tensor.to(device)
            batch_loss, _ = model(**on_device)
            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()
        print(f"Epoch {epoch_no} - Loss: {running_loss:.4f}")

    # ---- evaluation phase ----
    model.eval()
    predicted_ids = []
    gold_ids = []
    with torch.no_grad():
        for batch in test_loader:
            on_device = {}
            for name, tensor in batch.items():
                on_device[name] = tensor.to(device)
            _, logits = model(**on_device)
            predicted_ids.extend(logits.argmax(dim=1).cpu().tolist())
            gold_ids.extend(on_device['labels'].cpu().tolist())

    # Translate class ids back to the original label values for the report.
    id_to_label = {class_id: raw for raw, class_id in label_map.items()}
    y_true = list(map(id_to_label.__getitem__, gold_ids))
    y_pred = list(map(id_to_label.__getitem__, predicted_ids))
    print(classification_report(y_true, y_pred, digits=3))
    

# Fine-tune and evaluate the Bert_Base classifier.
train_dataset = ABSADataset(
    texts=train_df["text"].tolist(),
    labels=train_df["label"].tolist(),
    tokenizer=tokenizer,
)
test_dataset = ABSADataset(
    texts=test_df["text"].tolist(),
    labels=test_df["label"].tolist(),
    tokenizer=tokenizer,
)
# Only the training batches are shuffled; evaluation keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)
model = Bert_Base(num_classes=3)
train_eval_model(model=model, train_loader=train_loader, test_loader=test_loader)



# Fine-tune and evaluate the Bert_LSTM classifier.
train_dataset = ABSADataset(
    texts=train_df["text"].tolist(),
    labels=train_df["label"].tolist(),
    tokenizer=tokenizer,
)
test_dataset = ABSADataset(
    texts=test_df["text"].tolist(),
    labels=test_df["label"].tolist(),
    tokenizer=tokenizer,
)
# Only the training batches are shuffled; evaluation keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)
model = Bert_LSTM(num_classes=3)
train_eval_model(model=model, train_loader=train_loader, test_loader=test_loader)



# Fine-tune and evaluate the Bert_Attention classifier.
train_dataset = ABSADataset(
    texts=train_df["text"].tolist(),
    labels=train_df["label"].tolist(),
    tokenizer=tokenizer,
)
test_dataset = ABSADataset(
    texts=test_df["text"].tolist(),
    labels=test_df["label"].tolist(),
    tokenizer=tokenizer,
)
# Only the training batches are shuffled; evaluation keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)
model = Bert_Attention(num_classes=3)
train_eval_model(model=model, train_loader=train_loader, test_loader=test_loader)

#### Visualization

In [None]:
import matplotlib.pyplot as plt

# Hard-coded weighted-average F1 per model, in the same order as `models`.
models = ['BERT-base', 'BERT-LSTM', 'BERT-Attention', 'BERT-MultiheadAttention']
f1_scores = [0.464, 0.490, 0.582, 0.8529]
bar_colors = ['skyblue', 'lightgreen', 'orange', 'purple']

plt.figure(figsize=(8, 5))
bars = plt.bar(models, f1_scores, color=bar_colors)

# Write each score just above the centre of its bar.
for bar, score in zip(bars, f1_scores):
    x_center = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    plt.text(x_center, label_y, f"{score:.3f}",
             ha='center', va='bottom', fontsize=10)

# Axis cosmetics: F1 lives in [0, 1]; dashed horizontal guides only.
plt.ylim(0, 1)
plt.ylabel('Weighted Avg F1 Score')
plt.title('Weighted F1 Comparison Across BERT-Based SOTA Models')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()