<a href="https://colab.research.google.com/github/GinuraAdikari/InsightHive/blob/Sentiment_Analysis/Model_ABSA_DataSet2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer, AdamW, get_scheduler
import numpy as np
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

ABSA = pd.read_csv("ABSA_dataset.csv")

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [None]:
ABSA.head()

Unnamed: 0,tokens,aspect_mask,sentiment
0,"['one', 'best', 'game', 'music', 'soundtrack',...","[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, ...",1
1,"['best', 'purchase', 'ever', 'bought', 'ex', '...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, ...",1
2,"['book', 'slow', 'weak', 'one', 'best', '##st'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, ...",0
3,"['must', '##rea', '##d', 'every', 'southern', ...","[0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, ...",1
4,"['horrible', 'watch', 'napoleon', 'want', 'fun...","[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",0


In [None]:
from sklearn.model_selection import train_test_split

# train (80%) and test (20%)
train_df, test_df = train_test_split(ABSA, test_size=0.2, random_state=42, stratify=ABSA["sentiment"])

print("Train and test datasets created!")

Train and test datasets created!


In [None]:
train_df.head()

Unnamed: 0,tokens,aspect_mask,sentiment
163,"['hey', 'hey', 'wanted', 'say', 'awesome', 'cd...","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ...",1
549,"['fantastic', 'information', 'i', 've', 'learn...","[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
988,"['son', 'law', 'loved']","[1, 1, 0]",1
512,"['seen', 'old', 'movie', 'many', 'time', 'grea...","[0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0]",1
882,"['work', 'great', 'house', 'extend', 'wi', '##...","[0, 1, 1, 1, 1, 1, 1, 1]",1


In [None]:
test_df.head()

Unnamed: 0,tokens,aspect_mask,sentiment
684,"['book', 'must', 'must', 'pas', 'child', 'fina...","[1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, ...",1
972,"['reading', 'review', 'almost', 'regretted', '...","[1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ...",1
1045,"['bought', 'husband', 'galaxy', 'tab', 'use', ...","[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]",1
845,"['fast', 'easy', 'setup', 'ca', 'nt', 'ask', '...","[1, 1, 1, 0, 0, 0, 0, 0]",1
936,"['log', '##ite', '##ch', 'wireless', 'gaming',...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, ...",1


In [None]:
class ABSADataset(Dataset):
    def __init__(self, df, tokenizer, max_length=128):
        self.tokens = df["tokens"].tolist()
        self.aspect_masks = df["aspect_mask"].tolist()
        self.sentiments = df["sentiment"].tolist()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        tokens = self.tokens[idx]

        # ✅ Ensure tokens are lists (avoid string format issues)
        if isinstance(tokens, str):
            tokens = eval(tokens)
        elif not isinstance(tokens, list):
            tokens = [tokens]

        input_ids = self.tokenizer.convert_tokens_to_ids(tokens)

        # ✅ Use "aspect_mask" directly
        aspect_mask = self.aspect_masks[idx]
        if isinstance(aspect_mask, str):
            aspect_mask = eval(aspect_mask)
        elif not isinstance(aspect_mask, list):
            aspect_mask = [aspect_mask]

        # ✅ Create attention mask (1 for actual tokens, 0 for padding)
        attention_mask = [1] * len(input_ids)

        # ✅ Padding & Truncation
        padding_length = self.max_length - len(input_ids)
        if padding_length > 0:
            input_ids += [0] * padding_length
            attention_mask += [0] * padding_length
            aspect_mask += [0] * padding_length
        else:
            input_ids = input_ids[:self.max_length]
            attention_mask = attention_mask[:self.max_length]
            aspect_mask = aspect_mask[:self.max_length]

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
            "aspect_mask": torch.tensor(aspect_mask, dtype=torch.long),
            "labels": torch.tensor(self.sentiments[idx], dtype=torch.long)
        }

print("✅ ABSADataset class loaded successfully!")

✅ ABSADataset class loaded successfully!


In [None]:
class BertForABSA(nn.Module):
    def __init__(self, bert_model="bert-base-uncased", num_labels=2):
        super(BertForABSA, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model)
        self.aspect_attention = nn.Linear(self.bert.config.hidden_size, 1)  # Aspect-aware attention
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)  # Sentiment classification
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_ids, attention_mask, aspect_mask, labels=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state

        # ✅ Apply aspect-aware attention
        aspect_weights = torch.softmax(self.aspect_attention(hidden_states), dim=1)
        aspect_weights = aspect_weights * aspect_mask.unsqueeze(-1)
        aspect_representations = hidden_states * aspect_weights

        # ✅ Use sum instead of mean for better aspect representation
        pooled_output = torch.sum(aspect_representations, dim=1)

        # Classification layer
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss = loss_fn(logits, labels)

        return loss, logits

In [None]:
# ✅ Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# ✅ Load dataset
train_dataset = ABSADataset(train_df, tokenizer)
test_dataset = ABSADataset(test_df, tokenizer)

# ✅ Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# ✅ Initialize Model
model = BertForABSA(num_labels=2).to(device)

# ✅ Compute class weights for imbalance handling
class_labels = np.array([0, 1])
class_weights = compute_class_weight("balanced", classes=class_labels, y=train_df["sentiment"].values)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# ✅ Define weighted loss function
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# ✅ Define optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
num_epochs = 3
num_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=100, num_training_steps=num_training_steps)

# ✅ Training Loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in train_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}

        loss, logits = model(batch["input_ids"], batch["attention_mask"], batch["aspect_mask"], batch["labels"])
        loss = loss_fn(logits, batch["labels"])

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {total_loss / len(train_dataloader):.4f}")



Epoch 1: Loss = 0.6875
Epoch 2: Loss = 0.6745
Epoch 3: Loss = 0.4988


In [17]:
# ✅ Evaluate Model
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for batch in test_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        _, logits = model(batch["input_ids"], batch["attention_mask"], batch["aspect_mask"])
        predictions = torch.argmax(logits, dim=-1).cpu().numpy()
        y_pred.extend(predictions)
        y_true.extend(batch["labels"].cpu().numpy())

# ✅ Print classification report
print(classification_report(y_true, y_pred, target_names=["Negative", "Positive"]))

              precision    recall  f1-score   support

    Negative       0.71      0.68      0.70        82
    Positive       0.86      0.87      0.87       180

    accuracy                           0.81       262
   macro avg       0.78      0.78      0.78       262
weighted avg       0.81      0.81      0.81       262



In [21]:
import spacy

# ✅ Load spaCy NLP model
nlp = spacy.load("en_core_web_sm")

def extract_aspects(sentence):
    """
    Extracts potential aspects from a sentence using POS tagging.
    Returns a list of extracted aspect terms.
    """
    doc = nlp(sentence)
    aspects = [token.text for token in doc if token.pos_ in ["NOUN"]]  # Extract only nouns
    return aspects

def predict_aspect_sentiment_auto(model, tokenizer, sentence):
    """
    Automatically extracts aspects from a sentence and predicts their sentiment.
    Returns a dictionary {aspect: sentiment}.
    """
    aspects = extract_aspects(sentence)  # ✅ Extract aspects using POS tagging
    model.eval()
    inputs = tokenizer(sentence, padding="max_length", truncation=True, return_tensors="pt").to(device)

    aspect_sentiments = {}

    for aspect in aspects:
        aspect_tokens = tokenizer.tokenize(aspect)
        aspect_mask = torch.zeros_like(inputs["input_ids"], dtype=torch.float).to(device)

        for i in range(len(inputs["input_ids"][0]) - len(aspect_tokens) + 1):
            if tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][i: i + len(aspect_tokens)]) == aspect_tokens:
                aspect_mask[0, i: i + len(aspect_tokens)] = 1

        with torch.no_grad():
            _, logits = model(inputs["input_ids"], inputs["attention_mask"], aspect_mask)

        sentiment_label = torch.argmax(logits, dim=-1).cpu().item()
        aspect_sentiments[aspect] = "Positive" if sentiment_label == 1 else "Negative"

    return aspect_sentiments

# ✅ Example Usage
sentence = "One of the best game music soundtracks - for a game I didn't really play: Despite the fact that I have only played a small portion of the game, the music I heard (plus the connection to Chrono Trigger which was great as well) led me to purchase the soundtrack, and it remains one of my favorite albums."

result = predict_aspect_sentiment_auto(model, tokenizer, sentence)
print(result)

{'game': 'Positive', 'music': 'Negative', 'soundtracks': 'Negative', 'fact': 'Positive', 'portion': 'Negative', 'connection': 'Negative', 'soundtrack': 'Negative', 'albums': 'Negative'}


Input Representation for Aspect Awareness

In [None]:
def convert_example_to_features(sentence, aspect, tokenizer, max_length=128):
    """
    Converts a sentence into BERT input features, including an aspect mask.

    - sentence: Full review text
    - aspect: Aspect term (e.g., 'food', 'service')
    - tokenizer: BERT tokenizer
    - max_length: Maximum sequence length
    """
    tokens = tokenizer.tokenize(sentence)
    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # Create attention mask
    attention_mask = [1] * len(input_ids)

    # Generate aspect mask (1 for aspect-related tokens, 0 otherwise)
    aspect_mask = [0] * len(input_ids)
    aspect_tokens = tokenizer.tokenize(aspect)

    for i in range(len(tokens) - len(aspect_tokens) + 1):
        if tokens[i: i + len(aspect_tokens)] == aspect_tokens:
            aspect_mask[i: i + len(aspect_tokens)] = [1] * len(aspect_tokens)

    # Pad sequences
    padding_length = max_length - len(input_ids)
    input_ids += [0] * padding_length
    attention_mask += [0] * padding_length
    aspect_mask += [0] * padding_length

    return {
        "input_ids": torch.tensor(input_ids, dtype=torch.long),
        "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
        "aspect_mask": torch.tensor(aspect_mask, dtype=torch.long)
    }

BERT model for Aspect-Aware Attention

In [None]:
from transformers import BertPreTrainedModel, BertModel
import torch
import torch.nn as nn

class BertForABSA(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.aspect_attention = nn.Linear(config.hidden_size, 1)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, input_ids, attention_mask, aspect_mask, labels=None):
        # Get BERT output
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_dim)

        # Apply aspect-aware attention
        aspect_weights = torch.sigmoid(self.aspect_attention(hidden_states))  # Compute attention scores
        aspect_weights = aspect_weights * aspect_mask.unsqueeze(-1)  # Apply aspect mask
        aspect_representations = hidden_states * aspect_weights  # Scale hidden states by attention weights

        # Pool aspect-aware representations
        pooled_output = aspect_representations.mean(dim=1)

        # Classification layer
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        # Compute loss
        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits, labels)

        return loss, logits


Training pipeline

In [None]:
from torch.utils.data import Dataset
import torch

class ABSADataset(Dataset):
    def __init__(self, df, tokenizer, max_length=128):
        self.tokens = df["tokens"].tolist()
        self.aspect_masks = df["aspect_mask"].tolist()
        self.sentiments = df["sentiment"].tolist()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        tokens = self.tokens[idx]

        # Ensure tokens is a list
        if isinstance(tokens, str):  # If stored as string, convert to list
            tokens = eval(tokens)
        elif not isinstance(tokens, list):  # If somehow still not list, wrap in list
            tokens = [tokens]

        input_ids = self.tokenizer.convert_tokens_to_ids(tokens)

        # Ensure aspect_mask is a list
        aspect_mask = self.aspect_masks[idx]
        if isinstance(aspect_mask, str):
            aspect_mask = eval(aspect_mask)
        elif not isinstance(aspect_mask, list):
            aspect_mask = [aspect_mask]

        # Create attention mask (1 for actual tokens, 0 for padding)
        attention_mask = [1] * len(input_ids)

        # **🔹 Fix: Ensure consistent sequence length using padding**
        if len(input_ids) > self.max_length:
            input_ids = input_ids[:self.max_length]
            attention_mask = attention_mask[:self.max_length]
            aspect_mask = aspect_mask[:self.max_length]
        else:
            padding_length = self.max_length - len(input_ids)
            input_ids += [0] * padding_length
            attention_mask += [0] * padding_length
            aspect_mask += [0] * padding_length

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
            "aspect_mask": torch.tensor(aspect_mask, dtype=torch.long),
            "labels": torch.tensor(self.sentiments[idx], dtype=torch.long)
        }

print("✅ Dataset class loaded successfully!")


✅ Dataset class loaded successfully!


class imbalance

In [None]:
import torch
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

train_dataset = ABSADataset(train_df, tokenizer)
test_dataset = ABSADataset(test_df, tokenizer)

# ✅ Fix: Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Fix: Ensure `y` is a NumPy array
class_labels = np.array([0, 1])  # Only two classes now
class_weights = compute_class_weight("balanced", classes=class_labels, y=train_df["sentiment"].values)

class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)

In [None]:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print("Train and Test DataLoaders are ready!")

Train and Test DataLoaders are ready!


Train

In [None]:
from transformers import AdamW, get_scheduler
import torch

# Initialize model
model = BertForABSA.from_pretrained("bert-base-uncased", num_labels=2)  # 3 sentiment classes (Neg, Neu, Pos)

# Define optimizer with weight decay for better generalization
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

num_epochs = 3

# Define learning rate scheduler
num_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer, num_warmup_steps=100, num_training_steps=num_training_steps
)

# Set up device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training Loop
print("🚀 Starting Training...")

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    num_batches = len(train_dataloader)

    for step, batch in enumerate(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        loss, logits = model(
            batch["input_ids"],
            batch["attention_mask"],
            batch["aspect_mask"],
            batch["labels"]
        )

        loss = loss_fn(logits, batch["labels"])

        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_loss += loss.item()

        # Print loss every 50 steps
        if (step + 1) % 50 == 0 or (step + 1) == num_batches:
            print(f"Epoch [{epoch+1}/{num_epochs}] Step [{step+1}/{num_batches}] - Loss: {loss.item():.4f}")

    avg_loss = total_loss / num_batches
    print(f"✅ Epoch {epoch+1} Completed! Avg Loss: {avg_loss:.4f}")

print("🎉 Training Finished!")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForABSA were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['aspect_attention.bias', 'aspect_attention.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🚀 Starting Training...
Epoch [1/3] Step [50/66] - Loss: 0.6844
Epoch [1/3] Step [66/66] - Loss: 0.7161
✅ Epoch 1 Completed! Avg Loss: 0.6916
Epoch [2/3] Step [50/66] - Loss: 0.7050
Epoch [2/3] Step [66/66] - Loss: 0.6266
✅ Epoch 2 Completed! Avg Loss: 0.6270
Epoch [3/3] Step [50/66] - Loss: 0.4958
Epoch [3/3] Step [66/66] - Loss: 0.4560
✅ Epoch 3 Completed! Avg Loss: 0.4771
🎉 Training Finished!


Evaluate

In [None]:
from sklearn.metrics import classification_report

def evaluate_model(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            _, logits = model(batch["input_ids"], batch["attention_mask"], batch["aspect_mask"])
            predictions = torch.argmax(logits, dim=-1).cpu().numpy()
            y_pred.extend(predictions)
            y_true.extend(batch["labels"].cpu().numpy())

    print(classification_report(y_true, y_pred, target_names=["Negative", "Positive"]))

# Run evaluation
evaluate_model(model, test_dataloader)


              precision    recall  f1-score   support

    Negative       0.82      0.66      0.73        82
    Positive       0.86      0.93      0.89       180

    accuracy                           0.85       262
   macro avg       0.84      0.80      0.81       262
weighted avg       0.84      0.85      0.84       262



hyperparameter tunning

In [None]:
import torch
import torch.optim as optim
from transformers import get_scheduler
from sklearn.model_selection import ParameterGrid
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report

# Define possible hyperparameters
param_grid = {
    "learning_rate": [2e-5, 3e-5, 5e-5],  # Common BERT fine-tuning rates
    "batch_size": [8, 16],  # Trade-off between speed and performance
    "weight_decay": [0.01, 0.001]  # Regularization to avoid overfitting
}

# Create all possible hyperparameter combinations
param_combinations = list(ParameterGrid(param_grid))

best_model = None
best_f1_score = 0  # Track the best model based on F1-score

for params in param_combinations:
    print(f"\n🚀 Training with params: {params}")

    # Load model
    model = BertForABSA.from_pretrained("bert-base-uncased", num_labels=2)
    model.to(device)

    # Optimizer & Learning Rate Scheduler
    optimizer = optim.AdamW(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
    num_training_steps = len(train_dataloader) * 3  # Assuming 3 epochs
    lr_scheduler = get_scheduler(
        "linear", optimizer=optimizer, num_warmup_steps=100, num_training_steps=num_training_steps
    )

    # Loss function
    criterion = torch.nn.CrossEntropyLoss()

    # Data Loaders
    train_dataloader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True)
    val_dataloader = DataLoader(test_dataset, batch_size=params["batch_size"])

    # Training Loop
    for epoch in range(3):
        model.train()
        total_loss = 0
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            optimizer.zero_grad()

            loss, logits = model(batch["input_ids"], batch["attention_mask"], batch["aspect_mask"], batch["labels"])
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            lr_scheduler.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1} - Loss: {total_loss / len(train_dataloader)}")

    # Validate Model
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch in val_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            _, logits = model(batch["input_ids"], batch["attention_mask"], batch["aspect_mask"])
            predictions = torch.argmax(logits, dim=-1).cpu().numpy()
            y_pred.extend(predictions)
            y_true.extend(batch["labels"].cpu().numpy())

    # Compute F1-score
    report = classification_report(y_true, y_pred, target_names=["Negative", "Positive"], output_dict=True)
    f1_score = report["macro avg"]["f1-score"]
    print(classification_report(y_true, y_pred, target_names=["Negative", "Positive"]))

    # Save best model based on F1-score
    if f1_score > best_f1_score:
        best_f1_score = f1_score
        best_model = model
        best_params = params

# Save the best model
best_model.save_pretrained("best_absa_model")
print(f"✅ Best model saved with params: {best_params} (F1-score: {best_f1_score:.4f})")


🚀 Training with params: {'batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.01}


Some weights of BertForABSA were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['aspect_attention.bias', 'aspect_attention.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1 - Loss: 0.6534200262932377
Epoch 2 - Loss: 0.5424236830409247
Epoch 3 - Loss: 0.5091745596350604
              precision    recall  f1-score   support

    Negative       0.70      0.39      0.50        82
    Positive       0.77      0.92      0.84       180

    accuracy                           0.76       262
   macro avg       0.73      0.66      0.67       262
weighted avg       0.75      0.76      0.73       262


🚀 Training with params: {'batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.001}


Some weights of BertForABSA were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['aspect_attention.bias', 'aspect_attention.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1 - Loss: 0.6641075499640167
Epoch 2 - Loss: 0.5481806626074187
Epoch 3 - Loss: 0.395243507873921
              precision    recall  f1-score   support

    Negative       0.80      0.65      0.72        82
    Positive       0.85      0.93      0.89       180

    accuracy                           0.84       262
   macro avg       0.83      0.79      0.80       262
weighted avg       0.84      0.84      0.83       262


🚀 Training with params: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.01}


Some weights of BertForABSA were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['aspect_attention.bias', 'aspect_attention.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
