In [None]:
!pip install -q transformers datasets seqeval evaluate accelerate

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    TrainingArguments,
    Trainer,
    DataCollatorForTokenClassification
)
import evaluate
import torch

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for seqeval (setup.py) ... [?25l[?25hdone


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load your dataset
file_path = "/content/drive/MyDrive/dlProject/Datasets/Restaurants_Train_v2.csv" # Update this path if needed
df = pd.read_csv(file_path)

# Rename columns to standard names
df = df.rename(columns={
    "Sentence": "text",
    "Aspect Term": "aspect",
    "from": "start",
    "to": "end"
})

# Filter out bad rows (missing aspects)
df = df[df["aspect"].notna() & (df["aspect"] != "NULL") & (df["aspect"] != "_")].copy()
df["start"] = df["start"].astype(int)
df["end"] = df["end"].astype(int)

print(f"Loaded {len(df)} rows.")

Loaded 3693 rows.


In [None]:
# Group by 'id' so that all aspects for the same sentence are in one list
grouped_df = df.groupby("id").agg({
    "text": "first",
    "start": list,
    "end": list,
    "aspect": list
}).reset_index()

print(f"Original rows: {len(df)}")
print(f"Unique sentences (Training Samples): {len(grouped_df)}")
print("\nSample aggregated row:")
print(grouped_df.iloc[0])

Original rows: 3693
Unique sentences (Training Samples): 2021

Sample aggregated row:
id                                                        3
text      The staff isn't the friendliest or most compet...
start                                               [4, 73]
end                                                 [9, 80]
aspect                                     [staff, service]
Name: 0, dtype: object


In [None]:
# You can swap this for "microsoft/deberta-v3-base" for even better performance
MODEL_NAME = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, add_prefix_space=True)

def align_labels(text, start_list, end_list, tokenizer):
    """
    Aligns character-level offsets to token-level BIO labels.
    """
    tokenized_inputs = tokenizer(text, truncation=True, return_offsets_mapping=True)
    offset_mapping = tokenized_inputs["offset_mapping"]

    labels = []

    for start_char, end_char in offset_mapping:
        # Special tokens (CLS, SEP) have offset (0, 0)
        if start_char == 0 and end_char == 0:
            labels.append(0) # "O" tag
            continue

        # Default label is O (0)
        label = 0

        # Check if this token overlaps with ANY of the aspect spans
        for idx, (s, e) in enumerate(zip(start_list, end_list)):
            if start_char == s:
                label = 1 # B-ASP
                break
            elif start_char > s and end_char <= e:
                label = 2 # I-ASP
                break

        labels.append(label)

    return tokenized_inputs, labels

# Process the whole dataset
data_list = []
for _, row in grouped_df.iterrows():
    tkn, lbl = align_labels(row["text"], row["start"], row["end"], tokenizer)
    data_list.append({
        "input_ids": tkn["input_ids"],
        "attention_mask": tkn["attention_mask"],
        "labels": lbl
    })

dataset = Dataset.from_list(data_list)
print("Data processing complete.")

split_dataset = dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

print(f"Train samples: {len(train_dataset)}")
print(f"Eval samples: {len(eval_dataset)}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Data processing complete.
Train samples: 1616
Eval samples: 405


In [None]:
seqeval = evaluate.load("seqeval")
label_list = ["O", "B-ASP", "I-ASP"]
id2label = {0: "O", 1: "B-ASP", 2: "I-ASP"}
label2id = {"O": 0, "B-ASP": 1, "I-ASP": 2}

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
model = AutoModelForTokenClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3,
    id2label=id2label,
    label2id=label2id
)

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Optimized hyperparameters for RoBERTa-large
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/dlProject/NModal/roberta-absa-results",
    learning_rate=2e-5,
    per_device_train_batch_size=8,  # Reduce to 4 if you run out of memory
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    logging_steps=50,
    metric_for_best_model="f1"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0991,0.055338,0.887715,0.890066,0.888889,0.981004
2,0.0633,0.027159,0.947507,0.956291,0.951879,0.991039
3,0.0381,0.015625,0.968504,0.977483,0.972973,0.995341
4,0.0148,0.008449,0.985583,0.996026,0.990777,0.998208
5,0.0102,0.005985,0.988173,0.996026,0.992084,0.998447


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0991,0.055338,0.887715,0.890066,0.888889,0.981004
2,0.0633,0.027159,0.947507,0.956291,0.951879,0.991039
3,0.0381,0.015625,0.968504,0.977483,0.972973,0.995341
4,0.0148,0.008449,0.985583,0.996026,0.990777,0.998208
5,0.0102,0.005985,0.988173,0.996026,0.992084,0.998447


TrainOutput(global_step=1265, training_loss=0.05352740617019857, metrics={'train_runtime': 879.5205, 'train_samples_per_second': 11.489, 'train_steps_per_second': 1.438, 'total_flos': 664925624825550.0, 'train_loss': 0.05352740617019857, 'epoch': 5.0})

In [None]:
save_path = "/content/drive/MyDrive/dlProject/NModal/RoBERTa_ABSA_Final"
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model saved to {save_path}")

Model saved to /content/drive/MyDrive/dlProject/NModal/RoBERTa_ABSA_Final


In [None]:
def extract_aspects(text, model, tokenizer):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=2).cpu().numpy()[0]
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    extracted_aspects = []
    current_aspect = []

    for token, label_id in zip(tokens, predictions):
        label = id2label[label_id]

        clean_token = token.replace("Ġ", " ").replace("Ċ", "")
        if token in [tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token]:
            continue

        if label == "B-ASP":
            if current_aspect:
                extracted_aspects.append("".join(current_aspect).strip())
            current_aspect = [clean_token]
        elif label == "I-ASP":
            current_aspect.append(clean_token)
        else:
            if current_aspect:
                extracted_aspects.append("".join(current_aspect).strip())
                current_aspect = []

    if current_aspect:
        extracted_aspects.append("".join(current_aspect).strip())

    return extracted_aspects

# Test it
test_sentence = "The food was delicious "
print("Sentence:", test_sentence)
print("Detected Aspects:", extract_aspects(test_sentence, model, tokenizer))

Sentence: The food was delicious 
Detected Aspects: ['food']


In [None]:
test_examples = [
    "The pizza was delicious, but the crust was a bit burnt.",

    "We loved the ambiance, but the waiter was very rude to us.",

    "The prices are too high for such small portions.",

    "The delivery arrived late and the food was cold.",

    "I will definitely come back again next week!",

    "The wine list is extensive, and the cheesecake is to die for."
]

print(f"{'SENTENCE':<60} | {'DETECTED ASPECTS'}")
print("-" * 85)

for text in test_examples:
    aspects = extract_aspects(text, model, tokenizer)
    print(f"{text:<60} | {aspects}")

SENTENCE                                                     | DETECTED ASPECTS
-------------------------------------------------------------------------------------
The pizza was delicious, but the crust was a bit burnt.      | ['pizza', 'crust']
We loved the ambiance, but the waiter was very rude to us.   | ['ambiance', 'waiter']
The prices are too high for such small portions.             | ['prices', 'portions']
The delivery arrived late and the food was cold.             | ['delivery', 'food']
I will definitely come back again next week!                 | []
The wine list is extensive, and the cheesecake is to die for. | ['wine list', 'cheesecake']


In [None]:

from transformers import Trainer, DataCollatorForTokenClassification
import numpy as np
import evaluate

label_list = ["O", "B-ASP", "I-ASP"]
id2label = {0: "O", 1: "B-ASP", 2: "I-ASP"}

seqeval = evaluate.load("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run evaluation
results = trainer.evaluate(eval_dataset)
print("Evaluation results:")
print(results)


  trainer = Trainer(


Evaluation results:
{'eval_loss': 0.005984941031783819, 'eval_model_preparation_time': 0.0108, 'eval_precision': 0.988173455978975, 'eval_recall': 0.9960264900662251, 'eval_f1': 0.992084432717678, 'eval_accuracy': 0.9984468339307049, 'eval_runtime': 4.1622, 'eval_samples_per_second': 97.305, 'eval_steps_per_second': 12.253}


In [None]:

results = trainer.evaluate(eval_dataset)


print(f"accuracy:  {results['eval_accuracy']:.4f}")
print(f"f1:        {results['eval_f1']:.4f}")
print(f"precision: {results['eval_precision']:.4f}")
print(f"recall:    {results['eval_recall']:.4f}")


accuracy:  0.9984
f1:        0.9921
precision: 0.9882
recall:    0.9960
