In [5]:
import torch
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Dataset

In [6]:
# Read the restaurant-review table from the Kaggle input directory and
# show it as the cell output.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/restaurant-review/temporary_result.csv',
)
df

Unnamed: 0.1,Unnamed: 0,text,category,topic_food,score_food,topic_place,score_place,topic_price,score_price,topic_service,score_service,topic_count,main_topics,primary_topic,primary_score
0,0,"To be completely fair, the only redeeming fact...",food,1,0.223695,0,0.000000,0,0.0,0,0.000000,1,food,food,0.223695
1,1,"The food is uniformly exceptional, with a very...",food,1,0.515413,1,0.378481,0,0.0,0,0.000000,2,"food, place",food,0.515413
2,2,"Not only was the food outstanding, but the lit...",food,1,0.330244,0,0.000000,0,0.0,0,0.000000,1,food,food,0.330244
3,3,It is very overpriced and not very tasty.,food,1,0.371381,0,0.000000,0,0.0,0,0.000000,1,food,food,0.371381
4,4,Our agreed favorite is the orrechiete with sau...,food,1,0.424373,1,0.321227,0,0.0,0,0.000000,2,"food, place",food,0.424373
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2195,2195,"Warm, comfortable surroundings, nice appointme...",place,1,0.324377,1,0.594371,0,0.0,1,0.418131,3,"food, place, service",place,0.594371
2196,2196,"This is such a lovely, peaceful place to eat o...",place,1,0.330050,1,0.373284,0,0.0,0,0.000000,2,"food, place",place,0.373284
2197,2197,"This is a great place to take out-of-towners, ...",place,0,0.000000,1,0.386226,0,0.0,0,0.000000,1,place,place,0.386226
2198,2198,"You will pay a lot for the decore, but the foo...",place,1,0.322529,1,0.377474,0,0.0,0,0.000000,2,"food, place",place,0.377474


# Aspect-Based Sentiment Analysis (ABSA) Using a Pretrained Model

In [10]:
# Checkpoint identifier shared by the tokenizer and the classification model.
model_name = "yangheng/deberta-v3-base-absa-v1.1"

# The two loads are independent of each other; both resolve `model_name`.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [11]:
# Pick the compute device once and derive the matching index that is later
# handed to the pipeline constructor: 0 when CUDA is available, -1 otherwise.
if torch.cuda.is_available():
    device, device_index = 'cuda', 0
else:
    device, device_index = 'cpu', -1
print(f"Using device: {device}")

Using device: cuda


In [12]:
# Bundle the loaded model and tokenizer into a text-classification pipeline
# placed on the device index chosen above.
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=device_index,
)

In [13]:
def extract_aspect_sentiments(dataset, aspects, max_length=50, classify_fn=None):
    """Attach one ``sentiment_<aspect>`` entry per aspect to every row.

    For each row and each aspect, the row receives the classifier's label when
    ``row['topic_<aspect>'] == 1``; otherwise it receives the literal string
    ``"Not Found"``.

    Args:
        dataset: Iterable of dict-like rows. Every row must provide a
            ``'text'`` entry and a ``'topic_<aspect>'`` entry for each aspect.
            Rows are updated in place.
        aspects: Iterable of aspect names. It is materialised once so that a
            one-shot iterable (e.g. a generator) is applied to every row
            rather than only the first.
        max_length: Accepted for backward compatibility only. It is not used:
            no truncation is applied to the text passed to the classifier.
        classify_fn: Optional callable invoked as
            ``classify_fn(text, text_pair=aspect)``; it must return a sequence
            whose first item is a mapping exposing ``"label"``. When omitted,
            the notebook-level ``classifier`` pipeline is used.

    Returns:
        The same ``dataset`` object that was passed in.

    Raises:
        KeyError: If a row lacks ``'text'`` or a required ``'topic_<aspect>'``.
    """
    # Fall back to the global pipeline so existing calls keep working unchanged.
    if classify_fn is None:
        classify_fn = classifier

    # Freeze the aspect list: the inner loop runs once per row.
    aspect_names = list(aspects)

    for row in tqdm(dataset, desc="Processing rows", unit="row"):
        text = row['text']
        for aspect in aspect_names:
            # Only query the model for aspects flagged as present in this row.
            if row[f'topic_{aspect}'] == 1:
                prediction = classify_fn(text, text_pair=aspect)[0]
                row[f'sentiment_{aspect}'] = prediction.get("label")
            else:
                row[f'sentiment_{aspect}'] = "Not Found"
    return dataset

In [14]:
# Aspect names to score; the extraction function looks up a `topic_<aspect>`
# key for each one and writes a matching `sentiment_<aspect>` key.
aspects = [
    "food",
    "place",
    "price",
    "service",
]

In [15]:
# Turn the frame into a list of per-row dicts, tag each row with aspect
# sentiments, then rebuild a DataFrame from the enriched records.
updated_data = extract_aspect_sentiments(
    dataset=df.to_dict(orient='records'),
    aspects=aspects,
)
df_updated = pd.DataFrame(data=updated_data)

Processing rows:   0%|          | 8/2200 [00:00<02:29, 14.65row/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing rows: 100%|██████████| 2200/2200 [01:03<00:00, 34.44row/s]


In [16]:
# Display the enriched frame, which now carries the sentiment_<aspect> columns.
df_updated

Unnamed: 0.1,Unnamed: 0,text,category,topic_food,score_food,topic_place,score_place,topic_price,score_price,topic_service,score_service,topic_count,main_topics,primary_topic,primary_score,sentiment_food,sentiment_place,sentiment_price,sentiment_service
0,0,"To be completely fair, the only redeeming fact...",food,1,0.223695,0,0.000000,0,0.0,0,0.000000,1,food,food,0.223695,Positive,Not Found,Not Found,Not Found
1,1,"The food is uniformly exceptional, with a very...",food,1,0.515413,1,0.378481,0,0.0,0,0.000000,2,"food, place",food,0.515413,Positive,Neutral,Not Found,Not Found
2,2,"Not only was the food outstanding, but the lit...",food,1,0.330244,0,0.000000,0,0.0,0,0.000000,1,food,food,0.330244,Positive,Not Found,Not Found,Not Found
3,3,It is very overpriced and not very tasty.,food,1,0.371381,0,0.000000,0,0.0,0,0.000000,1,food,food,0.371381,Negative,Not Found,Not Found,Not Found
4,4,Our agreed favorite is the orrechiete with sau...,food,1,0.424373,1,0.321227,0,0.0,0,0.000000,2,"food, place",food,0.424373,Neutral,Neutral,Not Found,Not Found
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2195,2195,"Warm, comfortable surroundings, nice appointme...",place,1,0.324377,1,0.594371,0,0.0,1,0.418131,3,"food, place, service",place,0.594371,Neutral,Positive,Not Found,Positive
2196,2196,"This is such a lovely, peaceful place to eat o...",place,1,0.330050,1,0.373284,0,0.0,0,0.000000,2,"food, place",place,0.373284,Neutral,Positive,Not Found,Not Found
2197,2197,"This is a great place to take out-of-towners, ...",place,0,0.000000,1,0.386226,0,0.0,0,0.000000,1,place,place,0.386226,Not Found,Positive,Not Found,Not Found
2198,2198,"You will pay a lot for the decore, but the foo...",place,1,0.322529,1,0.377474,0,0.0,0,0.000000,2,"food, place",place,0.377474,Negative,Neutral,Not Found,Not Found


In [17]:
# Write the enriched table to a CSV in the working directory (without the
# DataFrame index) and report where it went.
output_path = "updated_aspect_sentiments.csv"
df_updated.to_csv(path_or_buf=output_path, index=False)
print(f"Updated aspect-based sentiment results saved to {output_path}")

Updated aspect-based sentiment results saved to updated_aspect_sentiments.csv
