### Load Data

In [2]:
import pandas as pd

# Read the labelled product-review dataset (UTF-8 CSV) into a DataFrame
DATASET_PATH = "D:/MSc/Sentiment & Emotion Intelligence/dataset/Product Reviews - ABSA.csv"
df = pd.read_csv(DATASET_PATH, encoding='utf-8')
print(df.shape)
df.head()

(1963, 6)


Unnamed: 0,Category,Rating,Review,Sentiment,Emotion,Decisions
0,Amazon Alexa,4,I liked this gen very much. The only disadvant...,Neutral,Sadness,"{""Overall Quality"": ""Positive"", ""Voice Recogni..."
1,Amazon Alexa,3,This latest version of Echo Dot may be a bette...,Negative,Anger,"{""Voice Recognition"": ""Negative"", ""Music Strea..."
2,Amazon Alexa,4,it could not be possible to reset it at my ne...,Negative,Sadness,"{""Functionality"": ""Negative""}"
3,Amazon Alexa,4,Nothing great then previous ones .,Neutral,Neutral,"{""Overall Quality"": ""Negative""}"
4,Amazon Alexa,4,AwesomeBut some time can not listenOver all good,Positive,Happiness,"{""Sound Quality"": ""Negative"", ""Overall Quality..."


In [3]:
# Column overview: names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1963 entries, 0 to 1962
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Category   1963 non-null   object
 1   Rating     1963 non-null   int64 
 2   Review     1963 non-null   object
 3   Sentiment  1963 non-null   object
 4   Emotion    1963 non-null   object
 5   Decisions  1963 non-null   object
dtypes: int64(1), object(5)
memory usage: 92.1+ KB


### Data Preprocessing

In [4]:
import re
import contractions
from bs4 import BeautifulSoup
import nltk
import emoji
from warnings import filterwarnings

# NOTE(review): with no category/module filter this silences every warning
# for the rest of the session, including deprecation notices from libraries.
filterwarnings('ignore')

In [5]:
# Download NLTK resources (each call is a no-op when the package is already up to date).
# NOTE(review): no NLTK tokenizer or lemmatizer is called in the cells visible
# here — confirm these downloads are still required.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [6]:
# Lowercase
def to_lowercase(text):
    """Return ``text`` with all cased characters converted to lowercase."""
    lowered = text.lower()
    return lowered

# Expand Contractions
def expand_contractions(text):
    """Return ``text`` after passing it through ``contractions.fix``."""
    expanded = contractions.fix(text)
    return expanded

# Remove HTML Tags
def remove_html(text):
    """Parse ``text`` with BeautifulSoup's ``html.parser`` and return only its text content."""
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()

# Remove Emojis
def remove_emojis(text):
    """Return ``text`` with every emoji replaced by the empty string."""
    without_emojis = emoji.replace_emoji(text, replace='')
    return without_emojis

# Normalize Clothing Sizes (heights written as feet'inches" -> centimetres)
def normalize_clothing_sizes(text):
    """Rewrite heights such as ``5'4"`` or ``5' 10`` as rounded centimetres.

    Args:
        text: Review text to scan.

    Returns:
        ``text`` with every ``<feet>'<inches>`` height replaced by ``"<n> cm"``;
        an optional trailing ``"`` is consumed by the replacement.
    """
    # The digit guards on both sides stop the pattern from firing inside a
    # longer number: without them "15'10" became "1178 cm" and "5'100"
    # became "178 cm0".
    pattern = re.compile(r"(?<!\d)(\d)'\s?(\d{1,2})(?!\d)\"?")

    def convert(match):
        feet = int(match.group(1))
        inches = int(match.group(2))
        # 1 inch = 2.54 cm; rounded to the nearest whole centimetre.
        cm = round((feet * 12 + inches) * 2.54)
        return f"{cm} cm"

    return pattern.sub(convert, text)

# Remove URLs
def remove_urls(text):
    """Delete every whitespace-delimited token fragment starting at ``http`` or ``www``."""
    url_pattern = re.compile(r'http\S+|www\S+|https\S+')
    return url_pattern.sub('', text)

# Remove Special Characters (keeps only ASCII letters, digits and whitespace)
def remove_special_chars(text, replacement=''):
    """Replace every character that is not an ASCII letter, digit or whitespace.

    Args:
        text: Text to clean.
        replacement: String substituted for each removed character. The
            default ``''`` deletes the character, which fuses the tokens on
            either side of it ("4/6" -> "46", "2.5" -> "25"). Pass ``' '`` to
            keep those tokens apart; the extra spaces can then be collapsed by
            a whitespace-normalisation step.

    Returns:
        The cleaned text. Non-ASCII letters are removed as well.
    """
    return re.sub(r'[^A-Za-z0-9\s]', replacement, text)

# Normalize Whitespace & Line Breaks
def normalize_whitespace(text):
    """Collapse each run of whitespace to one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()

In [7]:
# Data Cleaning Pipeline
def clean_review_absa(text):
    """Run one raw review through the full cleaning pipeline.

    Steps, in order: lowercase, expand contractions, strip HTML, strip emojis,
    convert feet/inch heights to cm, strip URLs, strip special characters,
    normalise whitespace.

    Args:
        text: Raw review cell. Anything that is not a ``str`` (``None``,
            ``NaN``, numbers, list-like values) is treated as missing.

    Returns:
        The cleaned review, or ``""`` for missing / non-string input.
    """
    # The isinstance check alone rejects None and NaN. Calling pd.isnull()
    # first would return an array for list-like cells, and using that array
    # in a boolean context raises ValueError.
    if not isinstance(text, str):
        return ""

    pipeline = (
        to_lowercase,
        expand_contractions,
        remove_html,
        remove_emojis,
        normalize_clothing_sizes,
        remove_urls,
        remove_special_chars,
        normalize_whitespace,
    )
    for step in pipeline:
        text = step(text)

    return text

In [8]:
# Clean every raw review and store the result in a new column
df['Cleaned_Reviews'] = df['Review'].map(clean_review_absa)
print(df.shape)
df.head()

(1963, 7)


Unnamed: 0,Category,Rating,Review,Sentiment,Emotion,Decisions,Cleaned_Reviews
0,Amazon Alexa,4,I liked this gen very much. The only disadvant...,Neutral,Sadness,"{""Overall Quality"": ""Positive"", ""Voice Recogni...",i liked this gen very much the only disadvanta...
1,Amazon Alexa,3,This latest version of Echo Dot may be a bette...,Negative,Anger,"{""Voice Recognition"": ""Negative"", ""Music Strea...",this latest version of echo dot may be a bette...
2,Amazon Alexa,4,it could not be possible to reset it at my ne...,Negative,Sadness,"{""Functionality"": ""Negative""}",it could not be possible to reset it at my new...
3,Amazon Alexa,4,Nothing great then previous ones .,Neutral,Neutral,"{""Overall Quality"": ""Negative""}",nothing great then previous ones
4,Amazon Alexa,4,AwesomeBut some time can not listenOver all good,Positive,Happiness,"{""Sound Quality"": ""Negative"", ""Overall Quality...",awesomebut some time can not listenover all good


In [27]:
# Write the current frame to CSV without the index column.
# NOTE(review): absolute, machine-specific path; this cell's execution count
# (27) is out of sequence with its neighbours, so it was run later than its
# position suggests.
df.to_csv("D:/MSc/Sentiment & Emotion Intelligence/dataset/Product Reviews - ABSA - Cleaned.csv", index=False)

### Train-Test Split

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for testing, stratified on product category
train_df, test_df = train_test_split(
    df,
    test_size=0.15,
    stratify=df['Category'],
    random_state=42,
)

print("Train Dataset: ", len(train_df))
print("Testing Dataset: ", len(test_df))

Train Dataset:  1668
Testing Dataset:  295


### Few-Shot Selection

In [10]:
import json
from collections import defaultdict

# Few-shot example selection per category
def select_diverse_fewshot_examples(category_df, k=10, random_state=None):
    """Pick up to ``k`` few-shot ``(review, labels)`` pairs for one category.

    One example is taken for each distinct aspect/sentiment combination, in
    order of first appearance. If fewer than ``k`` combinations exist, the
    remaining slots are filled with randomly drawn examples.

    Args:
        category_df: Frame with a ``"Cleaned_Reviews"`` column and a
            ``"Decisions"`` column holding either a JSON object string or an
            already-parsed dict of aspect -> sentiment.
        k: Number of examples wanted.
        random_state: Optional seed for the random fill step. ``None`` (the
            default) keeps the fill non-deterministic.

    Returns:
        A list of ``(review, label_dict)`` tuples. Its length is ``k`` whenever
        at least one row has usable labels, otherwise it is empty. Rows whose
        ``Decisions`` value cannot be read as a dict are skipped.
    """
    import random  # local import: only the fill step needs it

    def _parse_labels(raw):
        """Return the label dict for one ``Decisions`` cell, or None if unusable."""
        if isinstance(raw, dict):
            return raw
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
            return None
        return parsed if isinstance(parsed, dict) else None

    if k <= 0:
        return []

    first_per_combo = {}  # label combination -> first (review, labels) seen
    repeats = []          # later examples of an already-seen combination

    for review, raw in zip(category_df["Cleaned_Reviews"], category_df["Decisions"]):
        labels = _parse_labels(raw)
        if labels is None:
            continue
        # Sorted items make the key independent of the JSON key order.
        combo = tuple(sorted(labels.items()))
        if combo in first_per_combo:
            repeats.append((review, labels))
        else:
            first_per_combo[combo] = (review, dict(combo))

    selected = list(first_per_combo.values())[:k]
    missing = k - len(selected)

    # Fill remaining slots. Reaching this point with missing > 0 means every
    # combination is already represented, so prefer examples not yet used.
    if missing > 0 and selected:
        rng = random.Random(random_state)
        if len(repeats) >= missing:
            selected += rng.sample(repeats, missing)
        else:
            everything = selected + repeats
            # Not enough unused rows: take them all, then draw with replacement.
            selected += repeats + rng.choices(everything, k=missing - len(repeats))

    return selected

In [36]:
# Example: few-shot selection for a single category
is_alexa = train_df["Category"] == "Amazon Alexa"
category_examples = train_df.loc[is_alexa]
fewshot_examples = select_diverse_fewshot_examples(category_examples, k=10)
fewshot_examples

[('it is an excellent companion however it has few flaws as listed below as per two days of usage1 playing same songs again and again forcing to buy a package from the music partners also the playlist always starts from the same song and continues the same list even though shuffle is enabled that is why 1 star less2 not giving appropriate answers sometimes3 not having a memory like google assistant not remembering the things happened4 even though profile is created for specific person it is responding to all voices no use for profile creation except remembering the profile nameoverall it needs a few updates to solve the bugs however it is the best companion and a new family member',
  {'Memory Function': 'Negative',
   'Music Playback': 'Negative',
   'Overall Quality': 'Positive',
   'User Profile Handling': 'Negative',
   'Voice Recognition': 'Negative'}),
 ('now it is our member of familybest in all respects',
  {'Overall Quality': 'Positive'}),
 ('no batteries always needs to be pl

In [37]:
# Show each selected example as a review followed by its JSON-encoded labels
for example, label_dict in fewshot_examples:
    print(f"Review: {example}\nABSA: {json.dumps(label_dict)}\n")

Review: it is an excellent companion however it has few flaws as listed below as per two days of usage1 playing same songs again and again forcing to buy a package from the music partners also the playlist always starts from the same song and continues the same list even though shuffle is enabled that is why 1 star less2 not giving appropriate answers sometimes3 not having a memory like google assistant not remembering the things happened4 even though profile is created for specific person it is responding to all voices no use for profile creation except remembering the profile nameoverall it needs a few updates to solve the bugs however it is the best companion and a new family member
ABSA: {"Memory Function": "Negative", "Music Playback": "Negative", "Overall Quality": "Positive", "User Profile Handling": "Negative", "Voice Recognition": "Negative"}

Review: now it is our member of familybest in all respects
ABSA: {"Overall Quality": "Positive"}

Review: no batteries always needs to 

### LLM Prompt Construction

In [11]:
def build_prompt(review, category, train_df):
    """Assemble the few-shot ABSA prompt for a single review.

    The prompt is the fixed instruction text below, followed by up to 10
    few-shot examples chosen from the rows of ``train_df`` whose ``Category``
    equals ``category``, followed by the review to analyse.

    Args:
        review: Text of the review to analyse.
        category: Category value used to filter ``train_df`` for examples.
        train_df: Frame with ``Category`` plus the columns that
            ``select_diverse_fewshot_examples`` reads.

    Returns:
        The complete prompt as one string, ending in ``"ABSA:"``.
    """
    # Few-shot examples are re-selected on every call.
    category_examples = train_df[train_df["Category"] == category]
    fewshot_examples = select_diverse_fewshot_examples(category_examples, k=10)

    prompt = (
"""
You are an expert in Aspect-Based Sentiment Analysis (ABSA) for e-commerce reviews.
Extract product-related aspects (explicit or implicit) and assign sentiment ("Positive", "Negative", "Neutral").

⚠️ Only extract meaningful aspects related to the product's qualities or functions. Ignore delivery experience, pricing (unless about value), seller or service feedback, and unrelated opinions.

📌 Output Format (per review):
{
  "Aspect 1": "Sentiment",
  "Aspect 2": "Sentiment",
  ...
}

🎯 Common Aspects Across Categories:
- **General**: Quality, Durability, Design, Size, Color, Fit, Packaging, Instructions, Functionality, Value, Material, Image Accuracy
- **Electronics**: Battery Life, Sound Quality, Display, Performance, Connectivity, Portability, Build, Accessories
- **Clothing**: Comfort, Stretch, Fabric, Fit, Style, Stitching
- **Musical Instruments**: Tuning Stability, Tone, Sustain, Playability, Craftsmanship
- **Toys & Games**: Safety, Educational Value, Engagement, Durability
- **Health & Beauty**: Effectiveness, Scent, Skin Feel, Absorption, Sensitivity
- **Pet Supplies**: Chew Resistance, Safety, Pet Engagement, Allergic Reaction
- **Home & Garden**: Assembly, Cleaning Ease, Sturdiness, Weather Resistance
- **Office Supplies**: Ink Quality, Print Clarity, Ergonomics, Grip, Paper Compatibility

🧠 Special Cases:

1. **No clear aspect** → Use `"Overall Quality": "Sentiment"`
   - "Works well!" → {"Overall Quality": "Positive"}

2. **Sarcasm / Implicit sentiment**
   - "Fantastic. Broke in two days." → {"Durability": "Negative"}
   - "It's supposed to glow, but it doesn't." → {"Functionality": "Negative"}

3. **Mixed Sentiment**
   - "Comfortable shoes, but stitching came loose." → {"Comfort": "Positive", "Stitching": "Negative"}

4. **Image or Expectation Mismatch**
   - "Looks nothing like the ad." → {"Image Accuracy": "Negative"}

5. **Factual with no sentiment** → Assign "Neutral"
   - "Smaller than expected." → {"Size": "Neutral"}

"""
    )

    # One line per example: the review, then its labels serialised as JSON.
    # NOTE(review): examples are introduced with "→ ABSA:" while the final
    # query below uses "ABSA:" — confirm the difference is intentional.
    for example, label_dict in fewshot_examples:
        prompt += f"Review: {example}\n→ ABSA: {json.dumps(label_dict)}\n"

    prompt += f"\nNow, analyze the following review and return the output as a valid JSON object:\nReview: {review}\nABSA:"
    
    return prompt

In [48]:
# Preview the prompt for the first test row. Use the cleaned text so the
# preview matches what the inference loop sends and the style of the
# few-shot examples, which are also built from 'Cleaned_Reviews'.
prompt = build_prompt(test_df['Cleaned_Reviews'].iloc[0], test_df['Category'].iloc[0], train_df)
print(prompt)


You are an expert in Aspect-Based Sentiment Analysis (ABSA) for e-commerce reviews.
Extract product-related aspects (explicit or implicit) and assign sentiment ("Positive", "Negative", "Neutral").

⚠️ Only extract meaningful aspects related to the product's qualities or functions. Ignore delivery experience, pricing (unless about value), seller or service feedback, and unrelated opinions.

📌 Output Format (per review):
{
  "Aspect 1": "Sentiment",
  "Aspect 2": "Sentiment",
  ...
}

---

🎯 Common Aspects Across Categories:
- **General**: Quality, Durability, Design, Size, Color, Fit, Packaging, Instructions, Functionality, Value, Material, Image Accuracy
- **Electronics**: Battery Life, Sound Quality, Display, Performance, Connectivity, Portability, Build, Accessories
- **Clothing**: Comfort, Stretch, Fabric, Fit, Style, Stitching
- **Musical Instruments**: Tuning Stability, Tone, Sustain, Playability, Craftsmanship
- **Toys & Games**: Safety, Educational Value, Engagement, Durability

### Model Inference

In [12]:
# Setup LLM
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Load environment variables (expected to include GROQ_API_KEY) via python-dotenv.
load_dotenv()

# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset, and a
# temperature of 0.3 means repeated runs may produce different predictions.
llm = ChatGroq(model="llama3-70b-8192", temperature=0.3, api_key=os.getenv("GROQ_API_KEY"))

In [13]:
# Setup LLM Chain 
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Pass-through template: the whole prompt string is supplied as the single
# "input" variable, so the chain adds no text of its own.
llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template("{input}"))

In [20]:
import ast
from tqdm import tqdm

def parse_absa_response(response):
    """Best-effort conversion of a raw model reply into an aspect -> sentiment dict.

    The reply is trimmed to its outermost ``{...}`` span, so surrounding prose
    or code fences do not break parsing. It is then read as JSON, falling back
    to a Python literal for single-quoted dicts.

    Args:
        response: Raw text returned by the model.

    Returns:
        The parsed dict, or ``{}`` when no dict can be recovered.
    """
    text = response.strip()
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]

    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


# Only rows from this position onward are processed in this run.
# NOTE(review): presumably the first 152 rows were predicted in an earlier
# run whose results are kept in `pred_absa_1` — confirm before re-running.
RESUME_FROM = 152
pending_rows = test_df.iloc[RESUME_FROM:]

true_labels = []
pred_absa = []

for _, row in tqdm(pending_rows.iterrows(), total=len(pending_rows)):
    review = row["Cleaned_Reviews"]
    category = row["Category"]
    true_labels.append(json.loads(row["Decisions"]))

    # Build prompt and get prediction
    prompt = build_prompt(review, category, train_df)
    response = llm_chain.run(prompt)

    pred_absa.append(parse_absa_response(response))

100%|██████████| 143/143 [28:45<00:00, 12.07s/it]


In [18]:
# Keep a second name for the current predictions list.
# NOTE(review): this is an alias, not a copy, and it relies on the recorded
# execution order (In [18] before the loop cell In [20], which rebinds
# `pred_absa` to a new list). It will not behave the same under Run All.
pred_absa_1 = pred_absa

In [23]:
# Join the earlier batch of predictions with the latest batch, in order
new_pred_absa = [*pred_absa_1, *pred_absa]
len(new_pred_absa)

295

In [24]:
# Copy of the test split with the model predictions attached as a new column
new_df = test_df.assign(**{'Predicted ABSA': new_pred_absa})
print(new_df.shape)
new_df.head()

(295, 8)


Unnamed: 0,Category,Rating,Review,Sentiment,Emotion,Decisions,Cleaned_Reviews,Predicted ABSA
927,"Electronics,Media",5,Kindle Voyage is absolutely the best! Easy to ...,Positive,Happiness,"{""Display"": ""Positive"", ""Portability"": ""Positi...",kindle voyage is absolutely the best easy to r...,"{'Display': 'Positive', 'Portability': 'Positi..."
1447,Bottoms,3,I typically wear a 4/6 but am a little bigger ...,Negative,Sadness,"{""Material"": ""Negative"", ""Fit"": ""Negative""}",i typically wear a 46 but am a little bigger r...,"{'Comfort': 'Negative', 'Size': 'Negative'}"
981,Tops,3,"This shirt is beautiful, however i thought tha...",Negative,Sadness,"{""Material"": ""Negative"", ""Fit"": ""Negative""}",this shirt is beautiful however i thought that...,"{'Design': 'Positive', 'Material': 'Negative',..."
206,"Toys & Games,Electronics",5,Gave to my great nephew. 2.5 yrs old. He loves...,Positive,Happiness,"{""Entertainment"": ""Positive""}",gave to my great nephew 25 yrs old he loves it...,{'Parental Controls': 'Positive'}
89,"Toys & Games,Electronics",5,I love that you can read books as well as play...,Positive,Happiness,"{""Reading Functionality"": ""Positive"", ""Games"":...",i love that you can read books as well as play...,"{'Functionality': 'Positive', 'Portability': '..."


In [25]:
# Write the test rows plus predictions to CSV without the index column.
# NOTE(review): absolute, machine-specific path.
new_df.to_csv("D:/MSc/Sentiment & Emotion Intelligence/dataset/Product Reviews - ABSA - Predicted.csv", index=False)