# 01-09 : Aspect Based Sentiment Analysis (LLM)

In [1]:
import os
import pandas as pd
import json
from time import sleep
from typing import List, Dict, Tuple
from pprint import pprint
from tqdm.notebook import tqdm

from dotenv import load_dotenv, find_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [2]:
# read local .env file
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
tqdm.pandas()

## Data Load

In [4]:
# load the human feedback dataset
df_source = pd.read_parquet('../../data/interim/01-06_human_classified.parquet')

# show the data loaded
print(df_source.shape)
display(df_source.head(3))

(235, 17)


Unnamed: 0,id,created_at,review_rating,review_title,review_content,business_slug,chatbot_related,chatbot_evidence,chatbot_classification,chatbot_description,chatbot_suggestion,complaint_classification,complaint_service,complaint_description,complaint_suggestion,human_chatbot_classification,human_complaint_classification
0,3344640,2021-01-07 13:22:34,1,No option to speak to the agent on the custome...,Am not able to call vodacom to block my number...,vodacom,1,The customer mentioned that the Tobi bot canno...,Customer care assistance,The customer is complaining about not being ab...,Improve the chatbot's availability and provide...,Customer care,Vodacom,The customer is unable to call Vodacom to bloc...,Provide an option for customers to speak to an...,"limited functionality, unable to contact human...",blacklist
1,3347241,2021-01-10 11:32:59,1,"Airtime charged, but not credited to my phone",Bought Airtime online through the Vodacom App ...,vodacom,1,The customer mentioned trying to chat with TOB...,Customer service,The customer complained about being thrown out...,Improve the stability of the chatbot to preven...,Billing,Airtime,The customer bought airtime online but it was ...,Investigate the issue and credit the airtime t...,"technical error, unable to contact human agent",missing airtime
2,3353838,2021-01-15 11:32:11,1,Chatbot Tobi/ Voice Bundle,I am disappointed at how your service has beco...,vodacom,1,The complaint mentions the introduction of a c...,Limited functionality,The chatbot has made it impossible for custome...,Improve the chatbot's capabilities to handle a...,Service issue,Voice Bundle,Failed to load voice bundle but debited the cu...,Load the customer's voice bundle or reimburse ...,"limited functionality, unable to contact human...",voice bundle


## Aspects

In [5]:
entities_aspects = {
    "Billing & Payments": [
        "Billing accuracy",
        "Payment methods",
        "Refunds/credits",
        "Hidden charges",
        "Monthly costs"
    ],
    "Network & Connectivity": [
        "Signal strength",
        "Network coverage",
        "Data speeds (4G, 5G, etc.)",
        "Call quality",
        "Dropped calls",
        "Roaming"
    ],
    "Customer Service": [
        "Responsiveness",
        "Friendliness/professionalism",
        "Knowledge/competence",
        "Resolution time",
        "Availability (e.g., 24/7 support)"
    ],
    "Chatbots": [
        "User-friendliness",
        "Response accuracy",
        "Speed of response",
        "Ability to understand query",
        "Escalation to human agents"
    ],
    "Account & Plans": [
        "Account management (online portal/apps)",
        "Plan flexibility",
        "Plan pricing",
        "Upgrade/downgrade process",
        "Promotions and offers"
    ],
    "Hardware/Devices": [
        "Setup/ease of installation",
        "Device reliability",
        "Device performance/speed",
        "Rental vs. purchase options",
        "Technical issues"
    ],
    "Value-added Services": [
        "Quality of service",
        "Pricing/value for money",
        "Reliability",
        "Content variety (for streaming)",
        "Ease of use"
    ]
}

## Prompt Template

In [6]:
# get the entities and aspects as a string
entities_aspects_str = json.dumps(entities_aspects, indent=4)

In [7]:
# an example of the output the llm should produce
output_example = {
    "Customer Service": {
        "Responsiveness": "Negative",
        "Friendliness/professionalism": "Negative",
        "Knowledge/competence": "Negative",
        "Availability (e.g., 24/7 support)": "Negative"
    },
    "Chatbots": {
        "User-friendliness": "Neutral",
        "Response accuracy": "Negative",
        "Ability to understand query": "Negative"
    }
}

output_example_str = json.dumps(output_example, indent=4)

In [8]:
# set the template string
template_string = """\
Given the following entities and aspects per entity:

```json
{entities_aspects}
```

Please perform Aspect Based Sentiment Analysis on the following text:

```text
{text}
```

Only the JSON output is expected without any "```json" text surrounding it. Do not answer with anything except JSON. Only respond with entities and aspects present in the text.

Output example:

```json
{output_example}
```
"""

In [9]:
# create the prompt template
prompt_template = ChatPromptTemplate.from_template(template_string)
prompt_template

ChatPromptTemplate(input_variables=['output_example', 'entities_aspects', 'text'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['entities_aspects', 'output_example', 'text'], output_parser=None, partial_variables={}, template='Given the following entities and aspects per entity:\n\n```json\n{entities_aspects}\n```\n\nPlease perform Aspect Based Sentiment Analysis on the following text:\n\n```text\n{text}\n```\n\nOnly the JSON output is expected without any "```json" text surrounding it. Do not answer with anything except JSON. Only respond with entities and aspects present in the text.\n\nOutput example:\n\n```json\n{output_example}\n```\n', template_format='f-string', validate_template=True), additional_kwargs={})])

### Test the template

In [10]:
# get the complaint text
review_title = df_source.loc[1, 'review_title']
review_content = df_source.loc[1, 'review_content']
complaint_text = f'# {review_title}\n\n{review_content}'

pprint(complaint_text)

('# Airtime charged, but not credited to my phone\n'
 '\n'
 'Bought Airtime online through the Vodacom App and on the last step there was '
 'an error. The money has come off my credit card, but the airtime has not '
 "been credited to my phone. Tried the online chat and was thrown out ('your "
 "chat has been ended'), then tried calling the helpline and again did not get "
 'anywhere - told there were high caller volumes and I should chat tot TOBI '
 'the chatbot, or call back later and then was thrown out of the call. No '
 'option to just stay in a waiting line.... \n'
 'Very bad service Vodacom! ')


In [11]:
# create the prompt
prompt = prompt_template.format_messages(
    entities_aspects=entities_aspects_str,
    text=complaint_text,
    output_example=output_example_str
)

#print(prompt[0].content)

## Functions

In [12]:
def classify_aspects(text:str,
                     entities_aspects:str,
                     output_example:str,
                     prompt_template:ChatPromptTemplate,
                     retry_count:int=5) -> Dict:
    """Classify the aspects of a given text and return an output dictionary."""
    chat = ChatOpenAI(
        temperature=0.0,
        max_tokens=512,
        model='gpt-4')

    # create the prompt
    prompt = prompt_template.format_messages(
        entities_aspects=entities_aspects,
        text=text,
        output_example=output_example
    )

    # get the llm response
    while True:
        try:    
            response = chat(prompt)
            break
        except Exception as e:
            print('.', end='')
            retries += 1
            sleep(retries * 2)
            if retries > retry_count:
                print(f'Failed to classify complaint {meta_data["id"]}')
                print(e)
                return None


    # return the result
    return json.loads(response.content)

# test the function
aspects = classify_aspects(
    text=complaint_text,
    entities_aspects=entities_aspects_str,
    output_example=output_example_str,
    prompt_template=prompt_template)
pprint(aspects)  

{'Account & Plans': {'Account management (online portal/apps)': 'Negative'},
 'Billing & Payments': {'Refunds/credits': 'Negative'},
 'Chatbots': {'Ability to understand query': 'Negative',
              'User-friendliness': 'Negative'},
 'Customer Service': {'Availability (e.g., 24/7 support)': 'Negative',
                      'Responsiveness': 'Negative'}}


In [13]:
def flatten_aspects(id:str, classification:Dict) -> Dict:
    """Flatten the aspects to be compatible with a dataframe and add the id as a column"""
    flat_aspects = []

    for entity, aspects in classification.items():
        for aspect, polarity in aspects.items():
            flat_aspects.append({
                'id': id,
                'entity': entity,
                'aspect': aspect,
                'polarity': polarity,
            })

    return flat_aspects

# test the function
flatten = flatten_aspects("12", aspects)
display(pd.DataFrame(flatten))
#pprint(flatten)

Unnamed: 0,id,entity,aspect,polarity
0,12,Billing & Payments,Refunds/credits,Negative
1,12,Customer Service,Responsiveness,Negative
2,12,Customer Service,"Availability (e.g., 24/7 support)",Negative
3,12,Chatbots,User-friendliness,Negative
4,12,Chatbots,Ability to understand query,Negative
5,12,Account & Plans,Account management (online portal/apps),Negative


In [14]:
def classify_dataset(
        data:pd.DataFrame,
        entities_aspects:str,
        output_example:str,
        prompt_template:ChatPromptTemplate,
        id_column:str='id',
        text_column:str='text') -> pd.DataFrame:
    """Classify the entire dataset using the given entities and aspects."""
    result = []

    for index, row in tqdm(data.iterrows(), total=data.shape[0]):
        # get the row data
        id = row[id_column]
        text = row[text_column]

        # classify the text
        classification = classify_aspects(
            text=text,
            entities_aspects=entities_aspects,
            output_example=output_example,
            prompt_template=prompt_template)
        
        result.extend(
            flatten_aspects(id=id, classification=classification)
        )

    return pd.DataFrame(result)

# test the function
df_test = df_source.head(2)
df_test['text'] = '# ' + df_source.review_title + '\n\n' + df_source.review_content

df_result = classify_dataset(
    data=df_test,
    entities_aspects=entities_aspects_str,
    output_example=output_example_str,
    prompt_template=prompt_template,
    id_column='id',
    text_column='text')

display(df_result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['text'] = '# ' + df_source.review_title + '\n\n' + df_source.review_content


  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0,id,entity,aspect,polarity
0,3344640,Customer Service,"Availability (e.g., 24/7 support)",Negative
1,3344640,Chatbots,User-friendliness,Negative
2,3344640,Chatbots,Response accuracy,Negative
3,3344640,Chatbots,Ability to understand query,Negative
4,3347241,Billing & Payments,Billing accuracy,Negative
5,3347241,Billing & Payments,Refunds/credits,Negative
6,3347241,Customer Service,Responsiveness,Negative
7,3347241,Customer Service,"Availability (e.g., 24/7 support)",Negative
8,3347241,Chatbots,User-friendliness,Negative
9,3347241,Chatbots,Ability to understand query,Negative


## Classification

In [15]:
# add the text column
df_source['text'] = '# ' + df_source.review_title + '\n\n' + df_source.review_content

# perform the classification
df_result = classify_dataset(
    data=df_source,
    entities_aspects=entities_aspects_str,
    output_example=output_example_str,
    prompt_template=prompt_template,
    id_column='id',
    text_column='text')

# save the results
df_result.to_parquet('../../data/interim/01-09_absa.parquet')

  0%|          | 0/235 [00:00<?, ?it/s]