# 02-05 : Aspect-Based Sentiment Analysis (LLM)

In [1]:
import os
import pandas as pd
import json
from time import sleep
from typing import List, Dict, Tuple
from pprint import pprint
from tqdm.notebook import tqdm

from dotenv import load_dotenv, find_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [2]:
# Load the variables from the .env file located by find_dotenv(), then hand the
# OpenAI key to the openai client. The key is read from the environment rather
# than hardcoded; os.environ[...] raises KeyError when OPENAI_API_KEY is not set.
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
# Register tqdm's pandas integration.
# NOTE(review): no pandas `progress_*` call is visible in this part of the
# notebook - confirm this registration is still needed.
tqdm.pandas()

## Data Load

In [4]:
# load the tweets from the interim parquet file (path is relative to the notebook)
df_source = pd.read_parquet('../../data/interim/02-03_twitter_vodacom.parquet')

# display the shape and the first 3 rows
print(df_source.shape)
display(df_source.head(3))

(983, 22)


Unnamed: 0,id,conversationId,date,rawContent,inReplyToTweetId,inReplyToUserId,inReplyToUsername,inReplyToDisplayName,language,likeCount,...,retweetCount,source,url,userCreated,userDisplayName,userFollowersCount,userId,userLocation,userStatusesCount,userUsername
0,1684937340169375744,1684937340169375744,2023-07-28 14:42:21+00:00,"Serious question:\n\nHas anyone, anywhere ever...",,,,,en,2,...,1,Twitter for Android,https://twitter.com/TopEditorInt/status/168493...,2009-03-04 11:44:38+00:00,TopEditor Internatio,2956,22759776,USA & SA & UK,70294,TopEditorInt
1,1684892139375710208,1684890636997029888,2023-07-28 11:42:44+00:00,@Vodacom That’s why I wanted to talk to someon...,1.6848917624965652e+18,14574763.0,Vodacom,Vodacom,en,0,...,0,Twitter for iPhone,https://twitter.com/AkonaMhlana/status/1684892...,2009-05-20 14:38:05+00:00,Ntaba ayilali👏🏽,135,41370409,South Africa,866,AkonaMhlana
2,1684890636997029888,1684890636997029888,2023-07-28 11:36:46+00:00,@Vodacom what number should I dial to talk to...,,14574763.0,Vodacom,Vodacom,en,0,...,0,Twitter for iPhone,https://twitter.com/AkonaMhlana/status/1684890...,2009-05-20 14:38:05+00:00,Ntaba ayilali👏🏽,135,41370409,South Africa,866,AkonaMhlana


## Aspects

In [5]:
# Taxonomy used for the analysis: each key is an entity and each value is the
# list of aspects that can be rated for that entity.
entities_aspects = {
    "Billing & Payments": [
        "Billing accuracy",
        "Payment methods",
        "Refunds/credits",
        "Hidden charges",
        "Monthly costs"
    ],
    "Network & Connectivity": [
        "Signal strength",
        "Network coverage",
        "Data speeds (4G, 5G, etc.)",
        "Call quality",
        "Dropped calls",
        "Roaming"
    ],
    "Customer Service": [
        "Responsiveness",
        "Friendliness/professionalism",
        "Knowledge/competence",
        "Resolution time",
        "Availability (e.g., 24/7 support)"
    ],
    "Chatbots": [
        "User-friendliness",
        "Response accuracy",
        "Speed of response",
        "Ability to understand query",
        "Escalation to human agents"
    ],
    "Account & Plans": [
        "Account management (online portal/apps)",
        "Plan flexibility",
        "Plan pricing",
        "Upgrade/downgrade process",
        "Promotions and offers"
    ],
    "Hardware/Devices": [
        "Setup/ease of installation",
        "Device reliability",
        "Device performance/speed",
        "Rental vs. purchase options",
        "Technical issues"
    ],
    "Value-added Services": [
        "Quality of service",
        "Pricing/value for money",
        "Reliability",
        "Content variety (for streaming)",
        "Ease of use"
    ]
}

## Prompt Template

In [6]:
# serialise the entities and aspects to an indented JSON string
entities_aspects_str = json.dumps(entities_aspects, indent=4)

In [7]:
# an example of the output the llm should produce:
# entity -> aspect -> polarity, listing only some of the aspects of each entity
output_example = {
    "Customer Service": {
        "Responsiveness": "Negative",
        "Friendliness/professionalism": "Negative",
        "Knowledge/competence": "Negative",
        "Availability (e.g., 24/7 support)": "Negative"
    },
    "Chatbots": {
        "User-friendliness": "Neutral",
        "Response accuracy": "Negative",
        "Ability to understand query": "Negative"
    }
}

# serialise the example to an indented JSON string
output_example_str = json.dumps(output_example, indent=4)

In [8]:
# Prompt text with three placeholders: {entities_aspects}, {text} and
# {output_example}. The trailing backslash after the opening quotes keeps the
# string from starting with a blank line.
template_string = """\
TOBi is the name of the Vodacom Chatbot.
Given the following entities and aspects per entity:

```json
{entities_aspects}
```

Please perform Aspect Based Sentiment Analysis on the following text:

```text
{text}
```

Only the JSON output is expected without any "```json" text surrounding it. Do not answer with anything except JSON. Only respond with entities and aspects present in the text.

Output example:

```json
{output_example}
```
"""

In [9]:
# build the chat prompt template from the template string;
# the bare name on the last line displays its repr as the cell output
prompt_template = ChatPromptTemplate.from_template(template_string)
prompt_template

ChatPromptTemplate(input_variables=['output_example', 'text', 'entities_aspects'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['entities_aspects', 'output_example', 'text'], output_parser=None, partial_variables={}, template='TOBi is the name of the Vodacom Chatbot.\nGiven the following entities and aspects per entity:\n\n```json\n{entities_aspects}\n```\n\nPlease perform Aspect Based Sentiment Analysis on the following text:\n\n```text\n{text}\n```\n\nOnly the JSON output is expected without any "```json" text surrounding it. Do not answer with anything except JSON. Only respond with entities and aspects present in the text.\n\nOutput example:\n\n```json\n{output_example}\n```\n', template_format='f-string', validate_template=True), additional_kwargs={})])

## Functions

In [10]:
def classify_aspects(text:str,
                     entities_aspects:str,
                     output_example:str,
                     prompt_template:ChatPromptTemplate,
                     retry_count:int=5) -> Dict:
    """Classify the aspects of a given text and return an output dictionary.

    Args:
        text: The text to analyse.
        entities_aspects: JSON string of the entities and aspects, inserted
            into the prompt.
        output_example: JSON string showing the expected output shape,
            inserted into the prompt.
        prompt_template: Chat prompt template exposing the `entities_aspects`,
            `text` and `output_example` variables.
        retry_count: How many times a failed LLM call is retried before
            giving up on this text.

    Returns:
        The JSON object parsed from the LLM response (the prompt asks for
        entity -> aspect -> polarity), or None when every attempt failed or
        the response is not a JSON object.
    """
    chat = ChatOpenAI(
        temperature=0.0,
        max_tokens=512,
        model='gpt-4')

    # create the prompt
    prompt = prompt_template.format_messages(
        entities_aspects=entities_aspects,
        text=text,
        output_example=output_example
    )

    # get the llm response
    # The counter has to exist before the first failure: without it the
    # `retries += 1` below raises UnboundLocalError inside the handler and
    # aborts the whole run instead of retrying.
    retries = 0
    while True:
        try:
            response = chat(prompt)
            break
        except Exception as e:
            print('.', end='')
            retries += 1
            # give up before sleeping, so the final failure does not wait for nothing
            if retries > retry_count:
                print(f'Failed to classify complaint: {text}')
                print(e)
                return None
            # linear back-off: 3s, 6s, 9s, ...
            sleep(retries * 3)

    # parse the response
    try:
        result = json.loads(response.content)
    except Exception as e:
        print(e)
        return None

    # json.loads also accepts lists, strings and numbers; only an object
    # matches the declared Dict return type.
    if not isinstance(result, dict):
        print(f'Unexpected non-object JSON response: {result!r}')
        return None

    return result
    

In [11]:
def flatten_aspects(id:str, classification:Dict) -> List[Dict]:
    """Flatten the aspects to be compatible with a dataframe and add the id as a column.

    Args:
        id: Identifier of the source row; copied onto every output record.
        classification: Mapping of entity -> {aspect: polarity}.

    Returns:
        A list with one record per (entity, aspect) pair, each holding the
        keys `id`, `entity`, `aspect` and `polarity`. The list is empty when
        `classification` is not a mapping.
    """
    flat_aspects = []

    # The classification is model output, so its shape is not guaranteed;
    # return nothing rather than raise on a non-mapping value.
    if not isinstance(classification, dict):
        return flat_aspects

    for entity, aspects in classification.items():
        # skip entities whose value is not an aspect -> polarity mapping
        # (e.g. a bare string), instead of failing on `.items()`
        if not isinstance(aspects, dict):
            continue

        for aspect, polarity in aspects.items():
            flat_aspects.append({
                'id': id,
                'entity': entity,
                'aspect': aspect,
                'polarity': polarity,
            })

    return flat_aspects

In [12]:
def classify_dataset(
        data:pd.DataFrame,
        entities_aspects:str,
        output_example:str,
        prompt_template:ChatPromptTemplate,
        id_column:str='id',
        text_column:str='text') -> pd.DataFrame:
    """Run the aspect classification over every row of a dataframe.

    Each row's text is classified with `classify_aspects` and the outcome is
    flattened with `flatten_aspects`. Rows for which the classification is
    None contribute no records.

    Args:
        data: The dataframe holding the texts.
        entities_aspects: JSON string of the entities and aspects.
        output_example: JSON string of the example output.
        prompt_template: The chat prompt template passed to the classifier.
        id_column: Name of the column holding the row identifier.
        text_column: Name of the column holding the text to classify.

    Returns:
        A dataframe built from all the flattened records.
    """
    records = []
    row_count = data.shape[0]

    for _, row in tqdm(data.iterrows(), total=row_count):
        row_id = row[id_column]

        # classify the text of the current row
        classification = classify_aspects(
            text=row[text_column],
            entities_aspects=entities_aspects,
            output_example=output_example,
            prompt_template=prompt_template)

        # nothing to collect when the classification failed
        if classification is None:
            continue

        records.extend(
            flatten_aspects(id=row_id, classification=classification)
        )

    return pd.DataFrame(records)

## Classification

In [13]:
# perform the classification on the `rawContent` column, keyed by the `id` column
# (classify_dataset calls the LLM classifier once per row of df_source)
df_result = classify_dataset(
    data=df_source,
    entities_aspects=entities_aspects_str,
    output_example=output_example_str,
    prompt_template=prompt_template,
    id_column='id',
    text_column='rawContent')

# save the results
# NOTE: this only runs after the whole loop has finished, so an exception
# raised during classification leaves nothing on disk.
df_result.to_parquet('../../data/interim/02-05_absa.parquet')

  0%|          | 0/983 [00:00<?, ?it/s]

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIError: HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
).
