# 01-04 : Classify Chatbot Complaints

Use a LLM to classify the chatbot complaints found in `01-03`.

In [1]:
import os
import pandas as pd
import json
from time import sleep
from typing import List, Dict, Tuple
from pprint import pprint
from tqdm.notebook import tqdm

from dotenv import load_dotenv, find_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [2]:
# read local .env file
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

## Data Load

In [3]:
tqdm.pandas()

In [4]:
df_reviews = pd.read_parquet('../../data/interim/01-03_chatbot_reviews.parquet.gz')

print(df_reviews.shape)
display(df_reviews.head())

(564, 33)


Unnamed: 0,id,user_id,created_at,authorDisplayName,author,authorAvatar,author_id,review_title,review_rating,review_content,...,business_reporting,author_created_date,author_total_reviews_count,attachments,business,kw_chatbot_review_title,kw_chatbot_review_content,kw_tobi_review_title,kw_tobi_review_content,chatbot_review
29,4522688,4f7c71a0-26ff-11ee-a3bf-63eff257072a,2023-07-20 15:30:46,Dedre S,Dedre S,,4f7c71a0-26ff-11ee-a3bf-63eff257072a,Get the job done!,1,If I could it would be a negative 2 star ratin...,...,,2023-07-20,1,[],cell-c,False,True,False,False,True
69,4522620,0587c0b8-31fa-11e8-83f4-f23c91bb6188,2023-07-20 15:05:44,Nelly N,Nelly N,,0587c0b8-31fa-11e8-83f4-f23c91bb6188,"Tired of speaking to TOBI, we need humans",3,I bought WhatsApp bundles for R35 on the 13th ...,...,,2011-08-01,11,[],vodacom,False,False,True,True,True
122,4519459,0883e389-31fa-11e8-83f4-f23c91bb6188,2023-07-19 08:06:30,Nelia,Nelia,,0883e389-31fa-11e8-83f4-f23c91bb6188,"USELESS options and even more useless ""BOT"" to...",1,Their webmail7 Suddenly doesn't work anymore. ...,...,,2012-01-24,46,[],vodacom,False,True,False,False,True
125,4519298,0978c1b7-31fa-11e8-83f4-f23c91bb6188,2023-07-18 21:38:08,Diane,Diane,,0978c1b7-31fa-11e8-83f4-f23c91bb6188,Is it Vodacom? or should it be Vodapong or Vod...,1,I recently moved into an apartment/townhouse i...,...,,2012-03-12,5,[],vodacom,False,True,False,False,True
153,4517229,2ddfd4c6-31fa-11e8-83f4-f23c91bb6188,2023-07-18 06:44:51,Ishmael N,Ishmael N,,2ddfd4c6-31fa-11e8-83f4-f23c91bb6188,Vodacom eSim,2,I got myself an iPhone and someone told me abo...,...,,2016-09-14,44,[],vodacom,False,True,False,False,True


## Classification

### Configure the Prompt Template

In [5]:
# To control the randomness and creativity of the generated
# text by an LLM, use temperature = 0.0
chat = ChatOpenAI(temperature=0.0, max_tokens=512)

In [6]:
# define the template string
template_string = """\
For the customer complaint only the JSON output is expected. Do not answer with anything except JSON.

```json
{{  
    chatbot : {{
        related: bool,
        evidence: string,
        classification: string,
        description: string,
        suggestion: string
    }}

    complaint : {{
        classification: string,
        service: string,
        description: string,
        suggestion: string
    }}
}}
```

Use the following guidelines:

- chatbot
    - related: 1 if the complaint relates to a chatbot, 0 otherwise
    - evidence: a string providing the reasoning why a chatbot is related to the complaint
    - classification: a string indicating the classification of the complaint as it relates to the chatbot
    - description: a string providing a description of the complaint as it relates to the chatbot
    - suggestion: Describe what can be done to improve the chatbot
- complaint
    - classification: a string indicating the classification of the complaint
    - service: Describe the service the complaint is about
    - description: Describe why is the customer complaining
    - suggestion: Describe what can be done to resolve the complaint

```complaint
{complaint}
```
"""

In [7]:
# create the prompt template
prompt_template = ChatPromptTemplate.from_template(template_string)
prompt_template

ChatPromptTemplate(input_variables=['complaint'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['complaint'], output_parser=None, partial_variables={}, template='For the customer complaint only the JSON output is expected. Do not answer with anything except JSON.\n\n```json\n{{  \n    chatbot : {{\n        related: bool,\n        evidence: string,\n        classification: string,\n        description: string,\n        suggestion: string\n    }}\n\n    complaint : {{\n        classification: string,\n        service: string,\n        description: string,\n        suggestion: string\n    }}\n}}\n```\n\nUse the following guidelines:\n\n- chatbot\n    - related: 1 if the complaint relates to a chatbot, 0 otherwise\n    - evidence: a string providing the reasoning why a chatbot is related to the complaint\n    - classification: a string indicating the classification of the complaint as it relates to the chatbot\n    - d

#### Test the Prompt Template

In [8]:
# set the customer complaint
customer_complaint = """\
Tired of speaking to TOBI, we need humans

I bought WhatsApp bundles for R35 on the 13th July 2023, which have not been allocated till today the 20th, but i was rewarded with 3.5 Vodabucks. On the 14th I called customer service but was referred to TOBI the BOT, of which it was hard for TOBI to understand because he is an Automated Machine, I then asked to speak to a human, with no success. Sent an email to customer care, got a reference but no call back. I went to Sandton branch, but they could not help me because I bought the data online via capitec app. Can we please speak to humans, its really tiring not being able to express yourself. Kindly serve your customers well, this is really frustrating.
"""

# create the customer message
customer_messages = prompt_template.format_messages(
    complaint=customer_complaint)

# show the formatted customer message
print(customer_messages[0])

content='For the customer complaint only the JSON output is expected. Do not answer with anything except JSON.\n\n```json\n{  \n    chatbot : {\n        related: bool,\n        evidence: string,\n        classification: string,\n        description: string,\n        suggestion: string\n    }\n\n    complaint : {\n        classification: string,\n        service: string,\n        description: string,\n        suggestion: string\n    }\n}\n```\n\nUse the following guidelines:\n\n- chatbot\n    - related: 1 if the complaint relates to a chatbot, 0 otherwise\n    - evidence: a string providing the reasoning why a chatbot is related to the complaint\n    - classification: a string indicating the classification of the complaint as it relates to the chatbot\n    - description: a string providing a description of the complaint as it relates to the chatbot\n    - suggestion: Describe what can be done to improve the chatbot\n- complaint\n    - classification: a string indicating the classificati

In [9]:
# create the chatbot response
customer_response = chat(customer_messages)

print(customer_response.content)

{
    "chatbot": {
        "related": 1,
        "evidence": "The complaint mentions TOBI the BOT, an automated machine",
        "classification": "Customer dissatisfaction with chatbot",
        "description": "The customer is frustrated with the chatbot and wants to speak to a human",
        "suggestion": "Improve the chatbot's understanding and provide an option to speak to a human"
    },
    "complaint": {
        "classification": "Customer service issue",
        "service": "WhatsApp bundles",
        "description": "The customer purchased WhatsApp bundles but they were not allocated, and they were unable to get assistance from customer service",
        "suggestion": "Allocate the purchased WhatsApp bundles and improve customer service response"
    }
}


In [10]:
def classify_complaint(complaint):
    """Classify the complaint using the chatbot."""
    # create the customer message
    customer_messages = prompt_template.format_messages(
        complaint=complaint)

    # create the chatbot response
    customer_response = chat(customer_messages)

    # return the chatbot response
    return json.loads(customer_response.content)

# test the function
pprint(
    classify_complaint("""\
Bad sevice

Worst customer service ever, a customer for over 20 years, fall behind on one payment and they cut your line, pay the outstanding ballance and you wait 5 hours instead of 2 as indicated to re connect line, bit they are easy to take your money. Being a red vip member means nothing, after 20 plus years i will cancell my contrack. 20 years vodacom and thjs is hkw you yteay customers, no wonder you are loosing yout market share to mtn, you are more costly than mtn but stiil o stayed a loyal customer, bot no mlre, inwork on a customer servive for a long time upmarket mercedes customers and will rever all customer to buy mtn and not vodacom.

PATHETIC customer service""")
)

{'chatbot': {'classification': '',
             'description': '',
             'evidence': '',
             'related': 0,
             'suggestion': ''},
 'complaint': {'classification': 'Bad service',
               'description': 'Worst customer service ever, a customer for '
                              'over 20 years, fall behind on one payment and '
                              'they cut your line, pay the outstanding balance '
                              'and you wait 5 hours instead of 2 as indicated '
                              'to reconnect line, but they are easy to take '
                              'your money. Being a red VIP member means '
                              'nothing, after 20 plus years I will cancel my '
                              'contract. 20 years Vodacom and this is how you '
                              'treat customers, no wonder you are losing your '
                              'market share to MTN, you are more costly than '
          

In [11]:
pprint(
    classify_complaint("""\
***** This Tobias 😒

Is there a way to speak with an actual customer care agent because this Tobias thing is an idiotic problem that has wasted more than enough of my time.

I don't know if my number has been disconnected or if I'm experiencing a network issue but today I needed access to my number but no I was stuck with Tobias who either ends my calls or asks me the same questions over again.

Like WTH???""")
)

{'chatbot': {'classification': 'Customer dissatisfaction with chatbot',
             'description': 'The customer is frustrated with the chatbot '
                            'Tobias and wants to speak with a human customer '
                            'care agent.',
             'evidence': 'The complaint mentions Tobias, which is the name of '
                         'the chatbot.',
             'related': 1,
             'suggestion': "Improve the chatbot's ability to understand and "
                           'address customer concerns, and provide an option '
                           'for customers to speak with a human agent if '
                           'needed.'},
 'complaint': {'classification': 'Technical issue',
               'description': 'The customer is unable to access their phone '
                              'number and suspects a disconnection or network '
                              'issue.',
               'service': 'Phone number access',
             

### Classify the complaints

In [12]:
def classify_complaints(data:pd.DataFrame,
                        output_path:str,
                        retry_count:int=5) -> List[Dict]:
    """Classify the complaints using the chatbot."""
    classifications = []

    for _, row in tqdm(data.iterrows(), total=data.shape[0]):
        # get the complaint metadata
        meta_data = {
            'id': row['id'],
            'created_at': row['created_at'].strftime('%Y-%m-%d %H:%M:%S'),
            'review_rating': row['review_rating'],
            'review_title': row['review_title'],
            'review_content': row['review_content'],
            'business_slug': row['business_slug'],
        }

        # classify the complaint
        retries = 0
        while True:
            try:
                classification = classify_complaint(
                    f'{meta_data["review_title"]}\n\n{meta_data["review_content"]}')
                
                # add the metadata to the classification
                classification['meta'] = meta_data

                # save the classification to disk
                output_file = f'{output_path}/{meta_data["id"]}.json'
                with open(output_file, 'w') as f:
                    json.dump(classification, f)
                    f.write('\n')

                # add the classification to the list
                classifications.append(classification)
                break
            except Exception as e:
                print('.', end='')
                retries += 1
                sleep(retries * 2)
                if retries > retry_count:
                    print(f'Failed to classify complaint {meta_data["id"]}')
                    print(e)
                    break

    return classifications

# test the function
classifications = classify_complaints(
    data=df_reviews.sample(10, random_state=42),
    output_path='../../data/interim/01-04_chatbot_classifications')
pprint(classifications[:2], indent=2)

  0%|          | 0/10 [00:00<?, ?it/s]

[ { 'chatbot': { 'classification': 'Customer Service',
                 'description': 'The customer complains about the chatbot '
                                'being unhelpful and not providing assistance',
                 'evidence': 'The customer mentions trying to contact the '
                             'chatbot',
                 'related': 1,
                 'suggestion': "Improve the chatbot's intelligence and ability "
                               'to assist customers'},
    'complaint': { 'classification': 'Customer Service',
                   'description': 'The customer complains about the lack of '
                                  'human support and difficulty in resolving a '
                                  'fraud issue',
                   'service': 'Vodacom',
                   'suggestion': 'Provide better options for customers to '
                                 'speak to a human representative and improve '
                                 'email supp

In [13]:
# classify all the complaints
classifications = classify_complaints(
    data=df_reviews,
    output_path='../../data/interim/01-04_chatbot_classifications')

  0%|          | 0/564 [00:00<?, ?it/s]

......Failed to classify complaint 4508220
Unterminated string starting at: line 12 column 24 (char 263)
......Failed to classify complaint 4459705
Unterminated string starting at: line 12 column 24 (char 286)
......Failed to classify complaint 3806240
Unterminated string starting at: line 12 column 24 (char 276)
......Failed to classify complaint 3580113
Unterminated string starting at: line 12 column 24 (char 266)
......Failed to classify complaint 3565722
Unterminated string starting at: line 12 column 24 (char 267)
.............Failed to classify complaint 3399497
Unterminated string starting at: line 12 column 24 (char 263)
......Failed to classify complaint 3386063
Unterminated string starting at: line 12 column 24 (char 267)
......Failed to classify complaint 3354780
Unterminated string starting at: line 12 column 24 (char 273)
......Failed to classify complaint 3353960
Unterminated string starting at: line 12 column 24 (char 264)
