# 01-05 : Combine the Chatbot Classifications

In [1]:
import duckdb
import pandas as pd

## DuckDB Configuration

_Reference: [DuckDB guide – Jupyter Notebooks](https://duckdb.org/docs/guides/python/jupyter.html)_

In [2]:
# Load the jupysql extension, which provides the %sql magic used in the cells below.
%load_ext sql

# jupysql settings, as recommended in the DuckDB Jupyter guide:
# return query results as pandas DataFrames ...
%config SqlMagic.autopandas = True
# ... and keep cell output less verbose (no feedback messages,
# no connection string shown with each result).
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

In [3]:
# Point the %sql magic at DuckDB. `:default:` selects DuckDB's default
# connection instead of a database file (per the DuckDB Jupyter guide), so
# nothing is persisted by this notebook's queries.
# NOTE(review): presumably the default connection is in-memory - confirm if persistence matters.
%sql duckdb:///:default:

## Data Load

In [4]:
# Read every JSON file in the 01-04 classifications folder with one DuckDB query.
# `<<` stores the query result in the Python variable `df_source`
# (a pandas DataFrame, because SqlMagic.autopandas is enabled above).
%sql df_source << SELECT * FROM "../../data/interim/01-04_chatbot_classifications/*.json"

# sanity check: print (rows, columns), then render a preview of the loaded frame
print(df_source.shape)
display(df_source)

(555, 3)


Unnamed: 0,chatbot,complaint,meta
0,"{'related': 0, 'evidence': '', 'classification...","{'classification': 'Customer Service', 'servic...","{'id': 3339887, 'created_at': 2021-01-04 11:56..."
1,"{'related': 0, 'evidence': '', 'classification...","{'classification': 'billing', 'service': 'Telk...","{'id': 3341993, 'created_at': 2021-01-05 16:33..."
2,"{'related': 0, 'evidence': '', 'classification...","{'classification': 'Customer Service', 'servic...","{'id': 3342153, 'created_at': 2021-01-05 18:30..."
3,"{'related': 1, 'evidence': 'The customer menti...","{'classification': 'Customer care', 'service':...","{'id': 3344640, 'created_at': 2021-01-07 13:22..."
4,"{'related': 0, 'evidence': '', 'classification...","{'classification': 'Billing', 'service': 'Voda...","{'id': 3345422, 'created_at': 2021-01-08 08:26..."
...,...,...,...
550,"{'related': 1, 'evidence': 'The customer menti...","{'classification': 'Technical', 'service': 'Vo...","{'id': 4517229, 'created_at': 2023-07-18 06:44..."
551,"{'related': 0, 'evidence': '', 'classification...","{'classification': 'Poor Service', 'service': ...","{'id': 4519298, 'created_at': 2023-07-18 21:38..."
552,"{'related': 1, 'evidence': 'The complaint ment...","{'classification': 'Technical issue', 'service...","{'id': 4519459, 'created_at': 2023-07-19 08:06..."
553,"{'related': 1, 'evidence': 'The customer menti...","{'classification': 'Service complaint', 'servi...","{'id': 4522620, 'created_at': 2023-07-20 15:05..."


## Data Preparation

### Normalise the JSON data

In [5]:
# Each source column holds one nested record per row; flatten every one of
# them into its own DataFrame (one column per record key).
df_chatbot, df_complaint, df_meta = (
    pd.json_normalize(df_source[source_column])
    for source_column in ('chatbot', 'complaint', 'meta')
)

# preview the first rows of each flattened frame
for normalised_frame in (df_chatbot, df_complaint, df_meta):
    display(normalised_frame.head())

Unnamed: 0,related,evidence,classification,description,suggestion
0,0,,,,
1,0,,,,
2,0,,,,
3,1,The customer mentioned that the Tobi bot canno...,Customer care assistance,The customer is complaining about not being ab...,Improve the chatbot's availability and provide...
4,0,,,,


Unnamed: 0,classification,service,description,suggestion
0,Customer Service,Call Center,The customer missed a debit order and made arr...,Vodacom should improve their customer service ...
1,billing,Telkom,Billed on the cancelled contract,Refund the customer and improve customer service
2,Customer Service,Vodacom,Vodacom - A dishonest company. I have been try...,Vodacom should assign a dedicated agent with t...
3,Customer care,Vodacom,The customer is unable to call Vodacom to bloc...,Provide an option for customers to speak to an...
4,Billing,Vodacom,Customer received a bill for a data contract t...,Investigate the contract cancellation and bill...


Unnamed: 0,id,created_at,review_rating,review_title,review_content,business_slug
0,3339887,2021-01-04 11:56:13,1,The worst customer care and call center,I missed a debit order in December and made ar...,vodacom
1,3341993,2021-01-05 16:33:36,1,billed on the cancelled contract,Good day\nI have been calling Telkom from Octo...,telkom
2,3342153,2021-01-05 18:30:46,1,Vodacom's dishonesty,Vadacom -A dishonest company. I have been tryi...,vodacom
3,3344640,2021-01-07 13:22:34,1,No option to speak to the agent on the custome...,Am not able to call vodacom to block my number...,vodacom
4,3345422,2021-01-08 08:26:08,1,Vodacom Billing For Non Service,After spending my morning being passed from on...,vodacom


### Combine the DataFrames

In [8]:
# Start from a copy of the review metadata and attach both sets of
# classifications with index-based left joins. All three frames were
# normalised from the same source rows, so the indexes line up row by row.
# The prefixes keep the overlapping column names (classification,
# description, suggestion) apart.
df_classifications = (
    df_meta
    .copy()
    .join(df_chatbot.add_prefix('chatbot_'), how='left')
    .join(df_complaint.add_prefix('complaint_'), how='left')
)

# report the size of the combined frame, then preview it
print(df_classifications.shape)
df_classifications.head()

(555, 15)


Unnamed: 0,id,created_at,review_rating,review_title,review_content,business_slug,chatbot_related,chatbot_evidence,chatbot_classification,chatbot_description,chatbot_suggestion,complaint_classification,complaint_service,complaint_description,complaint_suggestion
0,3339887,2021-01-04 11:56:13,1,The worst customer care and call center,I missed a debit order in December and made ar...,vodacom,0,,,,,Customer Service,Call Center,The customer missed a debit order and made arr...,Vodacom should improve their customer service ...
1,3341993,2021-01-05 16:33:36,1,billed on the cancelled contract,Good day\nI have been calling Telkom from Octo...,telkom,0,,,,,billing,Telkom,Billed on the cancelled contract,Refund the customer and improve customer service
2,3342153,2021-01-05 18:30:46,1,Vodacom's dishonesty,Vadacom -A dishonest company. I have been tryi...,vodacom,0,,,,,Customer Service,Vodacom,Vodacom - A dishonest company. I have been try...,Vodacom should assign a dedicated agent with t...
3,3344640,2021-01-07 13:22:34,1,No option to speak to the agent on the custome...,Am not able to call vodacom to block my number...,vodacom,1,The customer mentioned that the Tobi bot canno...,Customer care assistance,The customer is complaining about not being ab...,Improve the chatbot's availability and provide...,Customer care,Vodacom,The customer is unable to call Vodacom to bloc...,Provide an option for customers to speak to an...
4,3345422,2021-01-08 08:26:08,1,Vodacom Billing For Non Service,After spending my morning being passed from on...,vodacom,0,,,,,Billing,Vodacom,Customer received a bill for a data contract t...,Investigate the contract cancellation and bill...


In [10]:
# Inspect the combined frame's schema: column dtypes and non-null counts
# (a quick way to spot classification fields with missing values).
df_classifications.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555 entries, 0 to 554
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   id                        555 non-null    int64         
 1   created_at                555 non-null    datetime64[ns]
 2   review_rating             555 non-null    int64         
 3   review_title              555 non-null    object        
 4   review_content            555 non-null    object        
 5   business_slug             555 non-null    object        
 6   chatbot_related           555 non-null    int64         
 7   chatbot_evidence          543 non-null    object        
 8   chatbot_classification    543 non-null    object        
 9   chatbot_description       543 non-null    object        
 10  chatbot_suggestion        543 non-null    object        
 11  complaint_classification  555 non-null    object        
 12  complaint_service     

### Filter Classifications

In [12]:
# Keep only the reviews flagged as chatbot related (flag value 1).
# The original row labels are preserved on the filtered frame.
is_chatbot_related = df_classifications['chatbot_related'].eq(1)
df_chatbot_classifications = df_classifications.loc[is_chatbot_related]

# report how many reviews remain, then preview them
print(df_chatbot_classifications.shape)
display(df_chatbot_classifications.head())

(235, 15)


Unnamed: 0,id,created_at,review_rating,review_title,review_content,business_slug,chatbot_related,chatbot_evidence,chatbot_classification,chatbot_description,chatbot_suggestion,complaint_classification,complaint_service,complaint_description,complaint_suggestion
3,3344640,2021-01-07 13:22:34,1,No option to speak to the agent on the custome...,Am not able to call vodacom to block my number...,vodacom,1,The customer mentioned that the Tobi bot canno...,Customer care assistance,The customer is complaining about not being ab...,Improve the chatbot's availability and provide...,Customer care,Vodacom,The customer is unable to call Vodacom to bloc...,Provide an option for customers to speak to an...
5,3347241,2021-01-10 11:32:59,1,"Airtime charged, but not credited to my phone",Bought Airtime online through the Vodacom App ...,vodacom,1,The customer mentioned trying to chat with TOB...,Customer service,The customer complained about being thrown out...,Improve the stability of the chatbot to preven...,Billing,Airtime,The customer bought airtime online but it was ...,Investigate the issue and credit the airtime t...
11,3353838,2021-01-15 11:32:11,1,Chatbot Tobi/ Voice Bundle,I am disappointed at how your service has beco...,vodacom,1,The complaint mentions the introduction of a c...,Limited functionality,The chatbot has made it impossible for custome...,Improve the chatbot's capabilities to handle a...,Service issue,Voice Bundle,Failed to load voice bundle but debited the cu...,Load the customer's voice bundle or reimburse ...
12,3358601,2021-01-20 06:28:13,1,"Vodacom not assisting customers, but extorting...","I have many complaints, but will highlight the...",vodacom,1,The complaint mentions the chatbot as being da...,Ineffective chatbot,The chatbot does not provide helpful assistanc...,Improve the chatbot's ability to provide accur...,Poor customer service,Vodacom,The customer is unable to contact Vodacom due ...,Resolve the issue of phone locking and improve...
15,3367621,2021-01-27 10:41:25,1,Missing airtime/No customer service,My airtime just went missing. Failed to purcha...,vodacom,1,The complaint mentions a bot system which does...,Technical issue,The customer is complaining about the non-func...,Improve the bot system to ensure it is functio...,Service issue,Vodacom,"The customer's airtime went missing, failed to...",Provide better customer service support and fi...


## Exploratory Data Analysis