# 03-05 : Time Series Analysis

Reference: [Lecture 4: Pre-processing and advanced indexing](https://learn.london.ac.uk/mod/page/view.php?id=111306&forceview=1)

In [1]:
import pandas as pd

## Data Collection

### Functions

In [2]:
def set_category(dataframe:pd.DataFrame, column_name:str) -> None:
    """Convert one column of ``dataframe`` to the pandas ``category`` dtype.

    The frame is modified in place: the named column is replaced by its
    categorical equivalent and nothing is returned.

    Args:
        dataframe: Frame that holds the column to convert.
        column_name: Label of the column to convert.
    """
    as_category = dataframe[column_name].astype('category')
    dataframe[column_name] = as_category

### Hellopeter

In [3]:
# read the human-classified hellopeter reviews and index them by the 'id' column
df_hellopeter = (
    pd.read_parquet('../../data/interim/01-06_human_classified.parquet')
    .set_index('id')
)

# preview: dimensions first, then the first three rows
print(df_hellopeter.shape)
display(df_hellopeter.head(3))

(235, 16)


Unnamed: 0_level_0,created_at,review_rating,review_title,review_content,business_slug,chatbot_related,chatbot_evidence,chatbot_classification,chatbot_description,chatbot_suggestion,complaint_classification,complaint_service,complaint_description,complaint_suggestion,human_chatbot_classification,human_complaint_classification
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3344640,2021-01-07 13:22:34,1,No option to speak to the agent on the custome...,Am not able to call vodacom to block my number...,vodacom,1,The customer mentioned that the Tobi bot canno...,Customer care assistance,The customer is complaining about not being ab...,Improve the chatbot's availability and provide...,Customer care,Vodacom,The customer is unable to call Vodacom to bloc...,Provide an option for customers to speak to an...,"limited functionality, unable to contact human...",blacklist
3347241,2021-01-10 11:32:59,1,"Airtime charged, but not credited to my phone",Bought Airtime online through the Vodacom App ...,vodacom,1,The customer mentioned trying to chat with TOB...,Customer service,The customer complained about being thrown out...,Improve the stability of the chatbot to preven...,Billing,Airtime,The customer bought airtime online but it was ...,Investigate the issue and credit the airtime t...,"technical error, unable to contact human agent",missing airtime
3353838,2021-01-15 11:32:11,1,Chatbot Tobi/ Voice Bundle,I am disappointed at how your service has beco...,vodacom,1,The complaint mentions the introduction of a c...,Limited functionality,The chatbot has made it impossible for custome...,Improve the chatbot's capabilities to handle a...,Service issue,Voice Bundle,Failed to load voice bundle but debited the cu...,Load the customer's voice bundle or reimburse ...,"limited functionality, unable to contact human...",voice bundle


In [4]:
# inspect the column dtypes and non-null counts of the loaded review data
df_hellopeter.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, 3344640 to 4522620
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   created_at                      235 non-null    datetime64[ns]
 1   review_rating                   235 non-null    int64         
 2   review_title                    235 non-null    object        
 3   review_content                  235 non-null    object        
 4   business_slug                   235 non-null    object        
 5   chatbot_related                 235 non-null    int64         
 6   chatbot_evidence                235 non-null    object        
 7   chatbot_classification          235 non-null    object        
 8   chatbot_description             235 non-null    object        
 9   chatbot_suggestion              235 non-null    object        
 10  complaint_classification        235 non-null    object        
 11  c

In [5]:
def set_hellopeter_categorical(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the review data with its label columns as categories.

    The input frame is left untouched; the conversion is applied to a copy.

    Args:
        dataframe: Review data containing every column listed below.

    Returns:
        A new frame in which each listed column has the ``category`` dtype.
    """
    categorical_columns = (
        'business_slug',
        'chatbot_classification',
        'complaint_classification',
        'complaint_service',
        'human_chatbot_classification',
        'human_complaint_classification',
    )

    df_result = dataframe.copy()
    for column_name in categorical_columns:
        # set_category converts the column on df_result in place
        set_category(df_result, column_name)

    return df_result

# convert the label columns to the category dtype and confirm the new dtypes
df_hellopeter = df_hellopeter.pipe(set_hellopeter_categorical)
df_hellopeter.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, 3344640 to 4522620
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   created_at                      235 non-null    datetime64[ns]
 1   review_rating                   235 non-null    int64         
 2   review_title                    235 non-null    object        
 3   review_content                  235 non-null    object        
 4   business_slug                   235 non-null    category      
 5   chatbot_related                 235 non-null    int64         
 6   chatbot_evidence                235 non-null    object        
 7   chatbot_classification          235 non-null    category      
 8   chatbot_description             235 non-null    object        
 9   chatbot_suggestion              235 non-null    object        
 10  complaint_classification        235 non-null    category      
 11  c

In [6]:
def set_hellopeter_boolean(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the review data with ``chatbot_related`` cast to bool.

    The input frame is left untouched; the cast is applied to a copy.

    Args:
        dataframe: Review data containing a ``chatbot_related`` column.

    Returns:
        A new frame whose ``chatbot_related`` column has the ``bool`` dtype.
    """
    chatbot_flag = dataframe['chatbot_related'].astype('bool')

    # assign() works on a copy, so the caller's frame keeps its original dtype
    return dataframe.assign(chatbot_related=chatbot_flag)

# cast the boolean flag column and confirm the new dtype
df_hellopeter = df_hellopeter.pipe(set_hellopeter_boolean)
df_hellopeter.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, 3344640 to 4522620
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   created_at                      235 non-null    datetime64[ns]
 1   review_rating                   235 non-null    int64         
 2   review_title                    235 non-null    object        
 3   review_content                  235 non-null    object        
 4   business_slug                   235 non-null    category      
 5   chatbot_related                 235 non-null    bool          
 6   chatbot_evidence                235 non-null    object        
 7   chatbot_classification          235 non-null    category      
 8   chatbot_description             235 non-null    object        
 9   chatbot_suggestion              235 non-null    object        
 10  complaint_classification        235 non-null    category      
 11  c

### Hellopeter Aspect Based Sentiment Analysis data

In [7]:
# load the hellopeter aspect based sentiment analysis data
df_hellopeter_absa = pd.read_parquet('../../data/interim/01-07_quadruples.parquet')

# show the shape and the first rows of the loaded data
print(df_hellopeter_absa.shape)
display(df_hellopeter_absa.head())

(283, 5)


Unnamed: 0,aspect,polarity,opinion,category,id
0,customer care line,negative,,SUPPORT#GENERAL,3344640
1,service Vodacom,negative,bad,SUPPORT#GENERAL,3347241
2,service,negative,limited,SERVICE#GENERAL,3353838
3,chatbot Tobi,negative,disappointed,SERVICE#GENERAL,3353838
4,vodacom,negative,daft,SUPPORT#GENERAL,3358601


In [9]:
def set_hellopeter_absa_categorical(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the ABSA data with its text label columns as categories.

    The input frame is left untouched; the conversion is applied to a copy.

    Args:
        dataframe: ABSA data containing every column listed below.

    Returns:
        A new frame in which each listed column has the ``category`` dtype.
    """
    categorical_columns = ('aspect', 'polarity', 'opinion', 'category')

    df_result = dataframe.copy()
    for column_name in categorical_columns:
        # set_category converts the column on df_result in place
        set_category(df_result, column_name)

    return df_result

# convert the ABSA label columns to the category dtype and confirm the new dtypes
df_hellopeter_absa = df_hellopeter_absa.pipe(set_hellopeter_absa_categorical)
df_hellopeter_absa.info()

# preview the converted data
display(df_hellopeter_absa.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 283 entries, 0 to 282
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   aspect    283 non-null    category
 1   polarity  283 non-null    category
 2   opinion   283 non-null    category
 3   category  283 non-null    category
 4   id        283 non-null    int64   
dtypes: category(4), int64(1)
memory usage: 15.0 KB


Unnamed: 0,aspect,polarity,opinion,category,id
0,customer care line,negative,,SUPPORT#GENERAL,3344640
1,service Vodacom,negative,bad,SUPPORT#GENERAL,3347241
2,service,negative,limited,SERVICE#GENERAL,3353838
3,chatbot Tobi,negative,disappointed,SERVICE#GENERAL,3353838
4,vodacom,negative,daft,SUPPORT#GENERAL,3358601
