# Comparative Chatbot Analysis

* Import all necessary libraries.

In [1]:
import warnings

import pandas as pd

from scipy.stats import mannwhitneyu, shapiro
from statsmodels.multivariate.manova import MANOVA

* Get the data.

In [2]:
# Response files, one per group; both are read with pandas' default CSV settings.
csv_paths = {'neg': 'neg_ch.csv', 'pos': 'pos_ch.csv'}

neg_data = pd.read_csv(csv_paths['neg'])
pos_data = pd.read_csv(csv_paths['pos'])

## Code the data

* Discard the first column, which has the questionnaire answer timestamp.

In [3]:
# Keep every row but skip column position 0 in both frames.
neg_data, pos_data = (frame.iloc[:, 1:] for frame in (neg_data, pos_data))

* Rename all the columns.

In [4]:
# Respondent-profile columns come first.
demographic_columns = ["Gender", "Age", "Education", "Frequency"]

# (item prefix, number of items) for each scale; items are numbered from 1.
# The order matters because the names are assigned to the frames by position.
scale_sizes = [
    ("CS", 4), ("T", 3), ("PE", 5), ("PWOM", 5), ("CI", 2),
    ("TBF", 3), ("TBR", 3), ("TBI", 2), ("TBC", 3), ("TBB", 3),
]

new_column_names = demographic_columns + [
    f"{prefix}{item_number}"
    for prefix, item_count in scale_sizes
    for item_number in range(1, item_count + 1)
]

# Both frames receive the same positional column names.
neg_data.columns = new_column_names
pos_data.columns = new_column_names

* Code the categorical columns into numbers.

In [5]:
# Integer code for every expected answer, keyed by the source column.
# A single table is applied to both groups so their encodings cannot drift apart.
category_codes = {
    'Gender': {'Male': 0, 'Female': 1, 'Other': 2},
    'Age': {'18-24': 0, '25-34': 1, '35-49': 2, '50+': 3},
    'Education': {'Middle School': 0, 'High School': 1, 'University': 2, 'Post-graduate Studies': 3},
    'Frequency': {'Several times a day': 0, 'Nearly everyday': 1, 'At least once a week': 2,
                  'Less than once a month': 3, 'Never': 4},
}

for frame_name, frame in (('neg_data', neg_data), ('pos_data', pos_data)):
    for column, codes in category_codes.items():
        # Series.map turns any answer that is missing from `codes` into NaN
        # without complaint; report such answers so they are not lost silently.
        unmapped = set(frame[column].dropna().unique()) - set(codes)
        if unmapped:
            warnings.warn(
                f"{frame_name}['{column}'] contains answers with no code "
                f"(they become NaN): {sorted(map(str, unmapped))}"
            )
        # Store the codes next to the original answers as '<column> Coded'.
        frame[f'{column} Coded'] = frame[column].map(codes)

* Create dummy variables for each categorical column.

In [6]:
# Coded columns that are expanded into one indicator column per level.
dummy_columns = ['Gender Coded', 'Age Coded', 'Education Coded', 'Frequency Coded']

# Encoding each frame on its own only creates indicators for the levels that
# happen to occur in that frame, so the two groups can end up with different
# columns (and NaN-filled gaps once the frames are concatenated). Declaring the
# same category levels on both frames first makes get_dummies emit an identical
# set of indicator columns for each group.
for column in dummy_columns:
    observed_codes = pd.concat([neg_data[column], pos_data[column]]).dropna().unique()
    # Cast to int so the generated column names read '<column>_0', '<column>_1', ...
    # even when NaN values forced the coded column to a float dtype.
    shared_levels = sorted(int(code) for code in observed_codes)
    neg_data[column] = pd.Categorical(neg_data[column], categories=shared_levels)
    pos_data[column] = pd.Categorical(pos_data[column], categories=shared_levels)

# drop_first=False keeps an indicator for every level (no reference level is dropped).
neg_data = pd.get_dummies(neg_data, columns=dummy_columns, drop_first=False)
pos_data = pd.get_dummies(pos_data, columns=dummy_columns, drop_first=False)

* Set construct names and codings.

In [7]:
# (construct label, item prefix, number of items) for every measured scale.
_scale_definitions = [
    ("Customer Satisfaction", "CS", 4),
    ("Trust", "T", 3),
    ("Positive Emotions", "PE", 5),
    ("Positive Word of Mouth", "PWOM", 5),
    ("Continuance Intention", "CI", 2),
    ("TTB - Functionality", "TBF", 3),
    ("TTB - Reliability", "TBR", 3),
    ("TTB - Integrity", "TBI", 2),
    ("TTB - Competence", "TBC", 3),
    ("TTB - Benevolence", "TBB", 3),
]

# Maps each construct label to the list of its item columns, e.g. "Trust" -> ["T1", "T2", "T3"].
construct = {
    label: [f"{prefix}{item_number}" for item_number in range(1, item_count + 1)]
    for label, prefix, item_count in _scale_definitions
}

## Mann-Whitney U Tests

* Use the Mann-Whitney U test to compare positive emotions between the two chatbot groups. (H1)

In [8]:
# Composite score: row-wise mean of the positive-emotion items (PE1..PE5)
pe_items = construct["Positive Emotions"]
pos_data['PE'] = pos_data[pe_items].mean(axis=1)
neg_data['PE'] = neg_data[pe_items].mean(axis=1)

# Shapiro-Wilk normality test on each group's composite score
shapiro_pos = shapiro(pos_data['PE'])
shapiro_neg = shapiro(neg_data['PE'])

# Report the p-values, positive group first
for _group, _result in (("Positive", shapiro_pos), ("Negative", shapiro_neg)):
    print(f"Shapiro-Wilk test ({_group} Group): p = {_result.pvalue:.4f}")

Shapiro-Wilk test (Positive Group): p = 0.0003
Shapiro-Wilk test (Negative Group): p = 0.2586


In [9]:
# Two-sided Mann-Whitney U test on the PE composite (positive vs. negative group),
# chosen because the normality check did not pass
mwu_result = mannwhitneyu(x=pos_data['PE'], y=neg_data['PE'], alternative='two-sided')
u_stat, p_val = mwu_result.statistic, mwu_result.pvalue
print(f"Mann-Whitney U test: U = {u_stat}, p-value = {p_val:.4f}")

Mann-Whitney U test: U = 783.0, p-value = 0.0003


* Use the Mann-Whitney U test to compare trust between the two chatbot groups. (H2)

In [10]:
# Composite score: row-wise mean of the trust items (T1..T3)
trust_items = construct["Trust"]
pos_data['T'] = pos_data[trust_items].mean(axis=1)
neg_data['T'] = neg_data[trust_items].mean(axis=1)

# Shapiro-Wilk normality test on each group's composite score
shapiro_pos = shapiro(pos_data['T'])
shapiro_neg = shapiro(neg_data['T'])

# Report the p-values, positive group first
for _group, _result in (("Positive", shapiro_pos), ("Negative", shapiro_neg)):
    print(f"Shapiro-Wilk test ({_group} Group): p = {_result.pvalue:.4f}")

Shapiro-Wilk test (Positive Group): p = 0.0012
Shapiro-Wilk test (Negative Group): p = 0.2426


In [11]:
# Two-sided Mann-Whitney U test on the trust composite (positive vs. negative group),
# chosen because the normality check did not pass
mwu_result = mannwhitneyu(x=pos_data['T'], y=neg_data['T'], alternative='two-sided')
u_stat, p_val = mwu_result.statistic, mwu_result.pvalue
print(f"Mann-Whitney U test: U = {u_stat}, p-value = {p_val:.4f}")

Mann-Whitney U test: U = 679.5, p-value = 0.0239


* Use the Mann-Whitney U test to compare customer satisfaction between the two chatbot groups. (H6)

In [12]:
# Composite score: row-wise mean of the customer-satisfaction items (CS1..CS4)
cs_items = construct["Customer Satisfaction"]
pos_data['CS'] = pos_data[cs_items].mean(axis=1)
neg_data['CS'] = neg_data[cs_items].mean(axis=1)

# Shapiro-Wilk normality test on each group's composite score
shapiro_pos = shapiro(pos_data['CS'])
shapiro_neg = shapiro(neg_data['CS'])

# Report the p-values, positive group first
for _group, _result in (("Positive", shapiro_pos), ("Negative", shapiro_neg)):
    print(f"Shapiro-Wilk test ({_group} Group): p = {_result.pvalue:.4f}")

Shapiro-Wilk test (Positive Group): p = 0.0019
Shapiro-Wilk test (Negative Group): p = 0.5641


In [13]:
# Two-sided Mann-Whitney U test on the CS composite (positive vs. negative group),
# chosen because the normality check did not pass
mwu_result = mannwhitneyu(x=pos_data['CS'], y=neg_data['CS'], alternative='two-sided')
u_stat, p_val = mwu_result.statistic, mwu_result.pvalue
print(f"Mann-Whitney U test: U = {u_stat}, p-value = {p_val:.4f}")

Mann-Whitney U test: U = 788.0, p-value = 0.0002


* Use the Mann-Whitney U test to compare positive word-of-mouth intention between the two chatbot groups. (H7)

In [14]:
# Composite score: row-wise mean of the positive word-of-mouth items (PWOM1..PWOM5)
pwom_items = construct["Positive Word of Mouth"]
pos_data['PWoM'] = pos_data[pwom_items].mean(axis=1)
neg_data['PWoM'] = neg_data[pwom_items].mean(axis=1)

# Shapiro-Wilk normality test on each group's composite score
shapiro_pos = shapiro(pos_data['PWoM'])
shapiro_neg = shapiro(neg_data['PWoM'])

# Report the p-values, positive group first
for _group, _result in (("Positive", shapiro_pos), ("Negative", shapiro_neg)):
    print(f"Shapiro-Wilk test ({_group} Group): p = {_result.pvalue:.4f}")

Shapiro-Wilk test (Positive Group): p = 0.0025
Shapiro-Wilk test (Negative Group): p = 0.2603


In [15]:
# Two-sided Mann-Whitney U test on the PWoM composite (positive vs. negative group),
# chosen because the normality check did not pass
mwu_result = mannwhitneyu(x=pos_data['PWoM'], y=neg_data['PWoM'], alternative='two-sided')
u_stat, p_val = mwu_result.statistic, mwu_result.pvalue
print(f"Mann-Whitney U test: U = {u_stat}, p-value = {p_val:.4f}")

Mann-Whitney U test: U = 735.5, p-value = 0.0027


## MANOVA (Multivariate Analysis of Variance)

* Use MANOVA to compare the technology trusting beliefs sub-constructs between the two chatbot groups. (H3)

In [16]:
# Item columns of each TTB sub-construct, keyed by the score column to create
# ("TTB - Functionality" -> 'Functionality', and so on, in the order of `construct`).
ttb_items = {
    label.split(" - ", 1)[1]: items
    for label, items in construct.items()
    if label.startswith("TTB - ")
}

# Composite score per sub-construct: row-wise mean of its items, for both groups
for score_column, item_columns in ttb_items.items():
    pos_data[score_column] = pos_data[item_columns].mean(axis=1)
    neg_data[score_column] = neg_data[item_columns].mean(axis=1)

# Tag every row with its group so it can serve as the MANOVA factor
pos_data['Sentiment'] = 'positive'
neg_data['Sentiment'] = 'negative'

# Stack the groups (positive rows first) under a fresh 0..n-1 index
combined_data = pd.concat([pos_data, neg_data], ignore_index=True)

# MANOVA: the five TTB composites as joint outcomes, Sentiment as the predictor
manova_formula = ' + '.join(ttb_items) + ' ~ Sentiment'
maov = MANOVA.from_formula(manova_formula, data=combined_data)
print(maov.mv_test())

                  Multivariate linear model
                                                              
--------------------------------------------------------------
       Intercept         Value  Num DF  Den DF F Value  Pr > F
--------------------------------------------------------------
          Wilks' lambda  0.0787 5.0000 58.0000 135.8301 0.0000
         Pillai's trace  0.9213 5.0000 58.0000 135.8301 0.0000
 Hotelling-Lawley trace 11.7095 5.0000 58.0000 135.8301 0.0000
    Roy's greatest root 11.7095 5.0000 58.0000 135.8301 0.0000
--------------------------------------------------------------
                                                              
--------------------------------------------------------------
         Sentiment        Value  Num DF  Den DF F Value Pr > F
--------------------------------------------------------------
            Wilks' lambda 0.7897 5.0000 58.0000  3.0896 0.0154
           Pillai's trace 0.2103 5.0000 58.0000  3.0896 0.0154
   Hotellin