In [3]:
from util import load_user_data
import pandas as pd
import numpy as np

from pingouin import ancova, anova
from statsmodels.stats.multicomp import pairwise_tukeyhsd


# Load the anonymized study data, then tally how many valid participants
# were assigned to each experimental condition.
valid_users, tp_data = load_user_data(folder_name="../anonymized_data", reserved_users=None)
user2condition = tp_data['user2condition']  # indexed by user -> condition label

condition_count = {}
for user in valid_users:
    tp_condition = user2condition[user]
    # dict.get supplies 0 the first time a condition label is encountered.
    condition_count[tp_condition] = condition_count.get(tp_condition, 0) + 1
print(condition_count)

248 complete the NASA-TLX
248 complete the whole study
{'AP-AE': 63, 'UP-AE': 61, 'AP-UE': 64, 'UP-UE': 60}


In [4]:
# Collect questionnaire responses into two structures:
#   variable_dict  - one list per variable (a column), one entry per participant
#   condition_dict - condition_dict[condition][dimension] -> list of scores of the
#                    participants assigned to that condition
trust_dimensions = ["Reliability/Competence", "Understanding/Predictability",
                    "Intention of Developers", "Trust in Automation"]
covariates_1 = ["Propensity to Trust", "Familiarity"]  # read from the trust questionnaire data
covariates = ["llm_expertise", "assistant_expertise"]  # read from the user-expertise data
all_conditions = ["AP-AE", "AP-UE", "UP-AE", "UP-UE"]

condition_dict = {}
for condition in all_conditions:
    condition_dict[condition] = {dimension: [] for dimension in trust_dimensions}

# Key insertion order is kept deliberately (trust dimensions, covariates, then
# the design factors): it becomes the column order of any DataFrame built
# from this dict and the order in which the columns are listed when printed.
variable_dict = {}
for dimension in trust_dimensions + covariates_1 + covariates:
    variable_dict[dimension] = []
variable_dict["condition"] = []
variable_dict["planning"] = []
variable_dict["execution"] = []

user_expertise = tp_data["user_expertise"]
trust = tp_data["trust_in_automation"]
for user in valid_users:
    tp_condition = user2condition[user]
    if tp_condition not in all_conditions:
        # ignore pilot study
        continue
    variable_dict["condition"].append(tp_condition)
    # The condition label encodes both factors: an "AP" prefix means automatic
    # planning and an "AE" suffix means automatic execution; anything else is
    # treated as the user-involved variant.
    variable_dict["planning"].append(
        "automatic" if tp_condition.startswith("AP") else "user-involved")
    variable_dict["execution"].append(
        "automatic" if tp_condition.endswith("AE") else "user-involved")
    for dimension in trust_dimensions:
        variable_dict[dimension].append(trust[user][dimension])
        condition_dict[tp_condition][dimension].append(trust[user][dimension])
    # Iterate over covariates_1 itself rather than a re-typed copy of the list,
    # so the columns initialised above and the columns filled here cannot drift apart.
    for dimension in covariates_1:
        variable_dict[dimension].append(trust[user][dimension])
    for dimension in covariates:
        variable_dict[dimension].append(user_expertise[user][dimension])

In [5]:
# Print how many entries were collected for each variable.
for variable, values in variable_dict.items():
    print(variable, len(values))

Reliability/Competence 248
Understanding/Predictability 248
Intention of Developers 248
Trust in Automation 248
Propensity to Trust 248
Familiarity 248
llm_expertise 248
assistant_expertise 248
condition 248
planning 248
execution 248


In [6]:
# Two-way ANOVA (planning x execution) on every trust dimension, followed by a
# Tukey HSD comparison of the four conditions when the ANOVA finds an effect,
# and finally the per-condition mean of the dimension.
df = pd.DataFrame(variable_dict)

# Single source of truth for the corrected significance level; the original
# spelled it once as 0.05 / 4 and once as 0.0125.
# NOTE(review): presumably a Bonferroni correction over the 4 trust dimensions — confirm.
corrected_alpha = 0.05 / 4

print("For all participants, compare with experimental conditions")
for dimension in trust_dimensions:
    print(dimension)
    aov = anova(dv=dimension, between=['planning', 'execution'], data=df, effsize='n2')
    # aov = ancova(dv=dimension, covar=["Propensity to Trust", "Familiarity", "llm_expertise", "assistant_expertise"], between=['planning', 'execution'], data=df, effsize='n2')
    print(aov.round(3))
    # Trigger the post-hoc test when ANY effect (planning, execution or their
    # interaction) is significant. Previously only row 0 -- the planning main
    # effect -- was inspected, so a significant execution or interaction effect
    # never produced the pairwise comparison. The Residual row has a NaN
    # p-value, and NaN <= alpha evaluates to False, so it cannot trigger it.
    if aov['p-unc'].le(corrected_alpha).any():
        tukey = pairwise_tukeyhsd(endog=df[dimension], groups=df['condition'], alpha=corrected_alpha)
        print(tukey)
    for condition in all_conditions:
        print(condition, np.mean(condition_dict[condition][dimension]))
    print("-" * 17)

For all participants, compare with experimental conditions
Reliability/Competence
                 Source       SS     DF     MS      F  p-unc     n2
0              planning    0.027    1.0  0.027  0.046  0.831  0.000
1             execution    1.896    1.0  1.896  3.197  0.075  0.013
2  planning * execution    0.002    1.0  0.002  0.004  0.949  0.000
3              Residual  144.755  244.0  0.593    NaN    NaN    NaN
AP-AE 3.587301587301588
AP-UE 3.40625
UP-AE 3.5601092896174866
UP-UE 3.3916666666666666
-----------------
Understanding/Predictability
                 Source      SS     DF     MS      F  p-unc     n2
0              planning   0.690    1.0  0.690  2.181  0.141  0.009
1             execution   0.890    1.0  0.890  2.814  0.095  0.011
2  planning * execution   0.363    1.0  0.363  1.148  0.285  0.005
3              Residual  77.152  244.0  0.316    NaN    NaN    NaN
AP-AE 3.4484126984126986
AP-UE 3.25390625
UP-AE 3.2663934426229506
UP-UE 3.225
-----------------
Intention o