In [1]:
import os
import numpy as np
import pandas as pd

# Move the working directory up one level so that the relative paths used in later
# cells ("data_analysis/...") and the `model.prompter` import resolve from there.
# NOTE: this is not idempotent -- each re-run of this cell climbs one more directory,
# so restart the kernel before executing it again.
os.chdir("..")

In [2]:
from model.prompter import Prompter

In [3]:
# Questionnaire items that are cast to int and summarised in both the baseline and
# the experiment tables. This is the single source of truth for the item list.
LIKERT_COLUMNS = [
    "Cognitive Load",
    "Confidence",
    "C1",
    "C1.1",
    "C1.2",
    "C1.3",
    "C2",
    "C2.1",
    "C2.2",
    "C2.3",
    "C3.1",
    "C3.2",
    "C3.3",
    "C3.4",
    "C3.5",
]

# Columns selected from both questionnaire exports: the participant id, every
# Likert item, then the "Comments" column. Built from LIKERT_COLUMNS (as a new
# list, not an alias) so the two lists cannot drift apart.
QUALTRICS_COLUMNS = ["User ID", *LIKERT_COLUMNS, "Comments"]

# Extra columns selected only from the experiment export; the experiment cell
# casts them to int.
QUALTRICS_ADDITIONAL_COLUMNS = [
    "informative",
    "relevant",
    "criteria",
    "details",
    "comparing",
    "overwhelming",
    "distracting",
    "DR1C1.1",
    "DR1C2.2",
    "DR2C1.2",
    "DR2C2",
    "DR3C3.1",
    "DR3C3.5",
]

# Derived columns added to each table after loading; cast to float.
METRIC_COLUMNS = [
    "Consistency",
    "Total Time",
    "Overconfidence",
]

In [4]:
# --- Baseline questionnaire: load, clean, derive metrics, export -------------
baseline_export = pd.read_csv("data_analysis/questionnaire/baseline_origin.csv")
baseline_export = baseline_export[QUALTRICS_COLUMNS]

# Row 0 of the export is saved separately as the question lookup.
baseline_questions = baseline_export.loc[0, :]
baseline_questions.to_json(
    "data_analysis/questionnaire/baseline_questions.json", indent=4
)

# Rows 0 and 1 are not participant answers (presumably export header rows -- confirm).
baseline_df = baseline_export.drop(index=[0, 1])
baseline_df["User ID"] = baseline_df["User ID"].str.upper()
baseline_df = baseline_df.sort_values("User ID").reset_index(drop=True)

baseline_df["Consistency"] = baseline_df["User ID"].apply(
    lambda user_id: Prompter(user_id).get_consistency()
)

# NOTE(review): the timing column is attached by row position (index 0..n-1), not by
# User ID; this assumes the timing CSV is in the same sorted order -- confirm.
baseline_times = pd.read_csv("data_analysis/time_analysis/baseline.csv")
baseline_df["Total Time"] = baseline_times["Total Time"]

# Overconfidence = confidence rescaled by (x - 1) / 6 minus consistency
# (assumes a 1..7 rating scale -- confirm).
baseline_confidence_scaled = (baseline_df["Confidence"].astype(int) - 1) / 6
baseline_df["Overconfidence"] = (
    baseline_confidence_scaled - baseline_df["Consistency"].astype(float)
)

# Cast Likert items to int and derived metrics to float in a single pass.
baseline_df = baseline_df.astype(
    {
        **dict.fromkeys(LIKERT_COLUMNS, int),
        **dict.fromkeys(METRIC_COLUMNS, float),
    }
)

baseline_df.to_csv("data_analysis/questionnaire/baseline.csv", index=False)
baseline_scores = baseline_df.set_index("User ID")[[*METRIC_COLUMNS, *LIKERT_COLUMNS]]

In [5]:
# --- Experiment questionnaire: load, clean, derive metrics, export -----------
# Same column layout as the baseline export, with the additional experiment-only
# columns inserted just before the trailing "Comments" column.
experiment_export = pd.read_csv("data_analysis/questionnaire/experiment_origin.csv")
experiment_export = experiment_export[
    [*QUALTRICS_COLUMNS[:-1], *QUALTRICS_ADDITIONAL_COLUMNS, QUALTRICS_COLUMNS[-1]]
]

# Row 0 of the export is saved separately as the question lookup.
experiment_questions = experiment_export.loc[0, :]
experiment_questions.to_json(
    "data_analysis/questionnaire/experiment_questions.json", indent=4
)

# Rows 0 and 1 are not participant answers (presumably export header rows -- confirm).
experiment_df = experiment_export.drop(index=[0, 1])
experiment_df["User ID"] = experiment_df["User ID"].str.upper()
experiment_df = experiment_df.sort_values("User ID").reset_index(drop=True)

# NOTE(review): missing consistency values are replaced with 0 here, whereas the
# baseline cell does not fill them -- confirm this asymmetry is intended.
experiment_consistency = experiment_df["User ID"].apply(
    lambda user_id: Prompter(user_id).get_consistency()
)
experiment_df["Consistency"] = experiment_consistency.fillna(0)

# NOTE(review): the timing column is attached by row position (index 0..n-1), not by
# User ID; this assumes the timing CSV is in the same sorted order -- confirm.
experiment_times = pd.read_csv("data_analysis/time_analysis/experiment.csv")
experiment_df["Total Time"] = experiment_times["Total Time"]

# Overconfidence = confidence rescaled by (x - 1) / 6 minus consistency
# (assumes a 1..7 rating scale -- confirm).
experiment_confidence_scaled = (experiment_df["Confidence"].astype(int) - 1) / 6
experiment_df["Overconfidence"] = (
    experiment_confidence_scaled - experiment_df["Consistency"].astype(float)
)

# Cast questionnaire items to int and derived metrics to float in a single pass.
experiment_df = experiment_df.astype(
    {
        **dict.fromkeys(LIKERT_COLUMNS, int),
        **dict.fromkeys(QUALTRICS_ADDITIONAL_COLUMNS, int),
        **dict.fromkeys(METRIC_COLUMNS, float),
    }
)

experiment_df.to_csv("data_analysis/questionnaire/experiment.csv", index=False)
experiment_scores = experiment_df.set_index("User ID")[[*METRIC_COLUMNS, *LIKERT_COLUMNS]]

In [6]:
# Questionnaire item codes grouped by category prefix; used to slice the score
# tables when exporting per-category summaries.
C1 = ["C1", "C1.1", "C1.2", "C1.3"]
C2 = ["C2", "C2.1", "C2.2", "C2.3"]
# C3 has sub-items only; there is no top-level "C3" column.
C3 = ["C3.1", "C3.2", "C3.3", "C3.4", "C3.5"]

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for every metric and
# Likert column of the baseline table.
baseline_scores.describe()

Unnamed: 0,Consistency,Total Time,Overconfidence,Cognitive Load,Confidence,C1,C1.1,C1.2,C1.3,C2,C2.1,C2.2,C2.3,C3.1,C3.2,C3.3,C3.4,C3.5
count,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0
mean,0.785714,1350.38081,-0.150794,3.285714,4.809524,4.666667,3.857143,4.809524,4.666667,4.0,3.809524,4.095238,4.761905,4.714286,4.952381,3.619048,2.952381,3.47619
std,0.149284,448.413302,0.218484,1.419255,1.40068,1.591645,1.768777,1.536849,1.460593,1.702939,1.91361,1.729299,1.757975,1.95302,1.627151,1.596126,1.564487,1.83355
min,0.4,815.695,-0.8,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0
25%,0.7,994.212,-0.233333,2.0,4.0,4.0,3.0,4.0,3.0,3.0,2.0,3.0,4.0,3.0,4.0,2.0,2.0,2.0
50%,0.8,1238.441,-0.133333,3.0,5.0,5.0,4.0,5.0,5.0,4.0,4.0,4.0,5.0,5.0,5.0,4.0,3.0,3.0
75%,0.9,1582.555,-0.066667,4.0,6.0,6.0,5.0,6.0,6.0,5.0,6.0,5.0,6.0,6.0,6.0,5.0,4.0,5.0
max,1.0,2347.575,0.133333,7.0,7.0,7.0,7.0,7.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,6.0,6.0,7.0


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for every metric and
# Likert column of the experiment table.
experiment_scores.describe()

Unnamed: 0,Consistency,Total Time,Overconfidence,Cognitive Load,Confidence,C1,C1.1,C1.2,C1.3,C2,C2.1,C2.2,C2.3,C3.1,C3.2,C3.3,C3.4,C3.5
count,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0
mean,0.885714,1877.008381,-0.171429,3.0,5.285714,5.619048,5.047619,5.571429,5.428571,3.904762,3.428571,5.761905,5.857143,5.333333,4.190476,4.571429,4.809524,4.714286
std,0.159015,628.046794,0.169406,1.643168,1.146423,0.86465,1.20317,1.028175,0.978337,1.410842,1.804756,0.94365,1.195229,1.42595,1.364516,1.535299,2.015417,1.230563
min,0.4,871.937,-0.566667,1.0,2.0,4.0,3.0,3.0,3.0,2.0,1.0,3.0,3.0,2.0,2.0,1.0,1.0,2.0
25%,0.8,1384.102,-0.333333,2.0,5.0,5.0,4.0,5.0,5.0,3.0,2.0,5.0,6.0,5.0,3.0,4.0,3.0,4.0
50%,0.9,1806.257,-0.166667,3.0,6.0,6.0,5.0,6.0,5.0,4.0,3.0,6.0,6.0,6.0,4.0,5.0,6.0,5.0
75%,1.0,2049.689,-0.066667,4.0,6.0,6.0,6.0,6.0,6.0,5.0,5.0,6.0,7.0,6.0,5.0,6.0,6.0,6.0
max,1.0,3312.681,0.133333,7.0,7.0,7.0,7.0,7.0,7.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0


In [9]:
# Export one summary table per question category for the baseline data: one row
# per item with mean, std, min, median and max (mean/std rounded to 2 decimals).
for baseline_items, baseline_csv_path in (
    (C1, "data_analysis/questionnaire/baseline_c1.csv"),
    (C2, "data_analysis/questionnaire/baseline_c2.csv"),
    (C3, "data_analysis/questionnaire/baseline_c3.csv"),
):
    baseline_item_summary = baseline_scores[baseline_items].describe().T
    baseline_item_summary = baseline_item_summary[["mean", "std", "min", "50%", "max"]]
    baseline_item_summary.round({"mean": 2, "std": 2}).to_csv(baseline_csv_path, index=True)

In [10]:
# Export one summary table per question category for the experiment data: one row
# per item with mean, std, min, median and max (mean/std rounded to 2 decimals).
for experiment_items, experiment_csv_path in (
    (C1, "data_analysis/questionnaire/experiment_c1.csv"),
    (C2, "data_analysis/questionnaire/experiment_c2.csv"),
    (C3, "data_analysis/questionnaire/experiment_c3.csv"),
):
    experiment_item_summary = experiment_scores[experiment_items].describe().T
    experiment_item_summary = experiment_item_summary[["mean", "std", "min", "50%", "max"]]
    experiment_item_summary.round({"mean": 2, "std": 2}).to_csv(experiment_csv_path, index=True)

In [11]:
from scipy import stats

In [12]:
# Pairwise correlation matrix of the four core baseline measures.
# DataFrame.corr() is called without a method, so pandas' default (Pearson) applies.
baseline_scores[["Consistency", "Total Time", "Cognitive Load", "Confidence"]].corr()

Unnamed: 0,Consistency,Total Time,Cognitive Load,Confidence
Consistency,1.0,0.182439,0.138224,0.416754
Total Time,0.182439,1.0,0.358955,-0.089741
Cognitive Load,0.138224,0.358955,1.0,-0.323381
Confidence,0.416754,-0.089741,-0.323381,1.0


In [13]:
# Spearman rank correlation (statistic and p-value) between the Consistency metric
# and the Confidence rating in the baseline table.
stats.spearmanr(baseline_scores["Consistency"], baseline_scores["Confidence"])

SignificanceResult(statistic=0.3983135143028196, pvalue=0.07371413524811657)

In [14]:
# Pairwise correlation matrix of the four core experiment measures.
# DataFrame.corr() is called without a method, so pandas' default (Pearson) applies.
experiment_scores[["Consistency", "Total Time", "Cognitive Load", "Confidence"]].corr()

Unnamed: 0,Consistency,Total Time,Cognitive Load,Confidence
Consistency,1.0,0.045521,-0.19136,0.544633
Total Time,0.045521,1.0,-0.269068,-0.02621
Cognitive Load,-0.19136,-0.269068,1.0,-0.398139
Confidence,0.544633,-0.02621,-0.398139,1.0


In [15]:
# Spearman rank correlation (statistic and p-value) between the Consistency metric
# and the Confidence rating in the experiment table.
stats.spearmanr(experiment_scores["Consistency"], experiment_scores["Confidence"])

SignificanceResult(statistic=0.32773889496843356, pvalue=0.14696451101633506)

In [16]:
# Column-wise Spearman correlation between the baseline and experiment tables, shown
# only for Consistency and Confidence.
# Both "User ID" indexes are dropped first, so rows are paired by position (the
# sorted-by-User-ID order), not by matching IDs.
# NOTE(review): this pairing is only meaningful if row i refers to the same
# participant in both tables -- confirm against the study design.
baseline_scores.reset_index(drop=True).corrwith(experiment_scores.reset_index(drop=True), method="spearman")[["Consistency", "Confidence"]]

Consistency    0.116355
Confidence    -0.040245
dtype: float64

In [17]:
# Mean Overconfidence per table, displayed as a (baseline, experiment) tuple.
baseline_scores["Overconfidence"].mean(), experiment_scores["Overconfidence"].mean()

(-0.15079365079365076, -0.17142857142857146)

In [18]:
# Per-column difference of means, experiment minus baseline; a positive value means
# the experiment mean is higher for that column.
experiment_scores.mean(axis=0) - baseline_scores.mean(axis=0)

Consistency         0.100000
Total Time        526.627571
Overconfidence     -0.020635
Cognitive Load     -0.285714
Confidence          0.476190
C1                  0.952381
C1.1                1.190476
C1.2                0.761905
C1.3                0.761905
C2                 -0.095238
C2.1               -0.380952
C2.2                1.666667
C2.3                1.095238
C3.1                0.619048
C3.2               -0.761905
C3.3                0.952381
C3.4                1.857143
C3.5                1.238095
dtype: float64

In [19]:
# Summarise the experiment-only questionnaire columns: after the transpose there is
# one row per column and one column per describe() statistic.
additional_answers = experiment_df.set_index("User ID")[QUALTRICS_ADDITIONAL_COLUMNS]
additional_columns_result = additional_answers.astype(int).describe().T

# Attach the text stored in experiment_questions for each column code, falling back
# to the code itself when no entry exists.
additional_columns_result["question"] = additional_columns_result.index.map(
    lambda column_code: experiment_questions.get(column_code, column_code)
)

In [20]:
# Write the full per-question summary, with the question text as the first data column.
additional_report_columns = ["question", "mean", "std", "min", "max", "25%", "50%", "75%"]
additional_columns_result[additional_report_columns].to_csv(
    "data_analysis/result/additional_experiment_questions.csv"
)

In [21]:
# Write a compact table holding only mean and std (rounded to 2 decimals), transposed
# so that each question becomes a column.
additional_mean_std = additional_columns_result[["mean", "std"]].round({"mean": 2, "std": 2})
additional_mean_std.T.to_csv(
    "data_analysis/result/additional_experiment_questions_mean_std.csv", index=True
)