In [22]:
import numpy as np
import pandas as pd

In [23]:
# Load the raw questionnaire export. Row 0 is saved as the per-column question
# text; rows 0 and 1 are then removed before the answers are parsed.
# NOTE(review): row 1 is presumably a second header/metadata row — confirm.
demographic_raw = pd.read_csv("questionnaire/demographic_origin.csv")
demographic_questions = demographic_raw.loc[0]
demographic_questions.to_json("questionnaire/demographic_questions.json", indent=4)

# Numeric gender codes used in the export mapped to readable labels.
gender_map = {1: "Male", 2: "Female", 3: "Non-binary / other"}

demographic = (
    demographic_raw
    .drop(index=[0, 1])
    .sort_values(by=["UID"])
    .reset_index(drop=True)
    .astype({"Age": int, "Data Labeling System": int, "Robotics": int})
    .assign(
        # Last two UID characters parsed as a number: odd -> group 1, even -> group 2.
        Group=lambda frame: frame["UID"].apply(lambda uid: 2 - int(uid[-2:]) % 2),
        Gender=lambda frame: frame["Gender"].astype(int).apply(gender_map.get),
    )
    .loc[:, ["Group", "UID", "Age", "Gender", "Data Labeling System", "Robotics"]]
)
demographic.to_csv("questionnaire/demographic.csv", index=False)

In [24]:
# Split the participants by their "Group" value (1 or 2).
demographic_group = demographic.groupby("Group")
group1, group2 = (demographic_group.get_group(label) for label in (1, 2))

In [25]:
# Count how many participants carry each gender label (all groups together).
np.unique(demographic["Gender"].values, return_counts=True)

(array(['Female', 'Male', 'Non-binary / other'], dtype=object),
 array([22, 18,  2]))

In [26]:
# Gender label counts per group, shown as a (group1, group2) pair.
tuple(
    np.unique(group["Gender"].values, return_counts=True)
    for group in (group1, group2)
)

((array(['Female', 'Male'], dtype=object), array([11, 10])),
 (array(['Female', 'Male', 'Non-binary / other'], dtype=object),
  array([11,  8,  2])))

In [27]:
def get_states(df, index_postfix="", questions=None):
    """Summarise the numeric columns of ``df`` as one row per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise with ``DataFrame.describe()``.
    index_postfix : str, optional
        Text appended to every row label of the result (for example
        ``"_group1"``) so several summaries can be concatenated without
        clashing labels.
    questions : mapping or pandas.Series, optional
        Lookup from column name to question text; anything exposing
        ``.get(key, default)`` works. When omitted, the notebook-level
        ``demographic_questions`` is used, which keeps existing calls working.

    Returns
    -------
    pandas.DataFrame
        Columns ``question, mean, std, min, max, 25%, 50%, 75%``; the
        ``count`` column produced by ``describe()`` is dropped. ``question``
        falls back to the column name when no question text is found.
    """
    if questions is None:
        # Backward-compatible default: rely on the global defined by the
        # cell that loads the raw questionnaire.
        questions = demographic_questions

    stats_df = df.describe().T
    stats_df["question"] = stats_df.index.map(lambda name: questions.get(name, name))
    stats_df = stats_df[["question", "mean", "std", "min", "max", "25%", "50%", "75%"]]
    stats_df.index = stats_df.index.map(lambda name: name + index_postfix)
    return stats_df

In [28]:
# Stack the summary of all participants on top of the per-group summaries;
# the postfix keeps the row labels of each part distinct.
summary_parts = [
    get_states(frame, postfix)
    for frame, postfix in (
        (demographic, ""),
        (group1, "_group1"),
        (group2, "_group2"),
    )
]
demographic_stats = pd.concat(summary_parts, axis=0)
demographic_stats

Unnamed: 0,question,mean,std,min,max,25%,50%,75%
Group,Group,1.5,0.506061,1.0,2.0,1.0,1.5,2.0
Age,What is your age?,23.380952,3.169615,19.0,40.0,22.0,23.0,24.0
Data Labeling System,How familiar are you with data labeling / anno...,2.190476,0.862161,1.0,4.0,2.0,2.0,3.0
Robotics,How familiar are you with robotics?,2.0,0.796333,1.0,4.0,1.0,2.0,2.75
Group_group1,Group,1.0,0.0,1.0,1.0,1.0,1.0,1.0
Age_group1,What is your age?,23.714286,4.26782,19.0,40.0,22.0,23.0,23.0
Data Labeling System_group1,How familiar are you with data labeling / anno...,2.380952,0.920662,1.0,4.0,2.0,2.0,3.0
Robotics_group1,How familiar are you with robotics?,2.238095,0.889087,1.0,4.0,2.0,2.0,3.0
Group_group2,Group,2.0,0.0,2.0,2.0,2.0,2.0,2.0
Age_group2,What is your age?,23.047619,1.465476,19.0,25.0,23.0,23.0,24.0


In [29]:
# Persist the combined statistics table; the row labels are written as the first column.
demographic_stats.to_csv("result/demographic_stats.csv")

In [9]:
# Load the baseline and experiment questionnaire tables.
baseline_df, experiment_df = map(
    pd.read_csv,
    ("questionnaire/baseline.csv", "questionnaire/experiment.csv"),
)

In [10]:
from scipy import stats

In [11]:
# Spearman rank correlation between the baseline "C2" answers and group 1's
# robotics familiarity rating.
# NOTE(review): the two series are paired by row position, so this assumes
# baseline_df rows are in the same UID-sorted order as group1 — confirm.
stats.spearmanr(baseline_df["C2"], group1["Robotics"])

SignificanceResult(statistic=-0.07562179734344433, pvalue=0.7445835548121034)

In [12]:
# Spearman rank correlation between the experiment "C2" answers and group 2's
# robotics familiarity rating.
# NOTE(review): paired by row position; assumes experiment_df rows follow the
# same UID-sorted order as group2 — confirm.
stats.spearmanr(experiment_df["C2"], group2["Robotics"])

SignificanceResult(statistic=0.12829588931598424, pvalue=0.5794255308653016)

In [13]:
# Spearman rank correlation: baseline "C2.1" answers vs. group 1 robotics familiarity.
# NOTE(review): paired by row position; assumes baseline_df matches group1's row order — confirm.
stats.spearmanr(baseline_df["C2.1"], group1["Robotics"])

SignificanceResult(statistic=0.1930689986325814, pvalue=0.4017450656112169)

In [14]:
# Spearman rank correlation: experiment "C2.1" answers vs. group 2 robotics familiarity.
# NOTE(review): paired by row position; assumes experiment_df matches group2's row order — confirm.
stats.spearmanr(experiment_df["C2.1"], group2["Robotics"])

SignificanceResult(statistic=0.15957163908879674, pvalue=0.48961584398096647)

In [15]:
# Spearman rank correlation: baseline "Consistency" vs. group 1 robotics familiarity.
# The METRIC_COLUMNS loop further down repeats this same call for "Consistency".
# NOTE(review): paired by row position; assumes baseline_df matches group1's row order — confirm.
stats.spearmanr(baseline_df["Consistency"], group1["Robotics"])

SignificanceResult(statistic=-0.12189806295549563, pvalue=0.5986299747530823)

In [16]:
# Spearman rank correlation: experiment "Consistency" vs. group 2 robotics familiarity.
# The METRIC_COLUMNS loop further down repeats this same call for "Consistency".
# NOTE(review): paired by row position; assumes experiment_df matches group2's row order — confirm.
stats.spearmanr(experiment_df["Consistency"], group2["Robotics"])


SignificanceResult(statistic=0.016906889344078208, pvalue=0.9420148006726367)

In [17]:
# Questionnaire columns that are correlated against robotics familiarity below.
METRIC_COLUMNS = ["Consistency", "Total Time", "Overconfidence"]

# Remaining questionnaire columns, grouped by item prefix for readability.
LIKERT_COLUMNS = [
    "Cognitive Load",
    "Confidence",
    "C1", "C1.1", "C1.2", "C1.3",
    "C2", "C2.1", "C2.2", "C2.3",
    "C3.1", "C3.2", "C3.3", "C3.4", "C3.5",
]

In [18]:
# For every questionnaire column, print its Spearman correlation with robotics
# familiarity: first baseline vs. group 1, then experiment vs. group 2.
# NOTE(review): values are paired by row position, so the questionnaire frames
# must share the row order of the corresponding group — confirm.
for metric in [*METRIC_COLUMNS, *LIKERT_COLUMNS]:
    print(metric)
    for answers, participants in ((baseline_df, group1), (experiment_df, group2)):
        print(stats.spearmanr(answers[metric], participants["Robotics"]))
    print()

Consistency
SignificanceResult(statistic=-0.12189806295549563, pvalue=0.5986299747530823)
SignificanceResult(statistic=0.016906889344078208, pvalue=0.9420148006726367)

Total Time
SignificanceResult(statistic=-0.34572487862403645, pvalue=0.12476331133080928)
SignificanceResult(statistic=-0.06776109129744837, pvalue=0.7704077629123255)

Overconfidence
SignificanceResult(statistic=0.017600946798055413, pvalue=0.9396385007707817)
SignificanceResult(statistic=0.1076595405114744, pvalue=0.6422879693932704)

Cognitive Load
SignificanceResult(statistic=-0.3443085637865754, pvalue=0.12641872956405525)
SignificanceResult(statistic=-0.05965697166712889, pvalue=0.7972784130994004)

Confidence
SignificanceResult(statistic=-0.016095754828765806, pvalue=0.9447925462255817)
SignificanceResult(statistic=0.11897938498391272, pvalue=0.6074780741243786)

C1
SignificanceResult(statistic=-0.008575323352732294, pvalue=0.9705717040632266)
SignificanceResult(statistic=0.2387429156564676, pvalue=0.297303605053