In [2]:
import pandas as pd
import numpy as np
from scipy import stats

# 交互数据

## 交互数据单方差分析

In [3]:
stat =  pd.read_csv("./stat_origin.csv")
stat.head()

Unnamed: 0,subject_id,task_round,mouse_distance,mouse_click,keyboard_press,duration,cost
0,2,0,137703,350,395,761.563,258.646
1,2,1,16718,8,1092,341.584,236.761
2,3,0,193067,385,430,852.177,327.301
3,3,1,16922,20,785,397.007,176.775
4,4,0,269443,470,382,1143.378,406.428


In [4]:
def anova(df: pd.DataFrame ,column: str):
    return stats.f_oneway(df[column][df['task_round'] == 0], df[column][df['task_round'] == 1])

In [5]:
stat_anova_res = pd.DataFrame(columns=['样本数据', 'F', 'p'])
stat_anova_res

Unnamed: 0,样本数据,F,p


In [6]:
for column in stat.columns.drop(["subject_id", "task_round"]):
    F, p = anova(stat, column)
    # stat_anova_res = stat_anova_res.append({'样本数据': column, 'F': F, 'p': p})
    stat_anova_res = pd.concat([stat_anova_res, pd.DataFrame.from_records([{'样本数据': column, 'F': F, 'p': p}])])
stat_anova_res['样本数据'] = stat_anova_res['样本数据'].map({"mouse_distance": "鼠标移动距离", "mouse_click": "鼠标点击次数", "keyboard_press": "键盘按键次数", "duration": "任务时长", "cost": "操作成本"})
stat_anova_res["F"] =stat_anova_res["F"].apply('{:.3f}'.format)
stat_anova_res["p"] =stat_anova_res["p"].apply('{:.3e}'.format)
stat_anova_res.to_csv("./stat_anova.csv", index=False)
stat_anova_res

Unnamed: 0,样本数据,F,p
0,鼠标移动距离,170.684,5.129e-16
0,鼠标点击次数,403.055,1.697e-22
0,键盘按键次数,45.804,3.953e-08
0,任务时长,52.588,8.386e-09
0,操作成本,4.956,0.0317


## 交互数据正态性检验

In [7]:
def shapiro_wilk(df: pd.DataFrame ,column: str):
    round_0_res =  stats.shapiro(df[column][df['task_round'] == 0])
    round_1_res =  stats.shapiro(df[column][df['task_round'] == 1])
    return pd.DataFrame.from_records([{'样本数据': column, 'task_round': 0, 'W': round_0_res[0], 'p': round_0_res[1]}, {'样本数据': column, 'task_round': 1, 'W': round_1_res[0], 'p': round_1_res[1]}])
res = pd.DataFrame(columns=['样本数据', 'task_round', 'W', 'p'])
for column in stat.columns.drop(["subject_id", "task_round"]):
    res = pd.concat([res, shapiro_wilk(stat, column)])


res["样本数据"] = res["样本数据"].map({"mouse_distance": "鼠标移动距离", "mouse_click": "鼠标点击次数", "keyboard_press": "键盘按键次数", "duration": "任务时长", "cost": "操作成本"})
res["task_round"] = res["task_round"].map({0: "WIMP", 1: "M-CAT"})
res.rename(columns={"W": "W 值", "p": "p 值", "task_round": "任务组", "样本数据": "样本数据"}, inplace=True)

res = res.assign(通过检验 = lambda x: x["p 值"] > 0.05 )
res["通过检验"] = res["通过检验"].map({True: "通过", False: "拒绝"})

res.to_csv("./stat_shapiro_wilk.csv", index=False, float_format='%.4f')
res

Unnamed: 0,样本数据,任务组,W 值,p 值,通过检验
0,鼠标移动距离,WIMP,0.932694,0.155871,通过
1,鼠标移动距离,M-CAT,0.852645,0.004719,拒绝
0,鼠标点击次数,WIMP,0.946222,0.288435,通过
1,鼠标点击次数,M-CAT,0.770968,0.000244,拒绝
0,键盘按键次数,WIMP,0.947406,0.304001,通过
1,键盘按键次数,M-CAT,0.89754,0.031362,拒绝
0,任务时长,WIMP,0.955105,0.423498,通过
1,任务时长,M-CAT,0.936615,0.186665,通过
0,操作成本,WIMP,0.926879,0.119174,通过
1,操作成本,M-CAT,0.897209,0.030905,拒绝


In [8]:
stats.shapiro(stat["mouse_distance"][stat['task_round'] == 0])

ShapiroResult(statistic=0.9326943159103394, pvalue=0.15587051212787628)

## 交互数据组内方差齐性检验

In [9]:
def levene(df: pd.DataFrame ,column: str):
    return stats.levene(df[column][df['task_round'] == 0], df[column][df['task_round'] == 1])

In [10]:
stat_levene_res = pd.DataFrame(columns=['样本数据', 'F', 'p'])
for column in stat.columns.drop(["subject_id", "task_round"]):
    F, p = levene(stat, column)
    stat_levene_res = pd.concat([stat_levene_res, pd.DataFrame.from_records([{'样本数据': column, 'F': F, 'p': p}])])
stat_levene_res = stat_levene_res.assign(通过检验 = lambda x: x["p"] > 0.05 )

In [11]:
stat_levene_res["样本数据"] = stat_levene_res["样本数据"].map({"mouse_distance": "鼠标移动距离", "mouse_click": "鼠标点击次数", "keyboard_press": "键盘按键次数", "duration": "任务时长", "cost": "操作成本"})
stat_levene_res["F"] =stat_levene_res["F"].apply('{:.3f}'.format)
stat_levene_res["p"] =stat_levene_res["p"].apply('{:.5f}'.format)
stat_levene_res["通过检验"] = stat_levene_res["通过检验"].map({True: "通过", False: "拒绝"})
stat_levene_res.to_csv("./stat_levene.csv", index=False)
stat_levene_res

Unnamed: 0,样本数据,F,p,通过检验
0,鼠标移动距离,34.09,0.0,拒绝
0,鼠标点击次数,17.161,0.00017,拒绝
0,键盘按键次数,7.549,0.00896,拒绝
0,任务时长,1.683,0.20196,通过
0,操作成本,0.291,0.59248,通过


## 交互数据组间差异检验

由于鼠标移动距离有一组样本未通过正态性检验，将联合采用 Welch’s t
检验和 Kruskal-Wallis 单因素方差分析综合分析。Welch’s t 检验是一种参数检验方法，用对比较均值差异，但 Welch’s t 也支持近似正态分布的检验，但结果可能不够准确。Kruskal-Wallis 单因素方差分析是一种非参数检验方法，用于检验样本间中位数差异，不要求总体分布参数。

鼠标点击次数和键盘按键次数通过了正态性检验，但未通过方差齐性检验，因此采用 Welch’s t 检验。Welch’s t 检验不要求样本间方差相同。

任务时间和操作成本通过了正态性检验和方差齐性检验，使用 ANOVA 单因素方差分析进行检验。

### 鼠标移动距离（Wt 和 KW）

In [12]:
mouse_distance_0 = stat["mouse_distance"][stat['task_round'] == 0]
mouse_distance_1 = stat["mouse_distance"][stat['task_round'] == 1]

In [13]:
mouse_distance_t, mouse_distance_p_0 =stats.ttest_ind(mouse_distance_0, mouse_distance_1, equal_var=False)
mouse_distance_H, mouse_distance_p_1 = stats.kruskal(mouse_distance_0, mouse_distance_1)
print(f"鼠标移动距离的 t 检验结果为：t={mouse_distance_t:.3f}, p={mouse_distance_p_0:.3e}")
print(f"鼠标移动距离的 Kruskal 检验结果为：H={mouse_distance_H:.3f}, p={mouse_distance_p_1:.3e}")

鼠标移动距离的 t 检验结果为：t=13.065, p=7.175e-12
鼠标移动距离的 Kruskal 检验结果为：H=30.767, p=2.909e-08


### 鼠标点击次数 键盘击键次数（Wt）

In [14]:
mouse_click_t, mouse_click_p = stats.ttest_ind(stat["mouse_click"][stat['task_round'] == 0], stat["mouse_click"][stat['task_round'] == 1], equal_var=False)
print(f"鼠标击键次数的 t 检验结果为：t={mouse_click_t:.3f}, p={mouse_click_p:.3e}")

鼠标击键次数的 t 检验结果为：t=20.076, p=2.591e-15


In [15]:
keyboard_press_t, keyboard_press_p = stats.ttest_ind(stat["keyboard_press"][stat['task_round'] == 0], stat["keyboard_press"][stat['task_round'] == 1], equal_var=False)
print(f"键盘按键次数的 t 检验结果为：t={keyboard_press_t:.3f}, p={keyboard_press_p:.3e}")

键盘按键次数的 t 检验结果为：t=-6.768, p=7.372e-07


### 任务时间（ANOVA）

In [16]:
F, p = stats.f_oneway(stat["duration"][stat['task_round'] == 0], stat["duration"][stat['task_round'] == 1])
print(f"任务时长的 ANOVA 检验结果为：~$t = {F:.3f}, p = {p:.3e}$~")

任务时长的 ANOVA 检验结果为：~$t = 52.588, p = 8.386e-09$~


### 操作成本

In [17]:
F_cost, p_cost = stats.f_oneway(stat["cost"][stat['task_round'] == 0], stat["cost"][stat['task_round'] == 1])
print(f"操作成本的 ANOVA 检验结果为：~$t = {F:.3f}, p = {p:.3e}$~")

操作成本的 ANOVA 检验结果为：~$t = 52.588, p = 8.386e-09$~


In [31]:
cost_t, cost_p_0 = stats.ttest_ind(stat["cost"][stat['task_round'] == 0], stat["cost"][stat['task_round'] == 1], equal_var=False)
print(f"操作成本 t 检验结果为 t={cost_t}，p={cost_p_0}")
cost_H, cost_p_1 = stats.kruskal(stat["cost"][stat['task_round'] == 0], stat["cost"][stat['task_round'] == 1])
print(f"操作成本的 Kruskal-Wallis 检验结果为 H={cost_H}，p={cost_p_1}")

操作成本 t 检验结果为 t=2.2262097359392756，p=0.03262617100453932
操作成本的 Kruskal-Wallis 检验结果为 H=6.778832463217839，p=0.009224520892150635


### 汇总结果

In [18]:
res = pd.DataFrame(columns=['样本数据','检验方法', '统计量','值', 'p'])
res.loc[0] = ["鼠标移动距离", "Welch's t","t", mouse_distance_t, mouse_distance_p_0]
res.loc[1] = ["鼠标移动距离", "Kruskal-Wallis","H", mouse_distance_H, mouse_distance_p_1]
res.loc[2] = ["鼠标点击次数", "Welch's t","t", mouse_click_t, mouse_click_p]
res.loc[3] = ["键盘按键次数", "Welch's t","t", keyboard_press_t, keyboard_press_p]
res.loc[4] = ["任务时长", "ANOVA","F", F, p]
res.loc[5] = ["操作成本", "ANOVA","F", F_cost, p_cost]
res = res.assign(pas="通过")
res["p"] = res["p"].apply('{:.3e}'.format)
res["值"] = res["值"].apply('{:.3f}'.format)
res = res.rename({"pas": "通过（p<0.05）"}, axis=1)
res.to_csv("stat_result.csv", index=False, float_format='%.3f')
res


Unnamed: 0,样本数据,检验方法,统计量,值,p,通过（p<0.05）
0,鼠标移动距离,Welch's t,t,13.065,7.175e-12,通过
1,鼠标移动距离,Kruskal-Wallis,H,30.767,2.909e-08,通过
2,鼠标点击次数,Welch's t,t,20.076,2.591e-15,通过
3,键盘按键次数,Welch's t,t,-6.768,7.372e-07,通过
4,任务时长,ANOVA,F,52.588,8.386e-09,通过
5,操作成本,ANOVA,F,4.956,0.0317,通过


# 量表数据

## 量表数据正态性检验

In [19]:
scale = pd.read_csv("./scale_origin.csv")
scale.head()

Unnamed: 0,subject_id,task_round,sus,effect,easy,easy_learn,satisfaction
0,202,1,77.5,52.6,64.6,26.6,45
1,202,0,85.0,51.6,61.4,24.8,44
2,206,1,57.5,49.6,58.8,24.0,48
3,206,0,55.0,43.0,50.8,19.2,34
4,205,1,60.0,48.8,64.0,26.6,49


In [20]:
res = pd.DataFrame(columns=['样本数据', 'task_round', 'W', 'p'])
for column in scale.columns.drop(["subject_id", "task_round"]):
    res = pd.concat([res, shapiro_wilk(scale, column)])
res = res.assign(通过检验 = lambda x: x["p"] > 0.05 )
res["task_round"] = res["task_round"].map({0: "WIMP", 1: "M-CAT"})
res["通过检验"] = res["通过检验"].map({True: "通过", False: "拒绝"})
res.rename(columns={"task_round": "任务组"}, inplace=True)

res["样本数据"] = res["样本数据"].map({"sus": "SUS 评分", "effect": "USE 有效性评分", "easy": "USE 易用性", "easy_learn": "USE 易学性评分", "satisfaction": "USE 满意度评分", "mouse_distance": "鼠标移动距离", "mouse_click": "鼠标点击次数", "keyboard_press": "键盘按键次数", "duration": "任务时长"})
res.to_csv("./scale_shapiro_wilk.csv", index=False, float_format='%.4f')
res

Unnamed: 0,样本数据,任务组,W,p,通过检验
0,SUS 评分,WIMP,0.965217,0.626644,通过
1,SUS 评分,M-CAT,0.950176,0.343267,通过
0,USE 有效性评分,WIMP,0.97587,0.855925,通过
1,USE 有效性评分,M-CAT,0.90446,0.042738,拒绝
0,USE 易用性,WIMP,0.94997,0.340198,通过
1,USE 易用性,M-CAT,0.903726,0.041349,拒绝
0,USE 易学性评分,WIMP,0.965883,0.641343,通过
1,USE 易学性评分,M-CAT,0.748586,0.000119,拒绝
0,USE 满意度评分,WIMP,0.906416,0.04668,拒绝
1,USE 满意度评分,M-CAT,0.804174,0.00076,拒绝


## 量表数据组间方差齐性检验

In [21]:
res = pd.DataFrame(columns=['样本数据', 'F', 'p'])
for column in scale.columns.drop(["subject_id", "task_round"]):
    F, p = levene(scale, column)
    res = pd.concat([res, pd.DataFrame({'样本数据': column, 'F': F, 'p': p}, index=[0])])
res = res.assign(通过检验 = lambda x: x["p"] > 0.05 )

res["通过检验"] = res["通过检验"].map({True: "通过", False: "拒绝"})
res["样本数据"] = res["样本数据"].map({"sus": "SUS 评分", "effect": "USE 有效性评分", "easy": "USE 易用性", "easy_learn": "USE 易学性评分", "satisfaction": "USE 满意度评分", "mouse_distance": "鼠标移动距离", "mouse_click": "鼠标点击次数", "keyboard_press": "键盘按键次数", "duration": "任务时长"})
res.to_csv("./scale_levene.csv", index=False, float_format='%.4f')

In [22]:
res

Unnamed: 0,样本数据,F,p,通过检验
0,SUS 评分,1.636567,0.2081685,通过
0,USE 有效性评分,9.06596,0.004496234,拒绝
0,USE 易用性,16.599755,0.0002124592,拒绝
0,USE 易学性评分,19.009078,8.858726e-05,拒绝
0,USE 满意度评分,35.010083,6.184897e-07,拒绝


## 量表数据推论统计

SUS 评分和 USE 易用性评分通过了正态性检验和方差齐性检验，使用 ANOVA 单因素方差分析检验均值差异。取显著性水平为 $\alpha = 0.05$，在 $p > 0.05$ 时通过检验。

USE 有效性、易学性和满意度均未通过正态性检验和方差齐性检验，联合采用 Welch’s t 检验和 Kruskal-Wallis 单因素方差分析综合分析组内差异。取显著性水平为 $\alpha = 0.05$，在 $p > 0.05$ 时通过检验。

### SUS 和 USE 易用性（ANOVA）

In [23]:
scale.head()

Unnamed: 0,subject_id,task_round,sus,effect,easy,easy_learn,satisfaction
0,202,1,77.5,52.6,64.6,26.6,45
1,202,0,85.0,51.6,61.4,24.8,44
2,206,1,57.5,49.6,58.8,24.0,48
3,206,0,55.0,43.0,50.8,19.2,34
4,205,1,60.0,48.8,64.0,26.6,49


In [24]:
sus_F, sus_p = stats.f_oneway(scale["sus"][scale["task_round"]==0],scale["sus"][scale["task_round"]==1] )
print(f"SUS 评分 ANOVA 结果为 F={sus_F}，p={sus_p}")
easy_F, easy_p = stats.f_oneway(scale["easy"][scale["task_round"]==0],scale["easy"][scale["task_round"]==1] )
print(f"USE 易用性评分 ANOVA 结果为 F={easy_F}，p={easy_p}")



SUS 评分 ANOVA 结果为 F=20.159255429162357，p=5.9177526720979305e-05
USE 易用性评分 ANOVA 结果为 F=52.1879040808731，p=9.159259928714244e-09


In [29]:
easy_t, easy_p_0 = stats.ttest_ind(scale["easy"][scale["task_round"]==0],scale["easy"][scale["task_round"]==1], equal_var=False)
print(f"易用性评分 t 检验结果为 t={easy_t}，p={easy_p_0}")
easy_H, easy_p_1 = stats.kruskal(scale["easy"][scale["task_round"]==0],scale["easy"][scale["task_round"]==1])
print(f"易用性的 Kruskal-Wallis 检验结果为 H={easy_H}，p={easy_p_1}")

易用性评分 t 检验结果为 t=-7.224119605936288，p=8.573333285404681e-08
易用性的 Kruskal-Wallis 检验结果为 H=24.26682539682541，p=8.387121825389755e-07


### 有效性、易学性和满意度（Wt 和 KW）

In [25]:
effect_t, effect_p_0 = stats.ttest_ind(scale["effect"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1], equal_var=False)
print(f"USE 有效性评分 t 检验结果为 t={effect_t}，p={effect_p_0}")
effect_H, effect_p_1 = stats.kruskal(scale["effect"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1])
print(f"USE 有效性的 Kruskal-Wallis 检验结果为 H={effect_H}，p={effect_p_1}")

USE 有效性评分 t 检验结果为 t=-6.6865312875986245，p=2.7172026660385573e-07
USE 有效性的 Kruskal-Wallis 检验结果为 H=24.861110144423165，p=6.161291972043212e-07


In [26]:
easy_learn_t, easy_learn_p_0 = stats.ttest_ind(scale["easy_learn"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1], equal_var=False)
print(f"USE 易学性评分 t 检验结果为 t={easy_learn_t}，p={easy_learn_p_0}")
easy_learn_H, easy_learn_p_1 = stats.kruskal(scale["easy_learn"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1])
print(f"USE 易学性的 Kruskal-Wallis 检验结果为 H={easy_learn_H}，p={easy_learn_p_1}")

USE 易学性评分 t 检验结果为 t=-23.479174833419833，p=4.030525478082046e-24
USE 易学性的 Kruskal-Wallis 检验结果为 H=30.83991228070175，p=2.8021495500642376e-08


In [27]:
satisfaction_t, satisfaction_p_0 = stats.ttest_ind(scale["satisfaction"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1], equal_var=False)
print(f"USE 满意度评分 t 检验结果为 t={satisfaction_t}，p={satisfaction_p_0}")
satisfaction_H, satisfaction_p_1 = stats.kruskal(scale["satisfaction"][scale["task_round"]==0],scale["effect"][scale["task_round"]==1])
print(f"USE 满意度的 Kruskal-Wallis 检验结果为 H={satisfaction_H}，p={satisfaction_p_1}")

USE 满意度评分 t 检验结果为 t=-8.908698654372229，p=1.432928124949066e-09
USE 满意度的 Kruskal-Wallis 检验结果为 H=30.312539682539693，p=3.677407950660983e-08


#### 汇总结果

In [28]:
res = pd.DataFrame(columns=['样本数据', '检验方法', '统计量','值', 'p'])
res.loc[0] = ["SUS 评分", "ANOVA","F", sus_F, sus_p]
res.loc[1] = ["USE 易用性评分", "ANOVA","F", easy_F, easy_p]
res.loc[2] = ["USE 有效性评分", "Welch's t","t", effect_t, effect_p_0]
res.loc[3] = ["USE 有效性评分", "Kruskal-Wallis","H", effect_H, effect_p_1]
res.loc[4] = ["USE 易学性评分", "Welch's t","t", easy_learn_t, easy_learn_p_0]
res.loc[5] = ["USE 易学性评分", "Kruskal-Wallis","H", easy_learn_H, easy_learn_p_1]
res.loc[6] = ["USE 满意度评分", "Welch's t", "t", satisfaction_t, satisfaction_p_0]
res.loc[7] = ["USE 满意度评分", "Kruskal-Wallis", "H", satisfaction_H, satisfaction_p_1]
res = res.assign(pas="通过")
res["p"] = res["p"].apply('{:.3e}'.format)
res["值"] = res["值"].apply('{:.3f}'.format)
res = res.rename({"pas": "通过（p<0.05）"}, axis=1)
res.to_csv("scale_result.csv", index=False, float_format='%.3f')
res

Unnamed: 0,样本数据,检验方法,统计量,值,p,通过（p<0.05）
0,SUS 评分,ANOVA,F,20.159,5.918e-05,通过
1,USE 易用性评分,ANOVA,F,52.188,9.159e-09,通过
2,USE 有效性评分,Welch's t,t,-6.687,2.717e-07,通过
3,USE 有效性评分,Kruskal-Wallis,H,24.861,6.161e-07,通过
4,USE 易学性评分,Welch's t,t,-23.479,4.031e-24,通过
5,USE 易学性评分,Kruskal-Wallis,H,30.84,2.802e-08,通过
6,USE 满意度评分,Welch's t,t,-8.909,1.433e-09,通过
7,USE 满意度评分,Kruskal-Wallis,H,30.313,3.677e-08,通过
