1. preprocessing 

In [1]:
import pandas as pd
data = pd.read_csv('./aoi based all features/yan lingshan Metrics.tsv', sep="\t")

# Step 1: Remove unnecessary columns
columns_to_drop = ['Recording', 'gender', 'glasses',  'Timeline', 'Interval', 'Media']
data_cleaned = data.drop(columns=columns_to_drop)

In [2]:
# Step 2: Separate task-level data and summary data
task_level_data = data_cleaned[data_cleaned['TOI'] != 'Entire Recording']
summary_data = data_cleaned[data_cleaned['TOI'] == 'Entire Recording']

# Save both tables as TSV files
task_level_data.to_csv("task_level_data.tsv", sep="\t", index=False)
summary_data.to_csv("summary_data.tsv", sep="\t", index=False)
print("Task-level data:", task_level_data.shape)
print("Summary data:", summary_data.shape)

Task-level data: (166, 17)
Summary data: (166, 17)


2. read task level 

In [29]:
import pandas as pd
from statsmodels.formula.api import mixedlm

file_path_new = './task_level_data.tsv'
task_level_data_new = pd.read_csv(file_path_new, sep="\t")

missing values

In [30]:
# Find participants with fewer than 7 rows and remove the data
participant_counts = task_level_data_new['Participant'].value_counts()
participants_to_remove = participant_counts[participant_counts != 7].index
task_level_data_new = task_level_data_new[~task_level_data_new['Participant'].isin(participants_to_remove)]

task_level_data_new.to_csv("filtered_task_level_data.tsv", sep="\t", index=False)
print(f" {len(participants_to_remove)} Participants' data have been deleted")

 1 Participants' data have been deleted


In [48]:
# modify column names 
task_level_data_filter = pd.read_csv("./filtered_task_level_data.tsv", sep="\t")
# Remove all "(1)" from the TOI column
task_level_data_new['TOI'] = task_level_data_new['TOI'].str.replace(r"\s*\(1\)", "", regex=True)
print(task_level_data_new['TOI'].unique())

task_level_data_new.to_csv("filtered_task_level_data.tsv", sep="\t", index=False)

['all-1' 'all-2' 'all-3' 'title-1' 'title-2' 'title-3']


In [49]:
# Remove rows where 'AOI' column contains 'all 2 final with pic'
task_level_data_filter = task_level_data_filter[task_level_data_filter['AOI'] != 'all 2 final with pic']

# Clean field formats: handle numeric values with commas
task_level_data_filter['Peak_velocity_of_entry_saccade'] = (
    task_level_data_filter['Peak_velocity_of_entry_saccade'].str.replace(',', '.').astype(float)
)
task_level_data_filter['Peak_velocity_of_exit_saccade'] = (
    task_level_data_filter['Peak_velocity_of_exit_saccade'].str.replace(',', '.').astype(float)
)

# Add language grouping variable
task_level_data_filter['LanguageGroup'] = task_level_data_filter['primary_language_family'].apply(
    lambda x: 'Group1' if x == 'alphabetic' else 'Group2'
)

# Add text format and task difficulty variables
task_level_data_filter['Format'] = task_level_data_filter['TOI'].apply(
    lambda x: 'Uppercase' if 'all' in x.lower() else 'TitleCase'
)
task_level_data_filter['Difficulty'] = task_level_data_filter['TOI'].apply(
    lambda x: 'Easy' if '1' in x else ('Medium' if '2' in x else 'Hard')
)


AttributeError: Can only use .str accessor with string values!

In [46]:

# Add the correct answer for each task
correct_answers = {
    "all-1": 5,  
    "all-2": 3,
    "all-3": 7,
    "title-1": 6,
    "title-2": 3,
    "title-3": 5
    
}

# Add a column for correct answers
task_level_data_filter['CorrectAnswer'] = task_level_data_filter['TOI'].map(correct_answers)
task_level_data_filter.to_csv("filtered_task_level_data.tsv", sep="\t", index=False)

In [51]:
print(task_level_data_filter['CorrectAnswer'].isna().sum()) 
print(task_level_data_filter[['Last_key_press', 'CorrectAnswer']].dtypes)

0
Last_key_press    object
CorrectAnswer      int64
dtype: object


In [58]:
# Replace 'F' followed by a number with just the number
task_level_data_filter['Last_key_press'] = task_level_data_filter['Last_key_press'].str.replace(r'^F', '', regex=True)

# Ensure Last_key_press and CorrectAnswer are both string types
task_level_data_filter['Last_key_press'] = task_level_data_filter['Last_key_press'].astype(str)
task_level_data_filter['CorrectAnswer'] = task_level_data_filter['CorrectAnswer'].astype(str)

# Check if the user selected the correct answer (direct string comparison)
task_level_data_filter['IsCorrect'] = task_level_data_filter['Last_key_press'] == task_level_data_filter['CorrectAnswer']
task_level_data_filter['IsCorrect_numeric'] = task_level_data_filter['IsCorrect'].astype(int)
print(task_level_data_filter[['TOI', 'Last_key_press', 'CorrectAnswer', 'IsCorrect', 'IsCorrect_numeric']].head())

task_level_data_filter.to_csv("preprocessed_task_level_data.tsv", sep="\t", index=False)

       TOI Last_key_press CorrectAnswer  IsCorrect  IsCorrect_numeric
0    all-1              5             5       True                  1
1    all-2              3             3       True                  1
2    all-3              3             7      False                  0
3  title-1              6             6       True                  1
4  title-2              3             3       True                  1


# H1 - Time_to_first_fixation + Number_of_fixations
   
   你的假设 1 有两个特征（Time to First Fixation 和 Fixation Count），每个特征需要针对三个任务难度单独构建线性混合效应模型，总共需要 6 个模型 \


总体分析得到的结果 P值等于0.204,结果很差. 计算简单、中等和困难任务的 p value: 对每个任务难度单独建模

	•	Time_to_first_fixation 描述用户何时第一次注意到目标，是时间维度。
	•	Fixation_count 描述用户总共注视的次数，是频率维度。
	•	这两个特征分别从时间和频率描述注意力。不同维度，以及两个特征不相关性 所以单独分析。  

In [72]:
from statsmodels.formula.api import mixedlm

# function: analyze the impact of Format on different task difficulties (not considering language background)
def analyze_by_difficulty(feature, data, difficulties):
    print(f"\n=== Analyzing Feature: {feature} for Different Task Difficulties (Not Considering Language Group) ===")
    results = {}
    for difficulty in difficulties:
        print(f"\nTask Difficulty: {difficulty}")
        # Filter data for specific difficulty
        filtered_data = data[data['Difficulty'] == difficulty]
        
        # Build model
        model = mixedlm(f"{feature} ~ Format", filtered_data, groups="Participant", re_formula="~1")
        result = model.fit()
        results[difficulty] = result
        
        # Extract P-value and coefficient
        coef = result.params.get("Format[T.Uppercase]", None)
        p_value = result.pvalues.get("Format[T.Uppercase]", None)
        
        print(f"Coefficient (Format[T.Uppercase]): {coef:.4f}")
        print(f"P-value (Format[T.Uppercase]): {p_value:.4f}")
        if p_value < 0.05:
            if coef > 0:
                print(f"Result: For {difficulty} tasks, Uppercase format significantly increases {feature}.")
            else:
                print(f"Result: For {difficulty} tasks, Uppercase format significantly decreases {feature}.")
    
    return results

# function: analyze the impact of Format by language group (not considering task difficulty)
def analyze_by_language_group(feature, data):
    print(f"\n=== Analyzing Feature: {feature} for Different Language Groups (Not Considering Task Difficulty) ===")
    # Build model
    model = mixedlm(f"{feature} ~ Format * LanguageGroup", data, groups="Participant", re_formula="~1")
    result = model.fit()
    
    # Extract P-value and coefficient
    coef_format = result.params.get("Format[T.Uppercase]", None)
    coef_interaction = result.params.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
    p_value_format = result.pvalues.get("Format[T.Uppercase]", None)
    p_value_interaction = result.pvalues.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)

    print(f"Coefficient (Format[T.Uppercase]): {coef_format:.4f}")
    print(f"P-value (Format[T.Uppercase]): {p_value_format:.4f}")
    print(f"Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {coef_interaction:.4f}")
    print(f"P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {p_value_interaction:.4f}")
    if p_value_format < 0.05:
        if coef_format > 0:
            print(f"Result: Uppercase format significantly increases {feature} across all participants.")
        else:
            print(f"Result: Uppercase format significantly decreases {feature} across all participants.")
    
    if p_value_interaction < 0.05:
        if coef_interaction > 0:
            print(f"Result: Language Group 2 (logographic) is significantly more sensitive to Uppercase format.")
        else:
            print(f"Result: Language Group 1 (alphabetic) is significantly more sensitive to Uppercase format.")
    return result

# task difficulties and features to analyze
difficulties = ["Easy", "Medium", "Hard"]
features = ["Time_to_first_fixation", "Number_of_fixations"]

# Analyze each feature by task difficulty
for feature in features:
    analyze_by_difficulty(feature, task_level_data_filter, difficulties)

# Analyze each feature by language background
for feature in features:
    analyze_by_language_group(feature, task_level_data_filter)


=== Analyzing Feature: Time_to_first_fixation for Different Task Difficulties (Not Considering Language Group) ===

Task Difficulty: Easy
Coefficient (Format[T.Uppercase]): -674.7826
P-value (Format[T.Uppercase]): 0.1370

Task Difficulty: Medium
Coefficient (Format[T.Uppercase]): 3276.0000
P-value (Format[T.Uppercase]): 0.0020
Result: For Medium tasks, Uppercase format significantly increases Time_to_first_fixation.

Task Difficulty: Hard
Coefficient (Format[T.Uppercase]): -636.0870
P-value (Format[T.Uppercase]): 0.3671

=== Analyzing Feature: Number_of_fixations for Different Task Difficulties (Not Considering Language Group) ===

Task Difficulty: Easy
Coefficient (Format[T.Uppercase]): 0.7391
P-value (Format[T.Uppercase]): 0.7999

Task Difficulty: Medium
Coefficient (Format[T.Uppercase]): 23.2174
P-value (Format[T.Uppercase]): 0.0002
Result: For Medium tasks, Uppercase format significantly increases Number_of_fixations.

Task Difficulty: Hard




Coefficient (Format[T.Uppercase]): 67.1304
P-value (Format[T.Uppercase]): 0.0066
Result: For Hard tasks, Uppercase format significantly increases Number_of_fixations.

=== Analyzing Feature: Time_to_first_fixation for Different Language Groups (Not Considering Task Difficulty) ===
Coefficient (Format[T.Uppercase]): -122.3333
P-value (Format[T.Uppercase]): 0.8514
Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): 1986.6296
P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): 0.0570

=== Analyzing Feature: Number_of_fixations for Different Language Groups (Not Considering Task Difficulty) ===
Coefficient (Format[T.Uppercase]): 16.5476
P-value (Format[T.Uppercase]): 0.5698
Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): 35.3042
P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): 0.4481




结果分析

任务难度分析（不区分语言背景）

1. Time_to_first_fixation
	•	Easy:
	•	Coefficient: -674.7826
	•	P-value: 0.1370（不显著）
	•	结论: 在简单任务中，全大写字母格式对用户的 Time_to_first_fixation 没有显著影响。
	•	Medium:
	•	Coefficient: 3276.0000
	•	P-value: 0.0020（显著）
	•	结论: 在中等难度任务中，全大写字母显著增加了用户的 Time_to_first_fixation，即用户花更多时间将注意力集中在目标区域。
	•	Hard:
	•	Coefficient: -636.0870
	•	P-value: 0.3671（不显著）
	•	结论: 在困难任务中，全大写字母格式对用户的 Time_to_first_fixation 没有显著影响。

2. Number_of_fixations
	•	Easy:
	•	Coefficient: 0.7391
	•	P-value: 0.7999（不显著）
	•	结论: 在简单任务中，全大写字母格式对用户的 Number_of_fixations 没有显著影响。
	•	Medium:
	•	Coefficient: 23.2174
	•	P-value: 0.0002（显著）
	•	结论: 在中等难度任务中，全大写字母显著增加了用户的 Number_of_fixations，即用户注视次数更多，表明注意力集中度较高。
	•	Hard:
	•	Coefficient: 67.1304
	•	P-value: 0.0066（显著）
	•	结论: 在困难任务中，全大写字母显著增加了用户的 Number_of_fixations。

语言背景分析（不区分任务难度）

1. Time_to_first_fixation
	•	Format:
	•	Coefficient: -122.3333
	•	P-value: 0.8514（不显著）
	•	结论: 全大写字母格式对所有用户的 Time_to_first_fixation 没有显著总体影响。
	•	Interaction (Format × LanguageGroup):
	•	Coefficient: 1986.6296
	•	P-value: 0.0570（接近显著）
	•	结论: 语言背景可能调节全大写字母格式对用户 Time_to_first_fixation 的效果，但不够显著。

2. Number_of_fixations
	•	Format:
	•	Coefficient: 16.5476
	•	P-value: 0.5698（不显著）
	•	结论: 全大写字母格式对所有用户的 Number_of_fixations 没有显著总体影响。
	•	Interaction (Format × LanguageGroup):
	•	Coefficient: 35.3042
	•	P-value: 0.4481（不显著）
	•	结论: 语言背景对全大写字母格式的影响没有显著调节作用。

结合假设 1：全大写字母是否更吸引用户的注意力？

假设 1 的核心指标：
	•	Time_to_first_fixation 和 Number_of_fixations，分别衡量 注意力吸引速度 和 注意力集中度。

结论：
	1.	总体分析:
	•	全大写字母格式对 Time_to_first_fixation 和 Number_of_fixations 没有显著总体影响（P 值均较大）。
	2.	任务难度分析:

	•	在 普通难度任务 中，字体格式对注意力没有显著影响。
	•	在 中等难度任务 中，全大写字母显著吸引了用户的初始注意力（Time to First Fixation）和吸引速度（Time_to_first_fixation）。
	•	在 困难任务 中，全大写字母显著增加了注视次数（Fixation Count），说明其对持续关注可能更有帮助。

	3.	语言背景分析:
	•	语言背景对文字格式的调节作用接近显著（P = 0.057），提示母语为非字母语言的用户可能对全大写字母更敏感，支持假设中 语言背景可能影响文字格式的效果,但需要更大样本量验证。
	•	P 值为 0.057，虽然接近显著，但未达到严格的统计学显著性（<0.05）。可以通过增加样本量或改进实验设计进行进一步验证。

回答假设 1：
`	•	如果 Time_to_first_fixation 显著降低，说明用户更快注意到全大写字母。
	•	如果 Number_of_fixations 显著增加，说明用户更长时间关注全大写字母。`

	•	全大写字母在某些特定任务（如中等任务）中更能吸引用户的注意力（显著增加 Number_of_fixations 和 Time_to_first_fixation）。
	    在困难任务中，全大写字母显著增加了用户的 Number_of_fixations。说明用户更长时间关注全大写字母
	•	整体来看，全大写字母在简单和困难任务中没有显著效果。
	•	语言背景的调节作用需要进一步研究，目前未达显著水平。

# H2: 全大写字母是否增加了用户的认知负荷？尤其是非字母语言用户

单独分析：Average Duration of Fixation 和 Number of Saccades 是不同维度的指标，分别反映认知负荷的深度和跳转频率。单独建模有助于明确各特征的独立作用。



这个是单独分析的方式：

In [14]:
from statsmodels.formula.api import mixedlm

# function for overall analysis
def overall_analysis(feature, data):
    print(f"\n=== Overall Analysis for Feature: {feature} ===")
    model = mixedlm(f"{feature} ~ Format", data, groups="Participant", re_formula="~1")
    result = model.fit()
    coef = result.params.get("Format[T.Uppercase]", None)
    p_value = result.pvalues.get("Format[T.Uppercase]", None)
    print(f"Coefficient (Format[T.Uppercase]): {coef:.4f}")
    print(f"P-value (Format[T.Uppercase]): {p_value:.4f}")
    if p_value < 0.05:
        if coef > 0:
            print(f"Result: Uppercase format significantly increases {feature}.")
        else:
            print(f"Result: Uppercase format significantly decreases {feature}.")
    return result

# function for analyzing by task difficulty
def analyze_by_difficulty(feature, data, difficulties):
    print(f"\n=== Analyzing Feature: {feature} for Different Task Difficulties ===")
    for difficulty in difficulties:
        print(f"\nTask Difficulty: {difficulty}")
        filtered_data = data[data['Difficulty'] == difficulty]
        model = mixedlm(f"{feature} ~ Format", filtered_data, groups="Participant", re_formula="~1")
        result = model.fit()
        coef = result.params.get("Format[T.Uppercase]", None)
        p_value = result.pvalues.get("Format[T.Uppercase]", None)
        print(f"Coefficient (Format[T.Uppercase]): {coef:.4f}")
        print(f"P-value (Format[T.Uppercase]): {p_value:.4f}")
        if p_value < 0.05:
            if coef > 0:
                print(f"Result: For {difficulty} tasks, Uppercase format significantly increases {feature}.")
            else:
                print(f"Result: For {difficulty} tasks, Uppercase format significantly decreases {feature}.")

# function for analyzing by language group
def analyze_by_language_group(feature, data):
    print(f"\n=== Analyzing Feature: {feature} for Different Language Groups ===")
    model = mixedlm(f"{feature} ~ Format * LanguageGroup", data, groups="Participant", re_formula="~1")
    result = model.fit()
    coef_format = result.params.get("Format[T.Uppercase]", None)
    coef_interaction = result.params.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
    p_value_format = result.pvalues.get("Format[T.Uppercase]", None)
    p_value_interaction = result.pvalues.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
    print(f"Coefficient (Format[T.Uppercase]): {coef_format:.4f}")
    print(f"P-value (Format[T.Uppercase]): {p_value_format:.4f}")
    print(f"Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {coef_interaction:.4f}")
    print(f"P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {p_value_interaction:.4f}")
    if p_value_interaction < 0.05:
        if coef_interaction > 0:
            print(f"Result: Language Group 2 (logographic) is significantly more sensitive to Uppercase format.")
        else:
            print(f"Result: Language Group 1 (alphabetic) is significantly more sensitive to Uppercase format.")
    return result

# task difficulties and features
features = ["Average_duration_of_fixations", "Number_of_saccades_in_AOI"]
difficulties = ["Easy", "Medium", "Hard"]

# Analyze each feature
for feature in features:
    overall_analysis(feature, task_level_data_filter)
    
    analyze_by_difficulty(feature, task_level_data_filter, difficulties)

    analyze_by_language_group(feature, task_level_data_filter)


=== Overall Analysis for Feature: Average_duration_of_fixations ===
Coefficient (Format[T.Uppercase]): -7.2899
P-value (Format[T.Uppercase]): 0.1712

=== Analyzing Feature: Average_duration_of_fixations for Different Task Difficulties ===

Task Difficulty: Easy
Coefficient (Format[T.Uppercase]): -9.5652
P-value (Format[T.Uppercase]): 0.3152

Task Difficulty: Medium
Coefficient (Format[T.Uppercase]): -5.1304
P-value (Format[T.Uppercase]): 0.2247

Task Difficulty: Hard
Coefficient (Format[T.Uppercase]): -7.1739
P-value (Format[T.Uppercase]): 0.0908

=== Analyzing Feature: Average_duration_of_fixations for Different Language Groups ===
Coefficient (Format[T.Uppercase]): -6.7381
P-value (Format[T.Uppercase]): 0.3259
Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): -1.4101
P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): 0.8977

=== Overall Analysis for Feature: Number_of_saccades_in_AOI ===
Coefficient (Format[T.Uppercase]): 21.1159
P-value (F



分析与 H2 回答

假设 H2 回顾

	全大写字母是否增加了用户的认知负荷？尤其是非字母语言用户（如亚洲用户）？

结果分析

1. 特征：Average Duration of Fixation
	•	总体分析:
	•	Coefficient: -7.2899
	•	P-value: 0.1712 （不显著）
	•	结论: 全大写字母对 Average_duration_of_fixations 的总体影响不显著，说明全大写字母在所有任务和语言背景中不会显著增加认知负荷。
	•	任务难度分析:
	•	Easy:
	•	Coefficient: -9.5652
	•	P-value: 0.3152 （不显著）
	•	结论: 简单任务中，全大写字母对 Average_duration_of_fixations 没有显著影响。
	•	Medium:
	•	Coefficient: -5.1304
	•	P-value: 0.2247 （不显著）
	•	结论: 中等任务中，全大写字母对 Average_duration_of_fixations 没有显著影响。
	•	Hard:
	•	Coefficient: -7.1739
	•	P-value: 0.0908 （接近显著）
	•	结论: 困难任务中，全大写字母可能减少用户的 Average_duration_of_fixations，但尚未达显著水平。
	•	语言背景分析:
	•	Format:
	•	Coefficient: -6.7381
	•	P-value: 0.3259 （不显著）
	•	结论: 全大写字母对所有语言背景下的 Average_duration_of_fixations 没有显著影响。
	•	Interaction (Format × LanguageGroup):
	•	Coefficient: -1.4101
	•	P-value: 0.8977 （不显著）
	•	结论: 语言背景对 Average_duration_of_fixations 的调节作用不显著。

2. 特征：Number of Saccades in AOI
	•	总体分析:
	•	Coefficient: 21.1159
	•	P-value: 0.2693 （不显著）
	•	结论: 全大写字母对 Number_of_saccades_in_AOI 的总体影响不显著。
	•	任务难度分析:
	•	Easy:
	•	Coefficient: 0.2174
	•	P-value: 0.9267 （不显著）
	•	结论: 简单任务中，全大写字母对 Number_of_saccades_in_AOI 没有显著影响。
	•	Medium:
	•	Coefficient: 17.5217
	•	P-value: 0.0003 （显著）
	•	结论: 中等任务中，全大写字母显著增加了用户的 Number_of_saccades_in_AOI，表明用户可能需要更多的跳转来处理信息。
	•	Hard:
	•	Coefficient: 45.6087
	•	P-value: 0.0384 （显著）
	•	结论: 困难任务中，全大写字母显著增加了用户的 Number_of_saccades_in_AOI，进一步支持其可能增加认知负荷。
	•	语言背景分析:
	•	Format:
	•	Coefficient: 13.0238
	•	P-value: 0.5960 （不显著）
	•	结论: 全大写字母对所有语言背景下的 Number_of_saccades_in_AOI 没有显著影响。
	•	Interaction (Format × LanguageGroup):
	•	Coefficient: 20.6799
	•	P-value: 0.5985 （不显著）
	•	结论: 语言背景对 Number_of_saccades_in_AOI 的调节作用不显著。

结合假设 H2 的回答

回答 1：全大写字母是否增加了认知负荷？
	•	Average Duration of Fixation:
	•	全大写字母对用户认知处理深度的影响不显著。
	•	在所有任务难度和语言背景中，Average Duration of Fixation 没有显著变化。
	•	结论: 没有明确证据表明全大写字母会增加用户的认知负荷深度。
	•	Number of Saccades in AOI:
	•	在 中等难度任务 和 困难任务 中，全大写字母显著增加了 Number_of_saccades_in_AOI，表明用户可能频繁跳转以处理信息，可能反映认知负荷增加。
	•	简单任务中没有显著变化。
	•	结论: 全大写字母在复杂任务中可能增加用户的认知负荷（通过更多跳转反映）。

回答 2：非字母语言用户是否受影响更大？
	•	语言背景分析:
	•	无论是 Average Duration of Fixation 还是 Number of Saccades in AOI，语言背景对全大写字母的影响没有显著调节作用。
	•	结论: 当前数据没有证据支持非字母语言用户比字母语言用户的认知负荷更高。

研究建议
	1.	特征重点:
	•	聚焦 Number_of_saccades_in_AOI，尤其是复杂任务（中等和困难任务）中的显著差异。
	2.	数据扩展:
	•	增加样本量，进一步验证语言背景的潜在调节作用。
	3.	研究补充:
	•	在任务设计中明确认知负荷和任务目标的复杂性，以加强假设验证的针对性。

Average_duration_of_fixations in hard: P-value: 0.0908 （接近显著）困难任务中，全大写字母可能减少用户的 ，但尚未达显著水平。

Number_of_saccades_in_AOI in medium: P-value: 0.0003 （显著） 中等任务中，全大写字母显著增加了用户的 Number_of_saccades_in_AOI，表明用户可能需要更多的跳转来处理信息.

Number_of_saccades_in_AOI in medium in hard: P-value: 0.0384 （显著）困难任务中，全大写字母显著增加了用户的 Number_of_saccades_in_AOI，进一步支持其可能增加认知负荷。

0.5985 （不显著） 语言背景对 Number_of_saccades_in_AOI 的调节作用不显著。

不同语言背景的用户在不同任务难度下对不同文字格式的反应差异

In [15]:
from statsmodels.formula.api import mixedlm

# function for comprehensive analysis (language background and task difficulty)
def comprehensive_analysis(feature, data):
    print(f"\n=== Comprehensive Analysis for Feature: {feature} ===")
    model = mixedlm(f"{feature} ~ Format * LanguageGroup * Difficulty", data, groups="Participant", re_formula="~1")
    result = model.fit()
    print(result.summary())
    
    # Extract P-values and coefficients
    p_format = result.pvalues.get("Format[T.Uppercase]", None)
    p_interaction_lg = result.pvalues.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
    p_interaction_dif = result.pvalues.get("Format[T.Uppercase]:Difficulty[T.Hard]", None)
    p_interaction_all = result.pvalues.get("Format[T.Uppercase]:LanguageGroup[T.Group2]:Difficulty[T.Hard]", None)
    
    print(f"P-value (Format[T.Uppercase]): {p_format}")
    print(f"P-value (Format[T.Uppercase] * LanguageGroup[T.Group2]): {p_interaction_lg}")
    print(f"P-value (Format[T.Uppercase] * Difficulty[T.Hard]): {p_interaction_dif}")
    print(f"P-value (Format[T.Uppercase] * LanguageGroup[T.Group2] * Difficulty[T.Hard]): {p_interaction_all}")
    return result

# function for analysis by task difficulty and language group
def analyze_by_task_and_language(feature, data, difficulties):
    print(f"\n=== Analyzing Feature: {feature} for Different Task Difficulties and Language Groups ===")
    results = {}
    for difficulty in difficulties:
        print(f"\nTask Difficulty: {difficulty}")
        filtered_data = data[data['Difficulty'] == difficulty]
        
        # Build the model
        model = mixedlm(f"{feature} ~ Format * LanguageGroup", filtered_data, groups="Participant", re_formula="~1")
        result = model.fit()
        results[difficulty] = result
        
        # Extract P-values and coefficients
        coef_format = result.params.get("Format[T.Uppercase]", None)
        coef_interaction = result.params.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
        p_format = result.pvalues.get("Format[T.Uppercase]", None)
        p_interaction = result.pvalues.get("Format[T.Uppercase]:LanguageGroup[T.Group2]", None)
        

        print(f"Coefficient (Format[T.Uppercase]): {coef_format:.4f}")
        print(f"P-value (Format[T.Uppercase]): {p_format:.4f}")
        print(f"Coefficient (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {coef_interaction:.4f}")
        print(f"P-value (Interaction: Format[T.Uppercase] * LanguageGroup[T.Group2]): {p_interaction:.4f}")
        
        if p_interaction < 0.05:
            if coef_interaction > 0:
                print(f"Result: In {difficulty} tasks, LanguageGroup 2 (logographic) is more sensitive to Uppercase format.")
            else:
                print(f"Result: In {difficulty} tasks, LanguageGroup 1 (alphabetic) is more sensitive to Uppercase format.")
    return results

# Define features and task difficulties
features = ["Average_duration_of_fixations", "Number_of_saccades_in_AOI"]
difficulties = ["Easy", "Medium", "Hard"]

# Perform comprehensive analysis
for feature in features:
    comprehensive_analysis(feature, task_level_data_filter)

# Analyze by task difficulty
for feature in features:
    analyze_by_task_and_language(feature, task_level_data_filter, difficulties)


=== Comprehensive Analysis for Feature: Average_duration_of_fixations ===


  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))


                                    Mixed Linear Model Regression Results
Model:                         MixedLM            Dependent Variable:            Average_duration_of_fixations
No. Observations:              138                Method:                        REML                         
No. Groups:                    23                 Scale:                         906.5439                     
Min. group size:               6                  Log-Likelihood:                -634.4176                    
Max. group size:               6                  Converged:                     Yes                          
Mean group size:               6.0                                                                            
--------------------------------------------------------------------------------------------------------------
                                                                  Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
--------------------------------------

无论是任务难度还是语言背景，全大写字母对 Average_duration_of_fixations 的影响不显著，无法支持假设 2 中关于 认知负荷深度 的推论。

	•	中等任务中，全大写字母对 Number_of_saccades_in_AOI 的影响显著(Coefficient: 19.5873, P-value: 0.0325)，语言背景显著调节了这种影响，表明非字母语言用户更敏感。
	•	简单和困难任务中，全大写字母对 Number_of_saccades_in_AOI 没有显著影响，也没有显著的语言背景调节作用。

# H3: Title Case 是否更有助于任务完成效率和减少操作错误？ 

In [3]:
import pandas as pd
task_level_data_filter = pd.read_csv('./preprocessed_task_level_data.tsv', sep="\t")

In [18]:
from statsmodels.formula.api import logit, mixedlm
from statsmodels.multivariate.manova import MANOVA

# 1. Analyze task completion efficiency 
def analyze_efficiency(features, data, difficulties):
    for feature in features:
        print(f"\n=== Efficiency Analysis for Feature: {feature} ===")
        
        # Overall analysis
        print(f"\n--- Overall Analysis for {feature} ---")
        formula = f"{feature} ~ Format"
        model = mixedlm(formula, data, groups="Participant", re_formula="~1")
        result = model.fit()
        print(result.summary())
        
        # Task difficulty analysis
        print(f"\n--- Task Difficulty Analysis for {feature} ---")
        for difficulty in difficulties:
            filtered_data = data[data['Difficulty'] == difficulty]
            model = mixedlm(formula, filtered_data, groups="Participant", re_formula="~1")
            result = model.fit()
            print(f"\nTask Difficulty: {difficulty}")
            print(result.summary())
        
        # Language group analysis
        print(f"\n--- Language Group Analysis for {feature} ---")
        formula = f"{feature} ~ Format * LanguageGroup"
        model = mixedlm(formula, data, groups="Participant", re_formula="~1")
        result = model.fit()
        print(result.summary())

# 2. Analyze operational errors
def analyze_errors(data, difficulties):
    print("\n=== Analysis of Errors (IsCorrect) ===")
    
    # Overall analysis
    print("\n--- Overall Analysis ---")
    formula = "IsCorrect_numeric ~ Format"
    model = logit(formula, data)
    result = model.fit()
    print(result.summary())
    
    # Task difficulty analysis
    print("\n--- Task Difficulty Analysis ---")
    for difficulty in difficulties:
        filtered_data = data[data['Difficulty'] == difficulty]
        model = logit(formula, filtered_data)
        result = model.fit()
        print(f"\nTask Difficulty: {difficulty}")
        print(result.summary())
    
    # Language group analysis
    print("\n--- Language Group Analysis ---")
    formula = "IsCorrect_numeric ~ Format * LanguageGroup"
    model = logit(formula, data)
    result = model.fit()
    print(result.summary())

# features and task difficulties
efficiency_features = ["Time_to_first_fixation", "Number_of_fixations", "Total_duration_of_fixations"]
difficulties = ["Easy", "Medium", "Hard"]

# 1. Analyze task completion efficiency
analyze_efficiency(efficiency_features, task_level_data_filter, difficulties)

# 2. Analyze operational errors
analyze_errors(task_level_data_filter, difficulties)


=== Efficiency Analysis for Feature: Time_to_first_fixation ===

--- Overall Analysis for Time_to_first_fixation ---
                Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Time_to_first_fixation
No. Observations:  138     Method:             REML                  
No. Groups:        23      Scale:              9161036.1455          
Min. group size:   6       Log-Likelihood:     -1288.7948            
Max. group size:   6       Converged:          Yes                   
Mean group size:   6.0                                               
---------------------------------------------------------------------
                      Coef.    Std.Err.   z   P>|z|  [0.025   0.975] 
---------------------------------------------------------------------
Intercept             1080.174  377.568 2.861 0.004  340.154 1820.194
Format[T.Uppercase]    655.043  515.303 1.271 0.204 -354.932 1665.019
Participant Var     225147.348  190.625                   

  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))



Task Difficulty: Hard
                Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Time_to_first_fixation
No. Observations:  46      Method:             REML                  
No. Groups:        23      Scale:              5720550.1022          
Min. group size:   2       Log-Likelihood:     -407.8795             
Max. group size:   2       Converged:          Yes                   
Mean group size:   2.0                                               
---------------------------------------------------------------------
                     Coef.   Std.Err.   z    P>|z|   [0.025   0.975] 
---------------------------------------------------------------------
Intercept           1233.174  498.718  2.473 0.013   255.705 2210.643
Format[T.Uppercase] -636.087  705.294 -0.902 0.367 -2018.437  746.263
Participant Var        0.017  600.098                                


--- Language Group Analysis for Time_to_first_fixation ---
                      

  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))



Task Difficulty: Medium
              Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Number_of_fixations
No. Observations: 46      Method:             REML               
No. Groups:       23      Scale:              449.9074           
Min. group size:  2       Log-Likelihood:     -215.2160          
Max. group size:  2       Converged:          Yes                
Mean group size:  2.0                                            
-----------------------------------------------------------------
                        Coef.  Std.Err.   z   P>|z| [0.025 0.975]
-----------------------------------------------------------------
Intercept               53.957    6.993 7.716 0.000 40.251 67.662
Format[T.Uppercase]     23.217    6.255 3.712 0.000 10.958 35.477
Participant Var        674.780   18.087                          


Task Difficulty: Hard
              Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Number_o

  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))


                  Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Total_duration_of_fixations
No. Observations: 138     Method:             REML                       
No. Groups:       23      Scale:              1033400726.1575            
Min. group size:  6       Log-Likelihood:     -1609.0625                 
Max. group size:  6       Converged:          Yes                        
Mean group size:  6.0                                                    
-------------------------------------------------------------------------
                        Coef.    Std.Err.   z   P>|z|   [0.025    0.975] 
-------------------------------------------------------------------------
Intercept              22317.159 3908.959 5.709 0.000 14655.740 29978.579
Format[T.Uppercase]     6550.101 5472.992 1.197 0.231 -4176.766 17276.969
Participant Var      6972259.822 1835.868                                


--- Task Difficulty Analysis for Total_duration_of_fix

  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))
  dat = dat.applymap(lambda x: _formatter(x, float_format))


                             Mixed Linear Model Regression Results
Model:                     MixedLM        Dependent Variable:        Total_duration_of_fixations
No. Observations:          138            Method:                    REML                       
No. Groups:                23             Scale:                     1006094971.9727            
Min. group size:           6              Log-Likelihood:            -1586.0372                 
Max. group size:           6              Converged:                 Yes                        
Mean group size:           6.0                                                                  
------------------------------------------------------------------------------------------------
                                              Coef.    Std.Err.   z   P>|z|   [0.025     0.975] 
------------------------------------------------------------------------------------------------
Intercept                                   18604.381  4894.

Time to First Fixation (Task Efficiency): 
    Task Difficulty Analysis:
	•	Easy: P-value = 0.137, no significant difference.
	•	Medium: P-value = 0.002, significant increase in Time to First Fixation for Uppercase.
	•	Hard: P-value = 0.367, no significant difference.
	•	Interpretation: Uppercase increases Time to First Fixation significantly only for Medium tasks, suggesting that Uppercase might reduce efficiency for tasks of medium difficulty.


Number of Fixations (Task Efficiency):
    •	Task Difficulty Analysis:
	•	Easy: P-value = 0.800, no significant difference.
	•	Medium: P-value < 0.001, significant increase in fixations for Uppercase.
	•	Hard: P-value = 0.007, significant increase in fixations for Uppercase.
	•	Interpretation: Uppercase increases the total number of fixations for Medium and Hard tasks, suggesting reduced efficiency for these task levels.

Errors (IsCorrect):
	•	Overall Analysis: P-value = 0.054, close to significance, with a negative coefficient suggesting Uppercase might slightly increase error rates.
	•	Task Difficulty Analysis:
	•	Easy: P-value = 0.090, no significant difference.
	•	Medium: P-value = 0.998, no significant difference.
	•	Hard: P-value = 0.017, significant increase in errors for Uppercase.
	•	Interpretation: Uppercase significantly increases error rates for Hard tasks, indicating potential inefficiency for more complex tasks.


Language Group Analysis (IsCorrect):
	•	P-value for the interaction between Format and Language Group = 0.222, indicating no significant difference in the effect of Title Case vs. Uppercase on error rates between alphabetic and logographic language groups.

conclusion:
	1.	Efficiency:
	•	Title Case appears more efficient for Medium tasks (as indicated by shorter Time to First Fixation).
	•	Uppercase significantly reduces efficiency for Medium and Hard tasks due to increased fixations.
	2.	Error Reduction:
	•	Title Case reduces errors compared to Uppercase for Hard tasks, with no significant differences for Easy and Medium tasks.
	3.	Language Background:
	•	There is no significant interaction between Format and Language Group, suggesting that language background does not moderate the efficiency or error reduction differences between Title Case and Uppercase.