# A群 vs B群 視線行動比較分析

メタ認知支援解説（B群）が視線行動に与える影響を多角的に分析し、群間差を明らかにする。

- **セクションA**: セグメントレベル分析（statistics.csv活用）
- **セクションB**: トレーニング進行分析
- **セクションC**: AOIレベル分析（注視配分）
- **セクションD**: 探索的分析
- **セクションE**: 統合的統計まとめ
- **セクションF**: 結論・出力

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import nct
from scipy.integrate import quad
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['font.family'] = 'Hiragino Sans'
import json
import os
import sys
import warnings
from pathlib import Path
from collections import defaultdict

warnings.filterwarnings('ignore', category=RuntimeWarning)
%matplotlib inline

sys.path.append('../lib/')
import eyegaze as eg

# Path definitions: the project root is two levels above the notebook's
# working directory; data folders hang off it.
PROJECT_ROOT = Path.cwd().parent.parent
INPUT_ROOT = PROJECT_ROOT / 'data' / 'input'
WORKING_ROOT = PROJECT_ROOT / 'data' / 'working'
OUTPUT_ROOT = PROJECT_ROOT / 'data' / 'output'

# Participant definitions (group -> participant IDs)
# P005, P011: excluded because of poor gaze-data quality in the pre-test
#   P005: valid gaze rate 24.4% (right eye almost never measured)
#   P011: valid gaze rate 43.0% (both eyes poor)
participants = {
    'A': ['P001', 'P002', 'P006', 'P008', 'P009', 'P010', 'P016', 'P017'],
    'B': ['P003', 'P004', 'P007', 'P012', 'P013', 'P014', 'P015', 'P018', 'P019', 'P020'],
}
# Phase folder names looked up under each participant directory.
phases = ['pre', 'post', 'training1', 'training2', 'training3']
# Passed as `tolerance` to eg.computePerAOIStatistics in section C.
# NOTE(review): unit is not visible in this notebook (presumably px) - confirm.
TOLERANCE = 5.0

print(f'Project root: {PROJECT_ROOT}')

---
## セクションA: セグメントレベル分析（statistics.csv活用）

全参加者の `statistics.csv` を集約し、読解時間・固視回数・瞳孔径等のセグメントレベル指標を群間比較する。

In [None]:
# --- A-1: Aggregate statistics.csv ---
# Read every <OUTPUT_ROOT>/<group>/<participant>/<phase>/statistics.csv and
# stack them into one long DataFrame tagged with group / participant / phase.
all_stats = []
missing_stats = []  # "group/participant/phase" labels with no statistics.csv

for group, pids in participants.items():
    for pid in pids:
        for phase in phases:
            csv_path = OUTPUT_ROOT / group / pid / phase / 'statistics.csv'
            if not csv_path.exists():
                # Remember what was skipped so reduced n is visible downstream.
                missing_stats.append(f'{group}/{pid}/{phase}')
                continue
            df_tmp = pd.read_csv(csv_path)
            df_tmp['group'] = group
            df_tmp['participant'] = pid
            df_tmp['phase'] = phase
            all_stats.append(df_tmp)

# pd.concat([]) fails with an unhelpful "No objects to concatenate";
# report the actual cause (wrong root / nothing generated yet) instead.
if not all_stats:
    raise FileNotFoundError(f'statistics.csv が1件も見つかりません: {OUTPUT_ROOT}')

df_all = pd.concat(all_stats, ignore_index=True)
print(f'全レコード数: {len(df_all)}')
print(f'カラム: {list(df_all.columns)}')
if missing_stats:
    print(f'statistics.csv 欠損: {len(missing_stats)}件 {missing_stats}')

In [None]:
# --- A-2: Keep question screens only ---
# pre/post: question_screen_open
# training: question_screen_open + analog_question_open_an{1,2,3}
question_events = ['question_screen_open',
                   'analog_question_open_an1', 'analog_question_open_an2', 'analog_question_open_an3']

# .copy() so later column assignments do not touch df_all.
df_q = df_all[df_all['event_type'].isin(question_events)].copy()
print(f'問題画面レコード数: {len(df_q)}')
# Sanity table: record counts per group x phase (rows) and event type (columns).
print(df_q.groupby(['group', 'phase', 'event_type']).size().unstack(fill_value=0))

In [None]:
# --- A-3: Aggregate per participant x phase ---
# Columns of statistics.csv analysed throughout sections A and B.
seg_metrics = [
    # basic
    'duration_sec',
    # fixations
    'fixation_count', 'fixation_rate',
    'total_fixation_duration', 'mean_fixation_duration', 'std_fixation_duration',
    # pupil
    'mean_pupil_diameter', 'std_pupil_diameter',
    # saccades
    'mean_saccade_length', 'std_saccade_length',
    'mean_saccade_speed', 'std_saccade_speed',
    'regression_rate',
]

# Display label (Japanese, with unit) for each metric; used in printouts and plot titles.
seg_metric_labels = {
    'duration_sec': '読解時間 (秒)',
    'fixation_count': '固視回数',
    'fixation_rate': '固視率 (回/秒)',
    'total_fixation_duration': '総固視時間 (秒)',
    'mean_fixation_duration': '平均固視時間 (秒)',
    'std_fixation_duration': '固視時間SD (秒)',
    'mean_pupil_diameter': '平均瞳孔径 (mm)',
    'std_pupil_diameter': '瞳孔径SD (mm)',
    'mean_saccade_length': '平均サッカード長 (px)',
    'std_saccade_length': 'サッカード長SD (px)',
    'mean_saccade_speed': '平均サッカード速度 (px/s)',
    'std_saccade_speed': 'サッカード速度SD (px/s)',
    'regression_rate': '回帰率',
}

# One row per group x participant x phase: mean of each metric over that
# participant's question-screen segments (for training phases this pools the
# main question and the analog questions).
df_seg = df_q.groupby(['group', 'participant', 'phase'])[seg_metrics].mean().reset_index()
print(f'参加者レベルDF: {df_seg.shape}')
df_seg.head()

In [None]:
# --- A-4: Descriptive statistics ---
# For every metric print a group x phase table of "M (SD)" computed over
# participant-level values in df_seg.
print('【セグメントレベル記述統計】')
print('=' * 70)

for metric in seg_metrics:
    print(f'\n--- {seg_metric_labels[metric]} ---')
    summary = df_seg.groupby(['group', 'phase'])[metric].agg(['mean', 'std', 'count'])
    summary.columns = ['M', 'SD', 'n']
    summary['M(SD)'] = summary.apply(lambda r: f"{r['M']:.3f} ({r['SD']:.3f})", axis=1)
    pivot = summary['M(SD)'].unstack(level='phase')
    # Chronological column order; phases absent from the data are dropped.
    phase_order = [p for p in ['pre', 'training1', 'training2', 'training3', 'post'] if p in pivot.columns]
    print(pivot[phase_order])

In [None]:
# --- A-5: ヘルパー関数群 ---

def get_paired_data(df, group, metric, phase_pre='pre', phase_post='post'):
    """Return matched (pre, post) value arrays of `metric` for one group.

    Only participants that have a non-NaN value in both phases are kept;
    both arrays share the same participant order.
    """
    in_group = df[df['group'] == group]

    def _by_participant(phase_name):
        rows = in_group[in_group['phase'] == phase_name]
        return rows.set_index('participant')[metric]

    before = _by_participant(phase_pre)
    after = _by_participant(phase_post)

    # Participants present in both phases, then drop missing values per side.
    shared = before.index.intersection(after.index)
    before = before.loc[shared].dropna()
    after = after.loc[shared].dropna()

    # Re-intersect: a participant may have lost only one side to NaN.
    paired = before.index.intersection(after.index)
    return before.loc[paired].values, after.loc[paired].values


def cohens_d_paired(pre, post):
    """Cohen's d for paired samples: mean(post - pre) / SD(post - pre).

    SD uses ddof=1. Returns 0.0 when the SD is not strictly positive.
    """
    change = post - pre
    spread = np.std(change, ddof=1)
    if spread > 0:
        return np.mean(change) / spread
    return 0.0


def cohens_d_ind(x, y):
    """Cohen's d for two independent samples, (mean(x) - mean(y)) / pooled SD.

    The pooled SD weights each sample variance (ddof=1) by n - 1.
    Returns 0.0 when the pooled SD is not strictly positive.
    """
    n_x, n_y = len(x), len(y)
    ss_x = (n_x - 1) * np.var(x, ddof=1)
    ss_y = (n_y - 1) * np.var(y, ddof=1)
    pooled_sd = np.sqrt((ss_x + ss_y) / (n_x + n_y - 2))
    if pooled_sd > 0:
        return (np.mean(x) - np.mean(y)) / pooled_sd
    return 0.0


def bootstrap_ci(x, y, func, n_boot=10000, ci=95, seed=42):
    """Percentile bootstrap confidence interval of func(x, y).

    x and y are resampled independently with replacement (each to its own
    length) n_boot times using a RandomState seeded with `seed`; the
    (lower, upper) percentiles of the resulting statistics are returned.
    """
    rng = np.random.RandomState(seed)
    n_x, n_y = len(x), len(y)
    tail = (100 - ci) / 2

    def _one_replicate():
        # Draw order (x first, then y) is kept so seeded results are unchanged.
        idx_x = rng.choice(n_x, n_x, replace=True)
        idx_y = rng.choice(n_y, n_y, replace=True)
        return func(x[idx_x], y[idx_y])

    replicates = [_one_replicate() for _ in range(n_boot)]
    lower = np.percentile(replicates, tail)
    upper = np.percentile(replicates, 100 - tail)
    return lower, upper


def mixed_anova(pre_A, post_A, pre_B, post_B):
    """2x2 mixed ANOVA: Group (A/B, between) x Time (pre/post, within).

    Parameters
    ----------
    pre_A, post_A : array-like
        Pre and post scores of group A, paired by position.
    pre_B, post_B : array-like
        Pre and post scores of group B, paired by position.

    Returns
    -------
    dict
        F, p and partial eta squared for the group main effect
        (``*_group``), the time main effect (``*_time``) and the
        interaction (``*_interaction``), plus the degrees of freedom
        ``df_g``, ``df_s``, ``df_t``, ``df_ew``.

    Notes
    -----
    The group effect is tested against between-subject error; time and
    interaction are tested against within-subject error. Marginal and
    grand means are observation-weighted.
    """
    pre_A, post_A, pre_B, post_B = (
        np.asarray(v, dtype=float) for v in (pre_A, post_A, pre_B, post_B)
    )
    n_A, n_B = len(pre_A), len(pre_B)
    N = n_A + n_B

    grand_mean = np.mean(np.concatenate([pre_A, post_A, pre_B, post_B]))
    mean_A = np.mean(np.concatenate([pre_A, post_A]))
    mean_B = np.mean(np.concatenate([pre_B, post_B]))
    mean_pre = np.mean(np.concatenate([pre_A, pre_B]))
    mean_post = np.mean(np.concatenate([post_A, post_B]))
    mean_A_pre, mean_A_post = np.mean(pre_A), np.mean(post_A)
    mean_B_pre, mean_B_post = np.mean(pre_B), np.mean(post_B)

    # Effect sums of squares (each subject contributes two observations).
    SS_group = 2 * (n_A * (mean_A - grand_mean)**2 + n_B * (mean_B - grand_mean)**2)
    SS_time = N * ((mean_pre - grand_mean)**2 + (mean_post - grand_mean)**2)
    SS_ix = (n_A * ((mean_A_pre - mean_A - mean_pre + grand_mean)**2 +
                    (mean_A_post - mean_A - mean_post + grand_mean)**2) +
             n_B * ((mean_B_pre - mean_B - mean_pre + grand_mean)**2 +
                    (mean_B_post - mean_B - mean_post + grand_mean)**2))

    # Between-subject error: subject means around their group mean.
    subj_A = (pre_A + post_A) / 2
    subj_B = (pre_B + post_B) / 2
    SS_subj = 2 * (np.sum((subj_A - mean_A)**2) + np.sum((subj_B - mean_B)**2))

    # Within-subject error: x - subject mean - CELL mean + group mean.
    # The cell mean (e.g. mean_A_pre), not the marginal time mean, must be
    # removed here; otherwise group-specific time effects leak into the error
    # term and the time / interaction F ratios are wrong.
    SS_ew = (np.sum((pre_A - subj_A - mean_A_pre + mean_A)**2) +
             np.sum((post_A - subj_A - mean_A_post + mean_A)**2) +
             np.sum((pre_B - subj_B - mean_B_pre + mean_B)**2) +
             np.sum((post_B - subj_B - mean_B_post + mean_B)**2))

    df_g, df_t, df_ix = 1, 1, 1
    df_s = N - 2
    df_ew = N - 2

    MS_g = SS_group / df_g
    MS_t = SS_time / df_t
    MS_ix = SS_ix / df_ix
    # Tiny placeholder keeps the ratios defined when no error df is available.
    MS_s = SS_subj / df_s if df_s > 0 else 1e-10
    MS_ew = SS_ew / df_ew if df_ew > 0 else 1e-10

    F_g = MS_g / MS_s
    F_t = MS_t / MS_ew
    F_ix = MS_ix / MS_ew

    # Survival function is more accurate than 1 - cdf for small p-values.
    p_g = stats.f.sf(F_g, df_g, df_s)
    p_t = stats.f.sf(F_t, df_t, df_ew)
    p_ix = stats.f.sf(F_ix, df_ix, df_ew)

    # Partial eta squared: SS_effect / (SS_effect + SS_error of that effect).
    eta2_g = SS_group / (SS_group + SS_subj) if (SS_group + SS_subj) > 0 else 0
    eta2_t = SS_time / (SS_time + SS_ew) if (SS_time + SS_ew) > 0 else 0
    eta2_ix = SS_ix / (SS_ix + SS_ew) if (SS_ix + SS_ew) > 0 else 0

    return {
        'F_group': F_g, 'p_group': p_g, 'eta2_group': eta2_g, 'df_g': df_g, 'df_s': df_s,
        'F_time': F_t, 'p_time': p_t, 'eta2_time': eta2_t, 'df_t': df_t, 'df_ew': df_ew,
        'F_interaction': F_ix, 'p_interaction': p_ix, 'eta2_interaction': eta2_ix,
    }

print('ヘルパー関数定義完了')

In [None]:
# --- A-5b: Normality tests (Shapiro-Wilk) ---
# For each metric and group, test pre, post and the post - pre difference;
# then test the gain scores per group as the assumption check for the
# between-group comparison. Results are shown as tables plus a summary.
print('【正規性検定 (Shapiro-Wilk) — セグメントレベル指標】')
print('=' * 70)
print('※ p > 0.05 で正規性を仮定（○）、p ≤ 0.05 で棄却（×）')

normality_rows = []

for metric in seg_metrics:
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_seg, group, metric)
        # Skip groups with fewer than 3 paired participants.
        if len(pre_v) < 3:
            continue
        diff = post_v - pre_v

        # Pre
        w_pre, p_pre = stats.shapiro(pre_v)
        # Post
        w_post, p_post = stats.shapiro(post_v)
        # Difference (post - pre)
        w_diff, p_diff = stats.shapiro(diff)

        normality_rows.append({
            '指標': seg_metric_labels[metric],
            '群': group,
            'n': len(pre_v),
            'Pre W': f'{w_pre:.3f}',
            'Pre p': f'{p_pre:.3f}',
            'Pre 正規性': '○' if p_pre > 0.05 else '×',
            'Post W': f'{w_post:.3f}',
            'Post p': f'{p_post:.3f}',
            'Post 正規性': '○' if p_post > 0.05 else '×',
            '差分 W': f'{w_diff:.3f}',
            '差分 p': f'{p_diff:.3f}',
            '差分 正規性': '○' if p_diff > 0.05 else '×',
        })

norm_seg_df = pd.DataFrame(normality_rows)
print()
display(norm_seg_df)

# For the between-group comparison: normality of gain scores
print('\n【ゲインスコアの正規性検定（群間比較の前提確認）】')
print('-' * 70)

gain_norm_rows = []
for metric in seg_metrics:
    pre_A, post_A = get_paired_data(df_seg, 'A', metric)
    pre_B, post_B = get_paired_data(df_seg, 'B', metric)
    if len(pre_A) < 3 or len(pre_B) < 3:
        continue
    gain_A = post_A - pre_A
    gain_B = post_B - pre_B

    w_A, p_A = stats.shapiro(gain_A)
    w_B, p_B = stats.shapiro(gain_B)

    gain_norm_rows.append({
        '指標': seg_metric_labels[metric],
        'A群 W': f'{w_A:.3f}',
        'A群 p': f'{p_A:.3f}',
        'A群 正規性': '○' if p_A > 0.05 else '×',
        'B群 W': f'{w_B:.3f}',
        'B群 p': f'{p_B:.3f}',
        'B群 正規性': '○' if p_B > 0.05 else '×',
    })

gain_norm_seg_df = pd.DataFrame(gain_norm_rows)
display(gain_norm_seg_df)

# Verdict summary
# NOTE(review): if no metric/group reached n >= 3 the frames above have no
# columns and the lookups below raise KeyError.
non_normal = norm_seg_df[norm_seg_df['差分 正規性'] == '×']
if len(non_normal) > 0:
    print(f'\n⚠ 差分が非正規: {len(non_normal)}件 → 対応ありt検定の代わりにWilcoxon推奨')
    for _, row in non_normal.iterrows():
        print(f"  - {row['指標']} ({row['群']}群)")
else:
    print('\n✓ 全指標・全群で差分の正規性が確認された → 対応ありt検定が適用可能')

# A metric is flagged when the gain is non-normal in either group.
gain_non_normal = gain_norm_seg_df[
    (gain_norm_seg_df['A群 正規性'] == '×') | (gain_norm_seg_df['B群 正規性'] == '×')
]
if len(gain_non_normal) > 0:
    print(f'⚠ ゲインが非正規: {len(gain_non_normal)}件 → 独立t検定の代わりにMann-Whitney U推奨')
    for _, row in gain_non_normal.iterrows():
        flags = []
        if row['A群 正規性'] == '×': flags.append('A群')
        if row['B群 正規性'] == '×': flags.append('B群')
        print(f"  - {row['指標']} ({', '.join(flags)})")
else:
    print('✓ 全指標でゲインスコアの正規性が確認された → 独立t検定が適用可能')

In [None]:
# --- A-6: Within-group Pre -> Post comparison ---
# Per metric and group: paired t-test, Wilcoxon signed-rank test and paired
# Cohen's d on the participants that have both pre and post values.
print('【群内 Pre→Post 比較（対応ありt検定 + Wilcoxon）】')
print('=' * 70)

within_results = []

for metric in seg_metrics:
    print(f'\n--- {seg_metric_labels[metric]} ---')
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_seg, group, metric)
        if len(pre_v) < 3:
            print(f'  {group}群: データ不足 (n={len(pre_v)})')
            continue
        diff = post_v - pre_v
        # Argument order (post, pre): positive t means an increase after training.
        t_stat, t_p = stats.ttest_rel(post_v, pre_v)
        try:
            w_stat, w_p = stats.wilcoxon(diff)
        except ValueError:
            # Wilcoxon could not be computed for this input; report NaN instead.
            w_stat, w_p = np.nan, np.nan
        d = cohens_d_paired(pre_v, post_v)

        print(f'  {group}群 (n={len(pre_v)}): Pre={np.mean(pre_v):.3f} -> Post={np.mean(post_v):.3f} '
              f'(差={np.mean(diff):+.3f}), t={t_stat:.3f}, p={t_p:.3f}, Wilcoxon p={w_p:.3f}, d={d:.3f}')

        within_results.append({
            '指標': seg_metric_labels[metric], '群': group, 'n': len(pre_v),
            'Pre平均': np.mean(pre_v), 'Post平均': np.mean(post_v), '差': np.mean(diff),
            't': t_stat, 'p_t': t_p, 'p_wilcoxon': w_p, 'Cohen_d': d,
        })

within_df = pd.DataFrame(within_results)
within_df

In [None]:
# --- A-7: Between-group comparison of gain scores ---
# Gain = post - pre per participant. Groups are compared with an independent
# t-test, Mann-Whitney U, Cohen's d and a bootstrap CI of d.
print('【群間ゲインスコア比較（独立t検定 + Mann-Whitney U）】')
print('=' * 70)

between_results = []

for metric in seg_metrics:
    print(f'\n--- {seg_metric_labels[metric]} ---')
    pre_A, post_A = get_paired_data(df_seg, 'A', metric)
    pre_B, post_B = get_paired_data(df_seg, 'B', metric)
    gain_A = post_A - pre_A
    gain_B = post_B - pre_B

    if len(gain_A) < 3 or len(gain_B) < 3:
        print(f'  データ不足')
        continue

    # NOTE(review): t and U take (A, B) while d and its CI take (B, A), so t
    # and d carry opposite signs for the same effect - confirm this is intended.
    t_stat, t_p = stats.ttest_ind(gain_A, gain_B)
    u_stat, u_p = stats.mannwhitneyu(gain_A, gain_B, alternative='two-sided')
    d = cohens_d_ind(gain_B, gain_A)
    ci_lo, ci_hi = bootstrap_ci(gain_B, gain_A, cohens_d_ind)

    print(f'  A群 ゲイン: M={np.mean(gain_A):+.3f} (SD={np.std(gain_A, ddof=1):.3f})')
    print(f'  B群 ゲイン: M={np.mean(gain_B):+.3f} (SD={np.std(gain_B, ddof=1):.3f})')
    print(f'  t={t_stat:.3f}, p={t_p:.3f} | U={u_stat:.1f}, p_MW={u_p:.3f} | d={d:.3f} [{ci_lo:.3f}, {ci_hi:.3f}]')

    between_results.append({
        '指標': seg_metric_labels[metric],
        'A群ゲイン平均': np.mean(gain_A), 'A群ゲインSD': np.std(gain_A, ddof=1),
        'B群ゲイン平均': np.mean(gain_B), 'B群ゲインSD': np.std(gain_B, ddof=1),
        't': t_stat, 'p_t': t_p, 'U': u_stat, 'p_MW': u_p,
        'Cohen_d': d, 'CI下限': ci_lo, 'CI上限': ci_hi,
    })

between_df = pd.DataFrame(between_results)
between_df

In [None]:
# --- A-8: Mixed ANOVA ---
# Run mixed_anova (group x time) for every metric that has at least 3 paired
# participants in both groups, print the three effects and collect them.
print('【混合分散分析 (群 x 時点)】')
print('=' * 70)

anova_results = []

for metric in seg_metrics:
    print(f'\n--- {seg_metric_labels[metric]} ---')
    pre_A, post_A = get_paired_data(df_seg, 'A', metric)
    pre_B, post_B = get_paired_data(df_seg, 'B', metric)
    if len(pre_A) < 3 or len(pre_B) < 3:
        continue

    res = mixed_anova(pre_A, post_A, pre_B, post_B)
    print(f'  群主効果:   F({res["df_g"]},{res["df_s"]})={res["F_group"]:.3f}, p={res["p_group"]:.3f}, η²p={res["eta2_group"]:.3f}')
    print(f'  時点主効果: F({res["df_t"]},{res["df_ew"]})={res["F_time"]:.3f}, p={res["p_time"]:.3f}, η²p={res["eta2_time"]:.3f}')
    # The interaction numerator df is written as a literal 1 because
    # mixed_anova does not return it.
    print(f'  交互作用:   F(1,{res["df_ew"]})={res["F_interaction"]:.3f}, p={res["p_interaction"]:.3f}, η²p={res["eta2_interaction"]:.3f}')

    res['指標'] = seg_metric_labels[metric]
    anova_results.append(res)

anova_df = pd.DataFrame(anova_results)
# Show only the F / p / effect-size columns (df columns are omitted).
anova_df[['指標', 'F_group', 'p_group', 'eta2_group', 'F_time', 'p_time', 'eta2_time',
          'F_interaction', 'p_interaction', 'eta2_interaction']]

In [None]:
# --- A-9: Visualisation - Pre/Post bar charts ---
# One panel per metric on a 4x4 grid; four bars (A pre, A post, B pre, B post)
# showing the group mean with SEM error bars.
fig, axes = plt.subplots(4, 4, figsize=(24, 20))
axes = axes.flatten()

for idx, metric in enumerate(seg_metrics):
    ax = axes[idx]
    means, sems, colors = [], [], []
    for group, color in [('A', '#4C72B0'), ('B', '#DD8452')]:
        for ph in ['pre', 'post']:
            # All participants with a value in this phase (not restricted to pairs).
            vals = df_seg[(df_seg['group'] == group) & (df_seg['phase'] == ph)][metric].dropna().values
            means.append(np.mean(vals) if len(vals) > 0 else 0)
            # SEM = SD (ddof=1) / sqrt(n); 0 when fewer than two values.
            sems.append(np.std(vals, ddof=1) / np.sqrt(len(vals)) if len(vals) > 1 else 0)
            colors.append(color)
    x = np.arange(4)
    ax.bar(x, means, yerr=sems, capsize=4, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
    ax.set_xticks(x)
    ax.set_xticklabels(['A\npre', 'A\npost', 'B\npre', 'B\npost'])
    ax.set_title(seg_metric_labels[metric])

# Hide the grid cells that have no metric.
for i in range(len(seg_metrics), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('セグメントレベル指標: Pre vs Post（群別）', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# --- A-10: Box plots of gain scores ---
# One panel per metric; one box per group showing post - pre of the paired
# participants, with a dashed reference line at zero gain.
fig, axes = plt.subplots(4, 4, figsize=(24, 20))
axes = axes.flatten()

for idx, metric in enumerate(seg_metrics):
    ax = axes[idx]
    gain_data = []
    labels = []
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_seg, group, metric)
        gains = post_v - pre_v
        gain_data.append(gains)
        labels.append(f'{group}群\n(n={len(gains)})')
    bp = ax.boxplot(gain_data, tick_labels=labels, patch_artist=True,
                    medianprops=dict(color='black', linewidth=2))
    # Box 0 is group A, box 1 is group B (same colours as the bar charts).
    bp['boxes'][0].set_facecolor('#4C72B0')
    bp['boxes'][0].set_alpha(0.7)
    bp['boxes'][1].set_facecolor('#DD8452')
    bp['boxes'][1].set_alpha(0.7)
    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.set_title(f'{seg_metric_labels[metric]}\n(Post - Pre)')
    ax.set_ylabel('ゲイン')

# Hide the grid cells that have no metric.
for i in range(len(seg_metrics), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('セグメントレベル: ゲインスコア (Post - Pre)', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

---
## セクションB: トレーニング進行分析

トレーニングセット内（本問→類題1→2→3）およびトレーニング間（T1→T2→T3）の進行を群間比較する。

In [None]:
# --- B-1: Build within-set progression data ---
# For each training phase take the four steps: main question -> analog 1 -> 2 -> 3
training_phases = ['training1', 'training2', 'training3']
# Event types in step order; the list index becomes the numeric `step`.
step_events = ['question_screen_open',
               'analog_question_open_an1', 'analog_question_open_an2', 'analog_question_open_an3']
step_labels = ['本問', '類題1', '類題2', '類題3']

intra_rows = []
for group, pids in participants.items():
    for pid in pids:
        for phase in training_phases:
            for step_idx, evt in enumerate(step_events):
                vals = df_all[(df_all['group'] == group) & (df_all['participant'] == pid) &
                              (df_all['phase'] == phase) & (df_all['event_type'] == evt)]
                # No record for this participant / phase / step: leave it out.
                if len(vals) == 0:
                    continue
                # Mean over the matching records -> one row per step.
                row = vals[seg_metrics].mean()
                intra_rows.append({
                    'group': group, 'participant': pid, 'phase': phase,
                    'step': step_idx, 'step_label': step_labels[step_idx],
                    **{m: row[m] for m in seg_metrics}
                })

df_intra = pd.DataFrame(intra_rows)
print(f'セット内進行DF: {df_intra.shape}')
df_intra.head()

In [None]:
# --- B-2: Line plots of within-set progression (averaged over trainings) ---
# One panel per metric; one line per group across the four steps, showing the
# group mean of participant means with SEM error bars.
fig, axes = plt.subplots(4, 4, figsize=(24, 20))
axes = axes.flatten()

for idx, metric in enumerate(seg_metrics):
    ax = axes[idx]
    for group, color, marker in [('A', '#4C72B0', 'o'), ('B', '#DD8452', 's')]:
        means, sems = [], []
        for step in range(4):
            # Pool this step across all training phases
            vals = df_intra[(df_intra['group'] == group) & (df_intra['step'] == step)]
            # Average within participant first, then compute group statistics
            pmeans = vals.groupby('participant')[metric].mean().values
            means.append(np.mean(pmeans) if len(pmeans) > 0 else np.nan)
            sems.append(np.std(pmeans, ddof=1) / np.sqrt(len(pmeans)) if len(pmeans) > 1 else 0)
        ax.errorbar(range(4), means, yerr=sems, marker=marker, color=color,
                    label=f'{group}群', capsize=4, linewidth=2, markersize=8)
    ax.set_xticks(range(4))
    ax.set_xticklabels(step_labels, fontsize=9)
    ax.set_title(seg_metric_labels[metric])
    ax.legend()

# Hide the grid cells that have no metric.
for i in range(len(seg_metrics), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('セット内進行 (本問 → 類題1-3)', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# --- B-3: Between-group comparison of within-set slopes ---
# Per participant, fit a line of metric vs. step index (0 = main question,
# 1-3 = analog questions) and compare the slopes between groups.
print('【セット内改善傾き（線形回帰）の群間比較】')
print('=' * 70)

slope_results = []

for metric in seg_metrics:
    print(f'\n--- {seg_metric_labels[metric]} ---')
    slopes = {'A': [], 'B': []}
    for group, pids in participants.items():
        for pid in pids:
            p_data = df_intra[(df_intra['group'] == group) & (df_intra['participant'] == pid)]
            # Step means pooled over this participant's training phases (one
            # slope per participant, not per training). Steps whose mean is
            # NaN are dropped so they cannot turn the slope into NaN.
            pmeans = p_data.groupby('step')[metric].mean().dropna()
            if len(pmeans) >= 3:
                slopes[group].append(stats.linregress(pmeans.index, pmeans.values).slope)

    sl_A = np.array(slopes['A'])
    sl_B = np.array(slopes['B'])
    if len(sl_A) >= 3 and len(sl_B) >= 3:
        t_stat, t_p = stats.ttest_ind(sl_A, sl_B)
        d = cohens_d_ind(sl_B, sl_A)
        print(f'  A群 傾き: M={np.mean(sl_A):.4f} (SD={np.std(sl_A, ddof=1):.4f})')
        print(f'  B群 傾き: M={np.mean(sl_B):.4f} (SD={np.std(sl_B, ddof=1):.4f})')
        print(f'  t={t_stat:.3f}, p={t_p:.3f}, d={d:.3f}')
        slope_results.append({
            '指標': seg_metric_labels[metric],
            'A群傾き平均': np.mean(sl_A), 'B群傾き平均': np.mean(sl_B),
            't': t_stat, 'p': t_p, 'd': d,
        })

# A bare expression inside `if` is not rendered by the notebook, so the
# summary table has to be displayed explicitly.
if slope_results:
    slope_df = pd.DataFrame(slope_results)
    display(slope_df)

In [None]:
# --- B-4: Progression across trainings (T1 -> T2 -> T3, main question only) ---
# Per metric: plot group means with SEM for the three training phases and run
# a Friedman test within each group.
print('【トレーニング間の進行（本問メトリクス）】')
print('=' * 70)

# Main question only, training phases only, one row per participant x phase.
df_main_q = df_q[df_q['event_type'] == 'question_screen_open'].copy()
df_main_tr = df_main_q[df_main_q['phase'].isin(training_phases)].copy()
df_main_tr_agg = df_main_tr.groupby(['group', 'participant', 'phase'])[seg_metrics].mean().reset_index()

fig, axes = plt.subplots(4, 4, figsize=(24, 20))
axes = axes.flatten()

for idx, metric in enumerate(seg_metrics):
    ax = axes[idx]
    print(f'\n--- {seg_metric_labels[metric]} ---')

    for group, color, marker in [('A', '#4C72B0', 'o'), ('B', '#DD8452', 's')]:
        means, sems = [], []
        for tp in training_phases:
            vals = df_main_tr_agg[(df_main_tr_agg['group'] == group) &
                                  (df_main_tr_agg['phase'] == tp)][metric].dropna().values
            means.append(np.mean(vals) if len(vals) > 0 else np.nan)
            sems.append(np.std(vals, ddof=1) / np.sqrt(len(vals)) if len(vals) > 1 else 0)
        ax.errorbar(range(3), means, yerr=sems, marker=marker, color=color,
                    label=f'{group}群', capsize=4, linewidth=2, markersize=8)
        print(f'  {group}群: ' + ', '.join(f'{tp}={m:.3f}' for tp, m in zip(training_phases, means)))

    ax.set_xticks(range(3))
    ax.set_xticklabels(training_phases)
    ax.set_title(seg_metric_labels[metric])
    ax.legend()

    # Friedman test within each group
    for group in ['A', 'B']:
        per_phase = [
            df_main_tr_agg[(df_main_tr_agg['group'] == group) &
                           (df_main_tr_agg['phase'] == tp)]
            .set_index('participant')[metric].rename(tp)
            for tp in training_phases
        ]
        # Align on participant and drop incomplete rows as a unit. Dropping
        # NaN per phase separately could pair values of different participants
        # (or skip the test entirely) when one phase value is missing.
        wide = pd.concat(per_phase, axis=1, join='inner').dropna()
        if len(wide) >= 3:
            chi2, p_f = stats.friedmanchisquare(*(wide[tp].values for tp in training_phases))
            print(f'  {group}群 Friedman検定: chi2={chi2:.3f}, p={p_f:.3f}')

# Hide the grid cells that have no metric.
for i in range(len(seg_metrics), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('トレーニング間の進行（本問のみ: T1→T2→T3）', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

---
## セクションC: AOIレベル分析（注視配分）

補正済み固視データと座標定義から、本文/選択肢/問題文への注視配分比率を算出し群間比較する。

In [None]:
# --- C-1: Compute AOI gaze allocation ---
# For every participant's pre/post session: load the gaze segments, detect
# fixations on main-question screens, apply the stored correction, map the
# fixations to AOIs and summarise dwell time by category (passage / choice /
# question), sentence-level first-fixation time and revisit rate.
# Load correction parameters
corrections_path = WORKING_ROOT / 'corrections' / 'all_segment_corrections.csv'
all_corrections = pd.read_csv(corrections_path)
print(f'補正パラメータ: {len(all_corrections)}行')

aoi_results = []
aoi_errors = []
aoi_processed = 0

for group, pids in participants.items():
    for pid in pids:
        for phase in ['pre', 'post']:
            label = f'{group}/{pid}/{phase}'
            try:
                base_dir = INPUT_ROOT / group / pid / phase
                eye_tracking_base = base_dir / 'eye_tracking'
                log_dir = base_dir / 'logs'
                coord_dir = INPUT_ROOT / group / 'Test' / phase / 'coordinates'

                # Skip silently when any required input directory is missing.
                if not all(d.is_dir() for d in [eye_tracking_base, log_dir, coord_dir]):
                    continue

                # Use the last sub-directory in sorted name order
                # (presumably the most recent recording - TODO confirm).
                ts_dirs = sorted([d for d in os.listdir(eye_tracking_base)
                                  if os.path.isdir(os.path.join(eye_tracking_base, d))])
                if not ts_dirs:
                    continue
                eye_dir = str(eye_tracking_base / ts_dirs[-1])

                # Likewise the last .jsonl event log in sorted name order.
                evt_files = sorted([f for f in os.listdir(log_dir) if f.endswith('.jsonl')])
                if not evt_files:
                    continue
                evt_path = str(log_dir / evt_files[-1])

                segments = eg.readTobiiData(eye_dir, evt_path, phase=phase)
                coord_mapping = eg.buildCoordinateMapping(str(coord_dir))

                # Correction rows of this participant and phase.
                p_corr = all_corrections[
                    (all_corrections['participant'] == pid) & (all_corrections['phase'] == phase)
                ].copy()

                for seg_idx, seg in enumerate(segments):
                    # Only main-question screens are analysed here.
                    evt = seg.get('event_type', '')
                    if evt != 'question_screen_open':
                        continue

                    data = seg.get('data')
                    if data is None or len(data) == 0:
                        continue

                    seg_id = seg.get('passage_id')
                    image_path = seg.get('image_path', '')
                    prefix = eg._eventTypeToCoordPrefix(evt)
                    coord_path = coord_mapping.get((prefix, seg_id))
                    if not coord_path:
                        continue

                    # NOTE(review): columns 0-3 of `data` are presumably
                    # time, x, y and pupil - confirm against eyegaze.
                    fixations = eg.detectFixations(
                        data[:, 0], data[:, 1], data[:, 2], P=data[:, 3],
                        min_concat_gaze_count=9, min_fixation_size=20, max_fixation_size=40)
                    if len(fixations) == 0:
                        continue

                    # Apply correction (matched on segment_index == position in
                    # `segments`; segments without a row stay uncorrected)
                    crow = p_corr[p_corr['segment_index'] == seg_idx]
                    if len(crow) > 0:
                        r = crow.iloc[0]
                        fixations = eg.applyScalingAndOffset(
                            fixations, scale_x=r['scale_x'], scale_y=r['scale_y'],
                            offset_x=r['offset_x'], offset_y=r['offset_y'])

                    coordinates = eg.loadCoordinates(coord_path)
                    # Optional AOI filters parsed from the stimulus image filename.
                    aoi_params = {}
                    if image_path:
                        parsed = eg.parseImageFilename(image_path)
                        if parsed:
                            aoi_params = {
                                'target_locale': parsed['target_locale'],
                                'target_question': parsed['target_question'],
                                'target_analog': parsed['target_analog'],
                            }

                    # Extract AOIs at all levels
                    aois_all = eg.extractAllAOIs(coordinates,
                        levels=['sentence', 'title', 'subtitle', 'table', 'choice', 'question', 'metadata', 'instruction'],
                        **aoi_params)
                    if len(aois_all) == 0:
                        continue

                    # The first column of the first fixation is used as the segment
                    # start. NOTE(review): presumably its timestamp, which would make
                    # first-fixation times relative to the first fixation - confirm.
                    seg_start = fixations[0, 0]
                    per_aoi = eg.computePerAOIStatistics(fixations, aois_all,
                                                         segment_start=seg_start, tolerance=TOLERANCE)

                    # Sum dwell time per category
                    passage_levels = {'sentence', 'metadata', 'title', 'subtitle', 'table', 'instruction'}
                    choice_levels = {'choice'}
                    question_levels = {'question'}

                    dur_passage = sum(s['total_duration'] for s in per_aoi if s['level'] in passage_levels)
                    dur_choice = sum(s['total_duration'] for s in per_aoi if s['level'] in choice_levels)
                    dur_question = sum(s['total_duration'] for s in per_aoi if s['level'] in question_levels)
                    dur_total = dur_passage + dur_choice + dur_question

                    # FFT = first fixation time (sentence level); AOIs whose value is None are ignored
                    sent_stats = [s for s in per_aoi if s['level'] == 'sentence']
                    fft_values = [s['first_fixation_time'] for s in sent_stats if s['first_fixation_time'] is not None]
                    mean_fft = np.mean(fft_values) if fft_values else None

                    # Revisit: share of visited sentence AOIs with fixation_count > 1
                    revisit_aois = sum(1 for s in sent_stats if s['fixation_count'] > 1)
                    total_visited = sum(1 for s in sent_stats if s['fixation_count'] > 0)
                    revisit_rate = revisit_aois / total_visited if total_visited > 0 else 0

                    # Ratios fall back to 0 when no AOI received any dwell time.
                    aoi_results.append({
                        'group': group, 'participant': pid, 'phase': phase,
                        'passage_id': seg_id, 'segment_index': seg_idx,
                        'dur_passage': dur_passage, 'dur_choice': dur_choice,
                        'dur_question': dur_question, 'dur_total': dur_total,
                        'passage_ratio': dur_passage / dur_total if dur_total > 0 else 0,
                        'choice_ratio': dur_choice / dur_total if dur_total > 0 else 0,
                        'question_ratio': dur_question / dur_total if dur_total > 0 else 0,
                        'mean_sent_fft': mean_fft,
                        'revisit_rate': revisit_rate,
                    })

                # Counts sessions that ran to the end, even if no segment yielded a record.
                aoi_processed += 1
            except Exception as e:
                # Best effort: keep the traceback and continue with the next session.
                import traceback
                aoi_errors.append((label, traceback.format_exc()))

print(f'AOI処理完了: {aoi_processed} 参加者×フェーズ')
print(f'結果レコード数: {len(aoi_results)}')
if aoi_errors:
    print(f'エラー: {len(aoi_errors)}件')
    # Show only the final traceback line of the first three failures.
    for lbl, msg in aoi_errors[:3]:
        print(f'  {lbl}: {msg.splitlines()[-1]}')

In [None]:
# --- C-2: AOI DataFrame & participant-level aggregation ---
df_aoi = pd.DataFrame(aoi_results)
print(f'AOI DataFrame: {df_aoi.shape}')

# AOI metrics analysed in section C and their display labels (Japanese).
aoi_metrics = ['passage_ratio', 'choice_ratio', 'question_ratio', 'mean_sent_fft', 'revisit_rate']
aoi_metric_labels = {
    'passage_ratio': '本文注視比率',
    'choice_ratio': '選択肢注視比率',
    'question_ratio': '問題文注視比率',
    'mean_sent_fft': '文レベル平均FFT (秒)',
    'revisit_rate': '再訪問率',
}

# One row per group x participant x phase: mean over that session's segments.
df_aoi_p = df_aoi.groupby(['group', 'participant', 'phase'])[aoi_metrics].mean().reset_index()
print(f'参加者レベル AOI DF: {df_aoi_p.shape}')
df_aoi_p.head()

In [None]:
# --- C-3: AOI descriptive statistics ---
# For every AOI metric print a group x phase table of "M (SD)" computed over
# participant-level values in df_aoi_p.
print('【AOI注視配分 記述統計】')
print('=' * 70)

for metric in aoi_metrics:
    print(f'\n--- {aoi_metric_labels[metric]} ---')
    summary = df_aoi_p.groupby(['group', 'phase'])[metric].agg(['mean', 'std', 'count'])
    summary.columns = ['M', 'SD', 'n']
    summary['M(SD)'] = summary.apply(lambda r: f"{r['M']:.4f} ({r['SD']:.4f})", axis=1)
    pivot = summary['M(SD)'].unstack(level='phase')
    # pre before post; phases absent from the data are dropped.
    phase_order = [p for p in ['pre', 'post'] if p in pivot.columns]
    print(pivot[phase_order])

In [None]:
# --- C-3b: Normality tests (Shapiro-Wilk) - AOI metrics ---
# Same procedure as the segment-level normality check, applied to df_aoi_p:
# pre, post and difference per group, then gain scores per group.
print('【正規性検定 (Shapiro-Wilk) — AOI注視配分指標】')
print('=' * 70)
print('※ p > 0.05 で正規性を仮定（○）、p ≤ 0.05 で棄却（×）')

aoi_norm_rows = []

for metric in aoi_metrics:
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_aoi_p, group, metric)
        # Skip groups with fewer than 3 paired participants.
        if len(pre_v) < 3:
            continue
        diff = post_v - pre_v

        w_pre, p_pre = stats.shapiro(pre_v)
        w_post, p_post = stats.shapiro(post_v)
        w_diff, p_diff = stats.shapiro(diff)

        aoi_norm_rows.append({
            '指標': aoi_metric_labels[metric],
            '群': group,
            'n': len(pre_v),
            'Pre W': f'{w_pre:.3f}',
            'Pre p': f'{p_pre:.3f}',
            'Pre 正規性': '○' if p_pre > 0.05 else '×',
            'Post W': f'{w_post:.3f}',
            'Post p': f'{p_post:.3f}',
            'Post 正規性': '○' if p_post > 0.05 else '×',
            '差分 W': f'{w_diff:.3f}',
            '差分 p': f'{p_diff:.3f}',
            '差分 正規性': '○' if p_diff > 0.05 else '×',
        })

norm_aoi_df = pd.DataFrame(aoi_norm_rows)
print()
display(norm_aoi_df)

# For the between-group comparison: normality of gain scores
print('\n【AOIゲインスコアの正規性検定（群間比較の前提確認）】')
print('-' * 70)

aoi_gain_norm_rows = []
for metric in aoi_metrics:
    pre_A, post_A = get_paired_data(df_aoi_p, 'A', metric)
    pre_B, post_B = get_paired_data(df_aoi_p, 'B', metric)
    if len(pre_A) < 3 or len(pre_B) < 3:
        continue
    gain_A = post_A - pre_A
    gain_B = post_B - pre_B

    w_A, p_A = stats.shapiro(gain_A)
    w_B, p_B = stats.shapiro(gain_B)

    aoi_gain_norm_rows.append({
        '指標': aoi_metric_labels[metric],
        'A群 W': f'{w_A:.3f}',
        'A群 p': f'{p_A:.3f}',
        'A群 正規性': '○' if p_A > 0.05 else '×',
        'B群 W': f'{w_B:.3f}',
        'B群 p': f'{p_B:.3f}',
        'B群 正規性': '○' if p_B > 0.05 else '×',
    })

aoi_gain_norm_df = pd.DataFrame(aoi_gain_norm_rows)
display(aoi_gain_norm_df)

# Verdict summary
# NOTE(review): if no metric/group reached n >= 3 the frames above have no
# columns and the lookups below raise KeyError.
aoi_non_normal = norm_aoi_df[norm_aoi_df['差分 正規性'] == '×']
if len(aoi_non_normal) > 0:
    print(f'\n⚠ 差分が非正規: {len(aoi_non_normal)}件 → 対応ありt検定の代わりにWilcoxon推奨')
    for _, row in aoi_non_normal.iterrows():
        print(f"  - {row['指標']} ({row['群']}群)")
else:
    print('\n✓ 全AOI指標・全群で差分の正規性が確認された → 対応ありt検定が適用可能')

# A metric is flagged when the gain is non-normal in either group.
aoi_gain_non_normal = aoi_gain_norm_df[
    (aoi_gain_norm_df['A群 正規性'] == '×') | (aoi_gain_norm_df['B群 正規性'] == '×')
]
if len(aoi_gain_non_normal) > 0:
    print(f'⚠ ゲインが非正規: {len(aoi_gain_non_normal)}件 → 独立t検定の代わりにMann-Whitney U推奨')
    for _, row in aoi_gain_non_normal.iterrows():
        flags = []
        if row['A群 正規性'] == '×': flags.append('A群')
        if row['B群 正規性'] == '×': flags.append('B群')
        print(f"  - {row['指標']} ({', '.join(flags)})")
else:
    print('✓ 全AOI指標でゲインスコアの正規性が確認された → 独立t検定が適用可能')

In [None]:
# --- C-4: AOI within-group tests, between-group gain tests and mixed ANOVA ---
# NOTE(review): the independent t statistic is computed as (A, B) while the
# effect size and its bootstrap interval are computed as (B, A), so the two
# are passed in opposite order — confirm this is the intended convention.
print('【AOI Pre-Post比較】')
print('=' * 70)

aoi_within, aoi_between, aoi_anova = [], [], []

for metric in aoi_metrics:
    metric_label = aoi_metric_labels[metric]
    print(f'\n--- {metric_label} ---')

    # Within-group: paired t-test of post against pre (needs >= 3 pairs).
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_aoi_p, group, metric)
        n_pairs = len(pre_v)
        if n_pairs < 3:
            continue
        diff = post_v - pre_v
        t_stat, t_p = stats.ttest_rel(post_v, pre_v)
        d = cohens_d_paired(pre_v, post_v)
        pre_mean = np.mean(pre_v)
        post_mean = np.mean(post_v)
        diff_mean = np.mean(diff)
        print(f'  {group}群 (n={n_pairs}): Pre={pre_mean:.4f} -> Post={post_mean:.4f} '
              f'差={diff_mean:+.4f}, t={t_stat:.3f}, p={t_p:.3f}, d={d:.3f}')
        aoi_within.append({
            '指標': metric_label, '群': group, 'n': n_pairs,
            'Pre平均': pre_mean, 'Post平均': post_mean,
            '差': diff_mean, 't': t_stat, 'p': t_p, 'd': d,
        })

    # Between-group: compare the post - pre gains of the two groups.
    pre_A, post_A = get_paired_data(df_aoi_p, 'A', metric)
    pre_B, post_B = get_paired_data(df_aoi_p, 'B', metric)
    if len(pre_A) < 3 or len(pre_B) < 3:
        continue

    gain_A = post_A - pre_A
    gain_B = post_B - pre_B
    t_stat, t_p = stats.ttest_ind(gain_A, gain_B)
    u_stat, u_p = stats.mannwhitneyu(gain_A, gain_B, alternative='two-sided')
    d = cohens_d_ind(gain_B, gain_A)
    ci_lo, ci_hi = bootstrap_ci(gain_B, gain_A, cohens_d_ind)
    mean_gain_A = np.mean(gain_A)
    mean_gain_B = np.mean(gain_B)
    print(f'  ゲイン A: {mean_gain_A:+.4f}, B: {mean_gain_B:+.4f} '
          f'| t={t_stat:.3f}, p={t_p:.3f}, d={d:.3f} [{ci_lo:.3f},{ci_hi:.3f}]')
    aoi_between.append({
        '指標': metric_label,
        'A群ゲイン': mean_gain_A, 'B群ゲイン': mean_gain_B,
        't': t_stat, 'p_t': t_p, 'U': u_stat, 'p_MW': u_p,
        'd': d, 'CI下限': ci_lo, 'CI上限': ci_hi,
    })

    # Mixed ANOVA: only the interaction term is reported here.
    res = mixed_anova(pre_A, post_A, pre_B, post_B)
    f_int, p_int, eta_int = res['F_interaction'], res['p_interaction'], res['eta2_interaction']
    print(f'  ANOVA 交互作用: F={f_int:.3f}, p={p_int:.3f}, '
          f'η²p={eta_int:.3f}')
    res['指標'] = metric_label
    aoi_anova.append(res)

aoi_within_df = pd.DataFrame(aoi_within)
aoi_between_df = pd.DataFrame(aoi_between)
aoi_anova_df = pd.DataFrame(aoi_anova)

In [None]:
# --- C-5: AOI visualisation ---
# One panel per AOI metric with four bars (A/pre, A/post, B/pre, B/post)
# showing the mean with the standard error of the mean as error bar.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for idx, metric in enumerate(aoi_metrics):
    ax = axes[idx]
    means, sems, colors = [], [], []
    for group, color in [('A', '#4C72B0'), ('B', '#DD8452')]:
        in_group = df_aoi_p['group'] == group
        for ph in ['pre', 'post']:
            cell = df_aoi_p[in_group & (df_aoi_p['phase'] == ph)]
            vals = cell[metric].dropna().values
            n_vals = len(vals)
            # An empty cell is drawn as 0; SEM needs at least two values.
            means.append(np.mean(vals) if n_vals > 0 else 0)
            sems.append(np.std(vals, ddof=1) / np.sqrt(n_vals) if n_vals > 1 else 0)
            colors.append(color)
    x = np.arange(4)
    ax.bar(x, means, yerr=sems, capsize=4, color=colors, alpha=0.8,
           edgecolor='black', linewidth=0.5)
    ax.set_xticks(x)
    ax.set_xticklabels(['A\npre', 'A\npost', 'B\npre', 'B\npost'])
    ax.set_title(aoi_metric_labels[metric])

# Hide the panels of the 2x3 grid that received no metric.
for spare_ax in axes[len(aoi_metrics):]:
    spare_ax.set_visible(False)
plt.suptitle('AOI注視配分: Pre vs Post（群別）', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# --- C-6: Box plots of the AOI gain scores ---
# One panel per AOI metric; each panel holds one box per group built from the
# post - pre gains of the participants that have both phases.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for idx, metric in enumerate(aoi_metrics):
    ax = axes[idx]
    gain_data, labels = [], []
    for group in ['A', 'B']:
        pre_v, post_v = get_paired_data(df_aoi_p, group, metric)
        gains = post_v - pre_v
        gain_data.append(gains)
        labels.append(f'{group}群\n(n={len(gains)})')

    bp = ax.boxplot(
        gain_data,
        tick_labels=labels,
        patch_artist=True,
        medianprops=dict(color='black', linewidth=2),
    )
    # First box is group A (blue), second is group B (orange).
    for box, face in zip(bp['boxes'], ['#4C72B0', '#DD8452']):
        box.set_facecolor(face)
        box.set_alpha(0.7)

    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.set_title(f'{aoi_metric_labels[metric]}\n(Post - Pre)')
    ax.set_ylabel('ゲイン')

# Hide the panels of the 2x3 grid that received no metric.
for spare_ax in axes[len(aoi_metrics):]:
    spare_ax.set_visible(False)
plt.suptitle('AOI: ゲインスコア (Post - Pre)', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# --- C-7: Per-participant gain scores of the choice fixation ratio ---
# Strip plot with one labelled dot per participant and a horizontal bar at the
# group mean; the same numbers are also printed.
metric = 'choice_ratio'
fig, ax = plt.subplots(figsize=(10, 6))

for i, group in enumerate(['A', 'B']):
    sub = df_aoi_p[df_aoi_p['group'] == group]
    pre = sub[sub['phase'] == 'pre'].set_index('participant')[metric]
    post = sub[sub['phase'] == 'post'].set_index('participant')[metric]
    # Only participants present in both phases contribute a gain.
    common = pre.index.intersection(post.index)
    gains = post.loc[common] - pre.loc[common]
    n_gains = len(gains)

    x = np.full(n_gains, i)
    # The generator is re-seeded per group, so both groups draw the same jitter sequence.
    jitter = np.random.default_rng(42).uniform(-0.08, 0.08, size=n_gains)
    x_jittered = x + jitter
    color = '#DD8452' if group != 'A' else '#4C72B0'
    ax.scatter(x_jittered, gains.values, color=color, s=80, alpha=0.7,
               zorder=3, edgecolors='white')
    for x_dot, y_dot, pid in zip(x_jittered, gains.values, gains.index):
        ax.annotate(pid, (x_dot, y_dot), fontsize=8, ha='left', va='bottom',
                    xytext=(5, 2), textcoords='offset points')

    m = gains.mean()
    sd = gains.std()
    ax.hlines(m, i - 0.2, i + 0.2, colors=color, linewidths=3, zorder=4,
              label=f'{group}群 M={m:.4f}')
    print(f'{group}群: M={m:.4f}, SD={sd:.4f}, n={n_gains}')
    for pid, val in gains.items():
        print(f'  {pid}: {val:+.4f}')

ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
ax.set_xticks([0, 1])
ax.set_xticklabels(['A群', 'B群'])
ax.set_ylabel('選択肢注視比率ゲイン (Post - Pre)')
ax.set_title('選択肢注視比率: 個人別ゲインスコア')
ax.legend()
plt.tight_layout()
plt.show()

---
## セクションD: 探索的分析

サッカード分析、瞳孔径分析、時間経過分析（前半/後半の注視配分変化）、ベースライン交互作用。

In [None]:
# --- D-3: Pupil diameter analysis ---
# Paired pre/post t-test within each group, then an independent t-test on the
# post - pre gains between the groups.
print('【瞳孔径（認知負荷指標）分析】')
print('=' * 70)

metric = 'mean_pupil_diameter'
print(f'\n--- {seg_metric_labels[metric]} ---')

# Within-group comparison (needs at least three pairs).
for group in ['A', 'B']:
    pre_v, post_v = get_paired_data(df_seg, group, metric)
    if len(pre_v) >= 3:
        diff = post_v - pre_v
        t_stat, t_p = stats.ttest_rel(post_v, pre_v)
        d = cohens_d_paired(pre_v, post_v)
        pre_mean, post_mean, diff_mean = np.mean(pre_v), np.mean(post_v), np.mean(diff)
        print(f'  {group}群: Pre={pre_mean:.3f} -> Post={post_mean:.3f}, '
              f'差={diff_mean:+.3f}, t={t_stat:.3f}, p={t_p:.3f}, d={d:.3f}')

# Between-group comparison of the gains.
pre_A, post_A = get_paired_data(df_seg, 'A', metric)
pre_B, post_B = get_paired_data(df_seg, 'B', metric)
enough_pairs = len(pre_A) >= 3 and len(pre_B) >= 3
if enough_pairs:
    gain_A = post_A - pre_A
    gain_B = post_B - pre_B
    t_stat, t_p = stats.ttest_ind(gain_A, gain_B)
    d = cohens_d_ind(gain_B, gain_A)
    mean_gain_A, mean_gain_B = np.mean(gain_A), np.mean(gain_B)
    print(f'\n  群間ゲイン: A={mean_gain_A:+.3f}, B={mean_gain_B:+.3f}')
    print(f'  t={t_stat:.3f}, p={t_p:.3f}, d={d:.3f}')

In [None]:
# --- D-4: Time-course analysis (first half vs. second half of fixations) ---
# For every AOI segment row, the fixation sequence of the matching question
# screen is split in two by fixation count, and the share of fixations that
# fall in the passage region is computed for each half.

# Fixations with x below this value are counted as passage fixations.
# The original notes state this as an assumption about the screen layout
# (left panel = passage, right panel = choices, boundary around 480 px).
PASSAGE_X_MAX_PX = 480

half_results = []
# statistics.csv is identical for all segments of one (group, participant,
# phase), so its question rows are loaded once and reused.
question_rows_by_session = {}

for _, row in df_aoi.iterrows():
    group, pid, phase = row['group'], row['participant'], row['phase']
    seg_idx = int(row['segment_index'])

    session = (group, pid, phase)
    if session not in question_rows_by_session:
        st_path = OUTPUT_ROOT / group / pid / phase / 'statistics.csv'
        q_rows = None
        if st_path.exists():
            st = pd.read_csv(st_path)
            q_rows = st[st['event_type'] == 'question_screen_open']
        question_rows_by_session[session] = q_rows
    q_rows = question_rows_by_session[session]

    # seg_idx is 1-indexed (phase_intro = 0), so segment k maps to the
    # (k - 1)-th question_screen_open row; out-of-range segments are skipped.
    q_pos = seg_idx - 1
    if q_rows is None or not 0 <= q_pos < len(q_rows):
        continue
    img_num = str(int(q_rows.iloc[q_pos]['image_number'])).zfill(3)

    fix_path = WORKING_ROOT / group / pid / phase / 'fixation_corrected' / f'{img_num}.csv'
    if not fix_path.exists():
        continue

    try:
        fix_df = pd.read_csv(fix_path)
        if len(fix_df) < 4:
            # Too few fixations to split into two meaningful halves.
            continue
        mid = len(fix_df) // 2
        passage_fix_first = (fix_df['x'].iloc[:mid] < PASSAGE_X_MAX_PX).mean()
        passage_fix_second = (fix_df['x'].iloc[mid:] < PASSAGE_X_MAX_PX).mean()
    except (OSError, ValueError, KeyError, TypeError) as exc:
        # Best effort: an unreadable or malformed fixation file is skipped,
        # but reported so that dropped segments do not go unnoticed.
        print(f'⚠ スキップ: {fix_path} ({type(exc).__name__}: {exc})')
        continue

    half_results.append({
        'group': group, 'participant': pid, 'phase': phase,
        '前半本文比率': passage_fix_first,
        '後半本文比率': passage_fix_second,
        'シフト': passage_fix_second - passage_fix_first,
    })

# Aggregate the per-segment half-split ratios to one row per participant and
# phase, print the group/phase means and compare the shift gains between groups.
df_half = pd.DataFrame(half_results)
if len(df_half) == 0:
    print('前半/後半データなし')
else:
    ratio_cols = ['前半本文比率', '後半本文比率', 'シフト']
    df_half_p = (
        df_half.groupby(['group', 'participant', 'phase'])[ratio_cols]
        .mean()
        .reset_index()
    )

    print('【前半/後半の本文注視比率】')
    print('=' * 70)
    for group in ['A', 'B']:
        for ph in ['pre', 'post']:
            vals = df_half_p[(df_half_p['group'] == group) & (df_half_p['phase'] == ph)]
            if len(vals) == 0:
                continue
            first_mean = vals['前半本文比率'].mean()
            second_mean = vals['後半本文比率'].mean()
            shift_mean = vals['シフト'].mean()
            print(f'  {group}/{ph}: 前半={first_mean:.3f}, '
                  f'後半={second_mean:.3f}, '
                  f'シフト={shift_mean:+.3f}')

    # Between-group comparison of the change in shift (post - pre).
    pre_A, post_A = get_paired_data(df_half_p, 'A', 'シフト')
    pre_B, post_B = get_paired_data(df_half_p, 'B', 'シフト')
    if len(pre_A) >= 3 and len(pre_B) >= 3:
        gain_A = post_A - pre_A
        gain_B = post_B - pre_B
        t_stat, t_p = stats.ttest_ind(gain_A, gain_B)
        print(f'\n  シフト ゲイン比較: t={t_stat:.3f}, p={t_p:.3f}')

In [None]:
# --- D-5: Baseline interaction (adjusting for the pre-test score) ---
# The pre-test score of each participant is rebuilt from the event log in
# <INPUT_ROOT>/<group>/<pid>/pre/logs: the correct_count and total_count of
# every 'answer_submit' event are summed and turned into a percentage.
print('【ベースライン交互作用分析】')
print('=' * 70)

pre_scores = {}
for group, pids in participants.items():
    for pid in pids:
        log_dir = INPUT_ROOT / group / pid / 'pre' / 'logs'
        if not log_dir.is_dir():
            continue
        evt_files = sorted(f for f in os.listdir(log_dir) if f.endswith('.jsonl'))
        if not evt_files:
            continue
        correct, total = 0, 0
        # Only the last log file in name order is read.  JSON text is UTF-8,
        # so the encoding is fixed instead of using the platform default.
        with open(log_dir / evt_files[-1], 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    # Malformed lines are ignored rather than aborting the scan.
                    continue
                if event.get('event') == 'answer_submit':
                    correct += event.get('correct_count', 0)
                    total += event.get('total_count', 0)
        # Participants without any counted answers get no score entry.
        if total > 0:
            pre_scores[pid] = correct / total * 100

print(f'Pre得点取得: {len(pre_scores)}名')

# Relationship between the gain of the key metrics and the pre-test score.
# Participants are split at the median pre-test score; gains are summarised
# per score half and group, and the two groups are compared among the low
# scorers only.
key_metrics_for_baseline = [
    ('mean_fixation_duration', df_seg, seg_metric_labels),
]
if len(df_aoi_p) > 0:
    key_metrics_for_baseline.append(('passage_ratio', df_aoi_p, aoi_metric_labels))

for metric, df_src, label_dict in key_metrics_for_baseline:
    print(f'\n--- {label_dict[metric]} ---')
    gains_all = []
    pre_scores_all = []
    groups_all = []

    for group in ['A', 'B']:
        in_group = df_src['group'] == group
        pre_v_df = df_src[in_group & (df_src['phase'] == 'pre')].set_index('participant')[metric]
        post_v_df = df_src[in_group & (df_src['phase'] == 'post')].set_index('participant')[metric]
        common = pre_v_df.index.intersection(post_v_df.index)
        for pid in common:
            # Keep only participants with a pre-test score and both values present.
            if pid in pre_scores and not np.isnan(pre_v_df[pid]) and not np.isnan(post_v_df[pid]):
                gains_all.append(post_v_df[pid] - pre_v_df[pid])
                pre_scores_all.append(pre_scores[pid])
                groups_all.append(group)

    gains_arr = np.array(gains_all)
    scores_arr = np.array(pre_scores_all)
    groups_arr = np.array(groups_all)

    if len(gains_arr) >= 6:
        # Median split: scores at or below the median count as "low".
        median_score = np.median(scores_arr)
        print(f'  Pre得点中央値: {median_score:.1f}%')

        low_mask = scores_arr <= median_score
        high_mask = scores_arr > median_score

        for score_label, mask in [('低得点', low_mask), ('高得点', high_mask)]:
            for g in ['A', 'B']:
                vals = gains_arr[mask & (groups_arr == g)]
                if len(vals) > 0:
                    print(f'    {score_label} {g}群: n={len(vals)}, gain M={np.mean(vals):+.4f}')

        # Between-group comparison restricted to the low scorers.  The mask is
        # named explicitly: reusing the loop variable above would select the
        # high scorers, because it still holds the last iteration's mask.
        low_A = gains_arr[low_mask & (groups_arr == 'A')]
        low_B = gains_arr[low_mask & (groups_arr == 'B')]
        if len(low_A) >= 2 and len(low_B) >= 2:
            t_stat, t_p = stats.ttest_ind(low_A, low_B)
            print(f'  低得点者 群間: t={t_stat:.3f}, p={t_p:.3f}')

---
## セクションE: 統合的統計まとめ

多重比較補正、ベイズt検定、検出力分析、効果量サマリーテーブル。

In [None]:
# --- E-1: Collect every between-group comparison into one table ---
# Segment-level rows come first, followed by the AOI-level rows; both are
# mapped onto the same column names.
all_comparisons = [
    {
        'ファミリー': 'セグメント', '指標': seg_row['指標'],
        'A群ゲイン': seg_row['A群ゲイン平均'], 'B群ゲイン': seg_row['B群ゲイン平均'],
        'p': seg_row['p_t'], 'd': seg_row['Cohen_d'],
        'CI下限': seg_row['CI下限'], 'CI上限': seg_row['CI上限'],
    }
    for _, seg_row in between_df.iterrows()
]

# AOI level (skipped when no AOI comparison was produced).
if len(aoi_between_df) > 0:
    all_comparisons.extend(
        {
            'ファミリー': 'AOI', '指標': aoi_row['指標'],
            'A群ゲイン': aoi_row['A群ゲイン'], 'B群ゲイン': aoi_row['B群ゲイン'],
            'p': aoi_row['p_t'], 'd': aoi_row['d'],
            'CI下限': aoi_row['CI下限'], 'CI上限': aoi_row['CI上限'],
        }
        for _, aoi_row in aoi_between_df.iterrows()
    )

df_comp = pd.DataFrame(all_comparisons)
print(f'全比較数: {len(df_comp)}')

In [None]:
# --- E-2: Benjamini-Hochberg FDR correction ---
# The correction is applied separately within each comparison family and the
# adjusted p-values are stored in df_comp['p_adj'].
print('【多重比較補正 (Benjamini-Hochberg FDR)】')
print('=' * 70)

for family in df_comp['ファミリー'].unique():
    mask = df_comp['ファミリー'] == family
    p_vals = df_comp.loc[mask, 'p'].values
    m = len(p_vals)
    if m == 0:
        continue

    # Raw BH value: p * m / rank, with rank 1 for the smallest p-value.
    sorted_idx = np.argsort(p_vals)
    p_adj = np.zeros(m)
    p_adj[sorted_idx] = p_vals[sorted_idx] * m / np.arange(1, m + 1)

    # Enforce monotonicity from the largest p-value downwards: no adjusted
    # value may exceed the adjusted value of the next larger p-value.
    for pos in range(m - 1, 0, -1):
        smaller, larger = sorted_idx[pos - 1], sorted_idx[pos]
        p_adj[smaller] = min(p_adj[smaller], p_adj[larger])

    # Adjusted p-values are capped at 1.
    p_adj = np.minimum(p_adj, 1.0)
    df_comp.loc[mask, 'p_adj'] = p_adj

    print(f'\n{family} ファミリー ({m}比較):')
    sub = df_comp[mask][['指標', 'p', 'p_adj', 'd']].copy()
    sub['判定'] = ['*' if adj < 0.05 else 'n.s.' for adj in sub['p_adj']]
    print(sub.to_string(index=False))

In [None]:
# --- E-3: Bayesian t-test ---
# Prints the section banner for the Bayes-factor results that follow.
print('【ベイズt検定 (JZS事前分布)】')
print('=' * 70)

def bayes_t_test_ind(x, y, r=0.707):
    """Return the JZS Bayes factors (BF10, BF01) for two independent samples.

    The pooled-variance t statistic of ``x`` versus ``y`` is computed first.
    Under H1 the standardized effect size has a Cauchy(0, r) prior; it is
    integrated out in its scale-mixture form (Rouder et al., 2009), which gives
    the exact marginal likelihood of the t statistic.  A location-shifted
    central t density is not the sampling distribution of t under a non-zero
    effect, so it is not used here.

    Args:
        x: Observations of the first group (sequence of numbers, length >= 2).
        y: Observations of the second group (sequence of numbers, length >= 2).
        r: Scale of the Cauchy prior on the effect size.

    Returns:
        Tuple ``(bf10, bf01)``: evidence for H1 over H0 and its reciprocal.
        ``bf01`` is ``inf`` when ``bf10`` is 0.
    """
    nx, ny = len(x), len(y)
    df = nx + ny - 2
    pooled_var = ((nx - 1) * np.var(x, ddof=1) + (ny - 1) * np.var(y, ddof=1)) / df
    se = np.sqrt(pooled_var * (1 / nx + 1 / ny))
    # With zero pooled variance the statistic is undefined; it is treated as 0.
    t_val = (np.mean(x) - np.mean(y)) / se if se > 0 else 0
    neff = (nx * ny) / (nx + ny)

    # Kernel of the central t density at t_val.  The normalising constant is
    # the same under H0 and H1 and cancels in the ratio.
    lik_h0 = (1 + t_val ** 2 / df) ** (-(df + 1) / 2)

    def integrand(g):
        # Given g, t / sqrt(1 + neff * g * r^2) follows a central t(df);
        # g itself has an inverse-chi-square(1) prior.
        scale = 1 + neff * g * r ** 2
        return (scale ** -0.5
                * (1 + t_val ** 2 / (scale * df)) ** (-(df + 1) / 2)
                * (2 * np.pi) ** -0.5 * g ** -1.5 * np.exp(-1 / (2 * g)))

    marg_h1, _ = quad(integrand, 0, np.inf)
    bf10 = marg_h1 / lik_h0 if lik_h0 > 0 else np.inf
    return bf10, (1 / bf10 if bf10 > 0 else np.inf)


# Bayes factors of the between-group gain comparison for every segment-level
# metric and then every AOI metric.  A metric is skipped unless both groups
# have at least three paired observations.
bf_results = []

bf_sources = [
    ('セグメント', seg_metrics, df_seg, seg_metric_labels),
    ('AOI', aoi_metrics, df_aoi_p, aoi_metric_labels),
]

for bf_family, bf_metrics, bf_frame, bf_labels in bf_sources:
    for metric in bf_metrics:
        pre_A, post_A = get_paired_data(bf_frame, 'A', metric)
        pre_B, post_B = get_paired_data(bf_frame, 'B', metric)
        if len(pre_A) >= 3 and len(pre_B) >= 3:
            gain_A = post_A - pre_A
            gain_B = post_B - pre_B
            bf10, bf01 = bayes_t_test_ind(gain_A, gain_B)
            bf_results.append({
                'ファミリー': bf_family,
                '指標': bf_labels[metric],
                'BF10': bf10,
                'BF01': bf01,
            })
            print(f'  {bf_family}/{bf_labels[metric]}: BF10={bf10:.3f}, BF01={bf01:.3f}')

bf_df = pd.DataFrame(bf_results)

# BF解釈
def interpret_bf(bf10):
    """Return the verbal evidence label for a Bayes factor BF10.

    BF10 above 10 / 3 / 1 is labelled strong / moderate / weak evidence for
    H1.  Otherwise BF01 = 1 / BF10 above 10 / 3 is labelled strong / moderate
    evidence for H0.  Everything else, including NaN, is labelled
    inconclusive.  A BF10 of exactly 0 is read as BF01 = infinity instead of
    raising ZeroDivisionError.
    """
    if bf10 > 10:
        return 'H1強い証拠'
    if bf10 > 3:
        return 'H1中程度の証拠'
    if bf10 > 1:
        return 'H1弱い証拠'
    # bf10 == 0 is a possible input: the Bayes-factor routine returns
    # (0, inf) in that case, so it must not be divided by here.
    bf01 = float('inf') if bf10 == 0 else 1 / bf10
    if bf01 > 10:
        return 'H0強い証拠'
    if bf01 > 3:
        return 'H0中程度の証拠'
    return '判断不能'

# Attach the verbal evidence label to every Bayes factor.
bf_df['解釈'] = bf_df['BF10'].apply(interpret_bf)
print('\n')
# Last expression of the cell: the notebook renders the table.
bf_df

In [None]:
# --- E-4: Power analysis ---
# Prints the section banner for the power tables that follow.
print('【検出力分析】')
print('=' * 70)

def power_ind(n1, n2, d, alpha=0.05):
    """Return the power of a two-sided independent-samples t-test.

    Args:
        n1: Sample size of the first group.
        n2: Sample size of the second group.
        d: Standardized effect size (Cohen's d) assumed under H1.
        alpha: Two-sided significance level.

    Returns:
        Probability that the noncentral t statistic falls outside the
        two-sided critical region bounds of the central t distribution.
    """
    dof = n1 + n2 - 2
    noncentrality = d * np.sqrt((n1 * n2) / (n1 + n2))
    critical_t = stats.t.ppf(1 - alpha / 2, dof)
    # Rejection happens in either tail of the noncentral t distribution.
    upper_tail = 1 - nct.cdf(critical_t, dof, noncentrality)
    lower_tail = nct.cdf(-critical_t, dof, noncentrality)
    return upper_tail + lower_tail

def required_n(d, target_power=0.80, alpha=0.05):
    """Return the smallest per-group n that reaches the target power.

    Equal group sizes from 5 to 999 are tried in ascending order.

    Args:
        d: Standardized effect size (Cohen's d).
        target_power: Power that has to be reached.
        alpha: Two-sided significance level.

    Returns:
        The first per-group sample size whose power is at least
        ``target_power``, or the string ``'>1000'`` when none of the tried
        sizes is sufficient.
    """
    candidates = (
        n for n in range(5, 1000)
        if power_ind(n, n, d, alpha) >= target_power
    )
    return next(candidates, '>1000')

# Power for each observed effect size, always evaluated with n = 10 per group,
# plus the per-group n needed for 80 % power.
power_rows = []

for _, row in df_comp.iterrows():
    d_obs = abs(row['d'])
    if d_obs > 0:
        pw = power_ind(10, 10, d_obs)
        n_req = required_n(d_obs)
    else:
        # A zero (or NaN) effect size gets placeholder values.
        pw = 0
        n_req = '>1000'
    power_rows.append({
        'ファミリー': row['ファミリー'], '指標': row['指標'],
        '観測d': d_obs, '検出力_n10': pw, '80%必要n': n_req,
    })

power_df = pd.DataFrame(power_rows)
print(power_df.to_string(index=False))

# Power for the conventional small / medium / large effect sizes.
print('\n\n標準効果量での検出力 (各群n=10):')
benchmark_effects = [(0.2, '小'), (0.5, '中'), (0.8, '大')]
for d_val, label in benchmark_effects:
    pw = power_ind(10, 10, d_val)
    n_for_80 = required_n(d_val)
    print(f'  d={d_val} ({label}): 検出力={pw:.3f}, 80%に必要なn={n_for_80}')

In [None]:
# --- E-5: Effect size summary table ---
# Joins the Bayes factors and the power values onto the comparison table.
print('【効果量サマリーテーブル】')
print('=' * 70)

summary_df = df_comp.copy()

# Both lookups are keyed by (family, metric label).  Keying by the label alone
# would let a label that occurs in both families overwrite the other entry.
bf_map = {(row['ファミリー'], row['指標']): row['BF10'] for _, row in bf_df.iterrows()}
summary_df['BF10'] = summary_df.apply(
    lambda r: bf_map.get((r['ファミリー'], r['指標']), np.nan), axis=1)

pw_map = {(row['ファミリー'], row['指標']): row['検出力_n10'] for _, row in power_df.iterrows()}
summary_df['検出力'] = summary_df.apply(
    lambda r: pw_map.get((r['ファミリー'], r['指標']), np.nan), axis=1)

# Keep only the display columns that exist (p_adj is present only after E-2 ran).
display_cols = ['ファミリー', '指標', 'd', 'p', 'p_adj', 'CI下限', 'CI上限', 'BF10', '検出力']
summary_df = summary_df[[c for c in display_cols if c in summary_df.columns]]
summary_df = summary_df.round(4)
print(summary_df.to_string(index=False))

---
## セクションF: 結論・出力

In [None]:
# --- F-1: Results summary ---
# Prints one line per metric for each analysis; '*' marks p < 0.05.
banner = '=' * 70
print(banner)
print('【A群 vs B群 視線行動比較分析 — 結果まとめ】')
print(banner)

print('\n■ セクションA: セグメントレベル (Pre-Post ゲインスコア)')
for _, row in between_df.iterrows():
    name, eff, p_val = row['指標'], row['Cohen_d'], row['p_t']
    sig = 'n.s.' if not p_val < 0.05 else '*'
    print(f'  {name}: d={eff:.3f}, p={p_val:.3f} {sig}')

print('\n■ セクションA: 混合分散分析 交互作用')
for _, row in pd.DataFrame(anova_results).iterrows():
    name = row['指標']
    f_val, p_val, eta = row['F_interaction'], row['p_interaction'], row['eta2_interaction']
    sig = 'n.s.' if not p_val < 0.05 else '*'
    print(f'  {name}: F={f_val:.3f}, p={p_val:.3f}, '
          f'η²p={eta:.3f} {sig}')

if len(aoi_between_df) > 0:
    print('\n■ セクションC: AOI注視配分 (Pre-Post ゲインスコア)')
    for _, row in aoi_between_df.iterrows():
        name, eff, p_val = row['指標'], row['d'], row['p_t']
        sig = 'n.s.' if not p_val < 0.05 else '*'
        print(f'  {name}: d={eff:.3f}, p={p_val:.3f} {sig}')

print('\n■ セクションE: ベイズ分析')
for _, row in bf_df.iterrows():
    name, bf_val, verdict = row['指標'], row['BF10'], row['解釈']
    print(f'  {name}: BF10={bf_val:.3f} ({verdict})')

print('\n■ 検出力')
for _, row in power_df.iterrows():
    name, abs_d, pw_val, n_needed = row['指標'], row['観測d'], row['検出力_n10'], row['80%必要n']
    print(f'  {name}: |d|={abs_d:.3f}, 検出力={pw_val:.3f}, '
          f'80%に必要なn={n_needed}')

In [None]:
# --- F-2: Export the summary table for the paper ---
# Segment gains, AOI gains, ANOVA interactions and Bayes factors are stacked
# into one long table (columns that do not apply to a row stay empty) and
# written as CSV.
output_path = OUTPUT_ROOT / 'gaze_comparison_results.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)

# Segment-level gain comparisons.
final_rows = [
    {
        '分析': 'セグメント ゲイン', '指標': seg_row['指標'],
        'A群平均': seg_row['A群ゲイン平均'], 'A群SD': seg_row['A群ゲインSD'],
        'B群平均': seg_row['B群ゲイン平均'], 'B群SD': seg_row['B群ゲインSD'],
        't': seg_row['t'], 'p': seg_row['p_t'], 'p_MW': seg_row['p_MW'],
        'Cohen_d': seg_row['Cohen_d'], 'CI下限': seg_row['CI下限'], 'CI上限': seg_row['CI上限'],
    }
    for _, seg_row in between_df.iterrows()
]

# AOI gain comparisons (no SD columns are available for these).
if len(aoi_between_df) > 0:
    final_rows += [
        {
            '分析': 'AOI ゲイン', '指標': aoi_row['指標'],
            'A群平均': aoi_row['A群ゲイン'], 'B群平均': aoi_row['B群ゲイン'],
            't': aoi_row['t'], 'p': aoi_row['p_t'], 'p_MW': aoi_row['p_MW'],
            'Cohen_d': aoi_row['d'], 'CI下限': aoi_row['CI下限'], 'CI上限': aoi_row['CI上限'],
        }
        for _, aoi_row in aoi_between_df.iterrows()
    ]

# Mixed ANOVA interaction terms.
final_rows += [
    {
        '分析': '混合分散分析 交互作用', '指標': anova_row['指標'],
        'F': anova_row['F_interaction'], 'p': anova_row['p_interaction'],
        'η²p': anova_row['eta2_interaction'],
    }
    for _, anova_row in pd.DataFrame(anova_results).iterrows()
]

# Bayes factors with their verbal interpretation.
final_rows += [
    {
        '分析': 'ベイズファクター', '指標': bf_row['指標'],
        'BF10': bf_row['BF10'], 'BF01': bf_row['BF01'],
        '解釈': bf_row['解釈'],
    }
    for _, bf_row in bf_df.iterrows()
]

final_df = pd.DataFrame(final_rows)
# Written as UTF-8 with a byte-order mark.
final_df.to_csv(output_path, index=False, encoding='utf-8-sig')
print(f'結果保存先: {output_path}')
print(f'行数: {len(final_df)}')
final_df

In [None]:

import os
import pandas as pd
import numpy as np
from pathlib import Path

base_path = Path("/Users/kyoya/Laboratory/metacognition-analysis/data/input")

group_a = ["P001", "P002", "P005", "P006", "P008", "P009", "P010", "P011", "P016", "P017"]
group_b = ["P003", "P004", "P007", "P012", "P013", "P014", "P015", "P018", "P019", "P020"]
phases = ["pre", "post", "training1", "training2", "training3"]

results = []

for group_name, participants in [("A", group_a), ("B", group_b)]:
    for pid in participants:
        for phase in phases:
            eye_dir = base_path / group_name / pid / phase / "eye_tracking"
            
            if not eye_dir.exists():
                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": "MISSING_DIR",
                    "Total_Duration_s": np.nan,
                    "Total_Rows": 0,
                    "Sampling_Rate_Hz": np.nan,
                    "Valid_Gaze_Pct": np.nan,
                    "Left_Valid_Pct": np.nan,
                    "Right_Valid_Pct": np.nan,
                    "Flag": "MISSING"
                })
                continue
            
            # Find the timestamp subdirectory
            subdirs = [d for d in eye_dir.iterdir() if d.is_dir()]
            if len(subdirs) == 0:
                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": "NO_SUBDIR",
                    "Total_Duration_s": np.nan,
                    "Total_Rows": 0,
                    "Sampling_Rate_Hz": np.nan,
                    "Valid_Gaze_Pct": np.nan,
                    "Left_Valid_Pct": np.nan,
                    "Right_Valid_Pct": np.nan,
                    "Flag": "MISSING"
                })
                continue
            
            ts_dir = subdirs[0]  # Take first (should be only one)
            csv_path = ts_dir / "tobii_pro_gaze.csv"
            
            if not csv_path.exists():
                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": "NO_CSV",
                    "Total_Duration_s": np.nan,
                    "Total_Rows": 0,
                    "Sampling_Rate_Hz": np.nan,
                    "Valid_Gaze_Pct": np.nan,
                    "Left_Valid_Pct": np.nan,
                    "Right_Valid_Pct": np.nan,
                    "Flag": "MISSING"
                })
                continue
            
            try:
                df = pd.read_csv(csv_path)
                
                # Columns
                ts_col = df.columns[0]  # '#timestamp'
                gaze_x_col = 'gaze_x'
                gaze_y_col = 'gaze_y'
                
                total_rows = len(df)
                
                # Duration in seconds (timestamps are in milliseconds)
                timestamps = df[ts_col].values
                duration_s = (timestamps[-1] - timestamps[0]) / 1000.0
                
                # Sampling rate
                sampling_rate = total_rows / duration_s if duration_s > 0 else 0
                
                # Valid gaze: both gaze_x and gaze_y are not NaN
                valid_gaze = df[gaze_x_col].notna() & df[gaze_y_col].notna()
                valid_gaze_pct = valid_gaze.sum() / total_rows * 100 if total_rows > 0 else 0
                
                # Per-eye validity using the validity columns
                left_valid_col = 'left_gaze_origin_validity'
                right_valid_col = 'right_gaze_origin_validity'
                
                left_valid_pct = (df[left_valid_col] == 1).sum() / total_rows * 100 if total_rows > 0 else 0
                right_valid_pct = (df[right_valid_col] == 1).sum() / total_rows * 100 if total_rows > 0 else 0
                
                # Determine flags
                flags = []
                if sampling_rate < 80:
                    flags.append(f"LOW_SR({sampling_rate:.0f}Hz)")
                if valid_gaze_pct < 70:
                    flags.append(f"LOW_VALID({valid_gaze_pct:.1f}%)")
                
                flag_str = ", ".join(flags) if flags else "OK"
                
                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": "OK",
                    "Total_Duration_s": round(duration_s, 1),
                    "Total_Rows": total_rows,
                    "Sampling_Rate_Hz": round(sampling_rate, 1),
                    "Valid_Gaze_Pct": round(valid_gaze_pct, 1),
                    "Left_Valid_Pct": round(left_valid_pct, 1),
                    "Right_Valid_Pct": round(right_valid_pct, 1),
                    "Flag": flag_str
                })
                
            except Exception as e:
                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": f"ERROR: {str(e)[:50]}",
                    "Total_Duration_s": np.nan,
                    "Total_Rows": 0,
                    "Sampling_Rate_Hz": np.nan,
                    "Valid_Gaze_Pct": np.nan,
                    "Left_Valid_Pct": np.nan,
                    "Right_Valid_Pct": np.nan,
                    "Flag": "ERROR"
                })

df_results = pd.DataFrame(results)
print(f"Total entries: {len(df_results)}")
print(f"Unique participants: {df_results['Participant'].nunique()}")
print(f"Statuses: {df_results['Status'].value_counts().to_dict()}")
print(f"Flags: {df_results['Flag'].value_counts().to_dict()}")


In [None]:

# Work out the timestamp unit from the spacing of consecutive samples in one
# recording (P001, pre-test).
import pandas as pd

csv_path = "/Users/kyoya/Laboratory/metacognition-analysis/data/input/A/P001/pre/eye_tracking/2026-01-16_17-33-45.563036/tobii_pro_gaze.csv"
df_sample = pd.read_csv(csv_path)

# The first column holds the timestamps.
ts = df_sample.iloc[:, 0].values
head_diffs = np.diff(ts[:1000])

print("First few timestamps:", ts[:5])
print("Diffs between consecutive timestamps (first 20):", np.diff(ts[:21]))
print("Mean diff:", np.mean(head_diffs))
print("Median diff:", np.median(head_diffs))

# Reading the result for an expected rate of about 90 Hz (interval ~11.1 ms):
#   diffs around 11     -> timestamps are in milliseconds
#   diffs around 11000  -> timestamps are in microseconds


In [None]:

# Findings from the previous cell, as noted by the author: the timestamps are
# in milliseconds.  The median interval (~11 ms) matches 90 Hz, but the mean
# interval (~4696 ms) is far larger, which points to long gaps in the file.
# The first-to-last span therefore covers the whole session, while samples
# appear to exist only for part of it, so rows / span understates the
# sampling rate.
#
# More robust approach used below:
#   1. derive the sampling rate from the median inter-sample interval;
#   2. count the intervals that look continuous (short gap) versus gaps.

total_span_s = (ts[-1] - ts[0]) / 1000
print(f"Total rows: {len(df_sample)}")
print(f"Total span: {total_span_s:.1f} seconds = {total_span_s/60:.1f} minutes")

diffs = np.diff(ts)
median_interval = np.median(diffs)
print("\nInter-sample interval stats:")
print(f"  Min: {np.min(diffs):.2f} ms")
print(f"  Median: {median_interval:.2f} ms")
print(f"  Mean: {np.mean(diffs):.2f} ms")
print(f"  Max: {np.max(diffs):.2f} ms")
print(f"  Std: {np.std(diffs):.2f} ms")

# Share of "normal" intervals (< 15 ms for 90 Hz).
normal_intervals = diffs < 15
n_normal = normal_intervals.sum()
n_gaps = (~normal_intervals).sum()
print(f"\n  Normal intervals (<15ms): {n_normal} ({n_normal/len(diffs)*100:.1f}%)")
print(f"  Gap intervals (>=15ms): {n_gaps}")

# Effective sampling rate from the median interval.
effective_sr = 1000.0 / median_interval
print(f"\n  Effective sampling rate (from median): {effective_sr:.1f} Hz")


In [None]:

import os
import pandas as pd
import numpy as np
from pathlib import Path

# Data-quality audit of every raw eye-tracking recording.
#
# For each group / participant / phase the cell locates
# `<base_path>/<group>/<pid>/<phase>/eye_tracking/<session>/tobii_pro_gaze.csv`,
# measures sampling regularity and gaze validity, and stores exactly one row
# per combination in `results` (-> `df_results`).

# NOTE(review): absolute, machine-specific location. The notebook's setup cell
# derives INPUT_ROOT from the working directory - confirm both point at the
# same tree before running this elsewhere.
base_path = Path("/Users/kyoya/Laboratory/metacognition-analysis/data/input")

# Group A here also lists P005 and P011, which the setup cell's `participants`
# mapping leaves out because of poor pre-test gaze quality.
group_a = ["P001", "P002", "P005", "P006", "P008", "P009", "P010", "P011", "P016", "P017"]
group_b = ["P003", "P004", "P007", "P012", "P013", "P014", "P015", "P018", "P019", "P020"]
phases = ["pre", "post", "training1", "training2", "training3"]

# Inter-sample intervals below this value (ms) count as continuous recording
# (expected spacing is ~11 ms at 90 Hz); anything else is a gap.
GAP_THRESHOLD_MS = 15
# A recording is flagged when its median sampling rate (Hz) or a validity
# percentage falls below these limits.
MIN_MEDIAN_SR_HZ = 85
MIN_VALID_PCT = 70


def placeholder_row(group_name, pid, phase, status, flag, total_rows=0):
    """Return a result row for a recording that could not be measured.

    All metric fields are NaN (counts are 0) so the row still lines up with
    the columns of successfully measured recordings.

    Args:
        group_name: Group label ("A" or "B").
        pid: Participant id.
        phase: Phase name.
        status: Value for the "Status" column (e.g. "MISSING_DIR").
        flag: Value for the "Flag" column (e.g. "MISSING").
        total_rows: Number of rows read from the CSV, if it was read at all.
    """
    return {
        "Group": group_name, "Participant": pid, "Phase": phase,
        "Status": status,
        "Total_Span_min": np.nan, "Active_Duration_s": np.nan,
        "Total_Rows": total_rows, "Median_SR_Hz": np.nan, "Effective_SR_Hz": np.nan,
        "Valid_Gaze_Pct": np.nan, "Left_Valid_Pct": np.nan, "Right_Valid_Pct": np.nan,
        "Num_Gaps": 0, "Normal_Interval_Pct": np.nan,
        "Flag": flag,
    }


def gaze_quality_metrics(df, gap_threshold_ms=GAP_THRESHOLD_MS):
    """Compute the quality metrics of one gaze recording.

    Args:
        df: Gaze samples with at least two rows. The first column is used as
            the timestamp (treated as milliseconds); the columns `gaze_x`,
            `gaze_y`, `left_gaze_origin_validity` and
            `right_gaze_origin_validity` must be present.
        gap_threshold_ms: Intervals below this value count as "normal".

    Returns:
        dict with the metric columns of the report, from "Total_Span_min"
        through "Flag", in report order.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    total_rows = len(df)
    timestamps = df[df.columns[0]].values
    total_span_s = (timestamps[-1] - timestamps[0]) / 1000.0

    diffs = np.diff(timestamps)
    normal_mask = diffs < gap_threshold_ms
    normal_intervals = diffs[normal_mask]

    # Median sampling rate is taken from the normal intervals only, so long
    # pauses do not distort it; 0 means there was no normal interval at all.
    if len(normal_intervals) > 0:
        median_sr = 1000.0 / np.median(normal_intervals)
    else:
        median_sr = 0

    # Active duration: time covered by normal intervals only.
    active_duration_s = normal_intervals.sum() / 1000.0
    # Effective sampling rate: rows over the whole span, gaps included.
    effective_sr = total_rows / total_span_s if total_span_s > 0 else 0

    valid_gaze = df['gaze_x'].notna() & df['gaze_y'].notna()
    valid_gaze_pct = valid_gaze.sum() / total_rows * 100
    left_valid_pct = (df['left_gaze_origin_validity'] == 1).sum() / total_rows * 100
    right_valid_pct = (df['right_gaze_origin_validity'] == 1).sum() / total_rows * 100

    flags = []
    if median_sr < MIN_MEDIAN_SR_HZ:
        flags.append(f"LOW_MEDIAN_SR({median_sr:.0f}Hz)")
    if valid_gaze_pct < MIN_VALID_PCT:
        flags.append(f"LOW_VALID({valid_gaze_pct:.1f}%)")
    if left_valid_pct < MIN_VALID_PCT and right_valid_pct < MIN_VALID_PCT:
        flags.append("BOTH_EYES_LOW")
    elif left_valid_pct < MIN_VALID_PCT:
        flags.append(f"LEFT_EYE_LOW({left_valid_pct:.0f}%)")
    elif right_valid_pct < MIN_VALID_PCT:
        flags.append(f"RIGHT_EYE_LOW({right_valid_pct:.0f}%)")

    return {
        "Total_Span_min": round(total_span_s / 60.0, 1),
        "Active_Duration_s": round(active_duration_s, 1),
        "Total_Rows": total_rows,
        "Median_SR_Hz": round(median_sr, 1),
        "Effective_SR_Hz": round(effective_sr, 1),
        "Valid_Gaze_Pct": round(valid_gaze_pct, 1),
        "Left_Valid_Pct": round(left_valid_pct, 1),
        "Right_Valid_Pct": round(right_valid_pct, 1),
        "Num_Gaps": int((~normal_mask).sum()),
        "Normal_Interval_Pct": round(normal_mask.sum() / len(diffs) * 100, 1),
        "Flag": ", ".join(flags) if flags else "OK",
    }


results = []

# The loop variable is `group_pids`, not `participants`: the setup cell binds
# `participants` to the group -> ids dict, and rebinding it here would break
# every cell that calls `participants.items()` when re-run afterwards.
for group_name, group_pids in [("A", group_a), ("B", group_b)]:
    for pid in group_pids:
        for phase in phases:
            eye_dir = base_path / group_name / pid / phase / "eye_tracking"
            if not eye_dir.exists():
                results.append(placeholder_row(group_name, pid, phase, "MISSING_DIR", "MISSING"))
                continue

            # iterdir() yields entries in arbitrary order; sort so that the
            # same session directory is picked on every run.
            subdirs = sorted(d for d in eye_dir.iterdir() if d.is_dir())
            if not subdirs:
                results.append(placeholder_row(group_name, pid, phase, "NO_SUBDIR", "MISSING"))
                continue

            ts_dir = subdirs[0]
            csv_path = ts_dir / "tobii_pro_gaze.csv"
            if not csv_path.exists():
                results.append(placeholder_row(group_name, pid, phase, "NO_CSV", "MISSING"))
                continue

            # Best effort: a file that cannot be read or lacks a column is
            # recorded as an ERROR row instead of aborting the whole audit.
            try:
                df = pd.read_csv(csv_path)
                total_rows = len(df)

                # Intervals need at least two samples.
                if total_rows < 2:
                    results.append(placeholder_row(
                        group_name, pid, phase, "TOO_FEW_ROWS", "TOO_FEW_ROWS",
                        total_rows=total_rows,
                    ))
                    continue

                results.append({
                    "Group": group_name,
                    "Participant": pid,
                    "Phase": phase,
                    "Status": "OK",
                    **gaze_quality_metrics(df),
                })
            except Exception as e:
                results.append(placeholder_row(
                    group_name, pid, phase, f"ERROR: {str(e)[:60]}", "ERROR",
                ))

df_results = pd.DataFrame(results)
print(f"Processing complete: {len(df_results)} entries")
print(f"All statuses: {df_results['Status'].value_counts().to_dict()}")


In [None]:

# Print the data-quality report: flagged recordings first, then every row.

# Columns shown in both tables, in display order.
quality_report_columns = [
    'Group', 'Participant', 'Phase', 'Total_Span_min', 'Active_Duration_s',
    'Total_Rows', 'Median_SR_Hz', 'Valid_Gaze_Pct', 'Left_Valid_Pct',
    'Right_Valid_Pct', 'Num_Gaps', 'Flag',
]

# Temporary sort key so that any non-OK flag sorts to the top. It is dropped
# from the sorted copy only; df_results itself keeps the column.
df_results['is_flagged'] = df_results['Flag'] != 'OK'
df_results_sorted = df_results.sort_values(
    by=['is_flagged', 'Valid_Gaze_Pct', 'Group', 'Participant', 'Phase'],
    ascending=[False, True, True, True, True],
)
df_results_sorted = df_results_sorted.drop(columns=['is_flagged'])

# Widen pandas' display limits so the tables are printed in full.
for option_name, option_value in [
    ('display.max_rows', 120),
    ('display.max_columns', 20),
    ('display.width', 250),
    ('display.max_colwidth', 40),
]:
    pd.set_option(option_name, option_value)

heavy_rule = "=" * 220
print(heavy_rule)
print("EYE TRACKING DATA QUALITY REPORT - ALL PARTICIPANTS & PHASES")
print(heavy_rule)
print()

# Part 1: only the rows carrying at least one flag.
flagged = df_results_sorted[df_results_sorted['Flag'] != 'OK']
print(f"*** FLAGGED ENTRIES: {len(flagged)} out of {len(df_results)} ***")
print()

if not flagged.empty:
    print(flagged[quality_report_columns].to_string(index=False))
    print()

print("-" * 220)
print()

# Part 2: the complete table in the same sort order.
print("FULL TABLE (sorted: flagged first, then by valid gaze %)")
print()
print(df_results_sorted[quality_report_columns].to_string(index=False))


In [None]:

# Aggregate view of the quality audit: overall, per group and per phase.
# Only rows whose CSV was loaded successfully (Status == 'OK') enter the
# statistics; "flagged" means the Flag column is anything other than 'OK'.

print("=" * 120)
print("SUMMARY STATISTICS")
print("=" * 120)
print()

ok_data = df_results[df_results['Status'] == 'OK']
median_sr_all = ok_data['Median_SR_Hz']
valid_pct_all = ok_data['Valid_Gaze_Pct']

print("--- Overall ---")
print(f"  Total participant-phase combinations: {len(df_results)}")
print(f"  Successfully loaded: {len(ok_data)}")
print(f"  Flagged (any issue): {(ok_data['Flag'] != 'OK').sum()}")
print()

print("--- Median Sampling Rate (Hz) ---")
print(f"  Mean:   {median_sr_all.mean():.1f}")
print(f"  Median: {median_sr_all.median():.1f}")
print(f"  Min:    {median_sr_all.min():.1f}")
print(f"  Max:    {median_sr_all.max():.1f}")
print()

print("--- Valid Gaze Percentage ---")
print(f"  Mean:   {valid_pct_all.mean():.1f}%")
print(f"  Median: {valid_pct_all.median():.1f}%")
print(f"  Min:    {valid_pct_all.min():.1f}%")
print(f"  Max:    {valid_pct_all.max():.1f}%")
# Number of recordings below each validity cut-off.
print(f"  <70%:   {(valid_pct_all < 70).sum()} entries")
print(f"  <80%:   {(valid_pct_all < 80).sum()} entries")
print(f"  <90%:   {(valid_pct_all < 90).sum()} entries")
print()

print("--- Per-Group Summary ---")
for grp in ['A', 'B']:
    grp_data = ok_data[ok_data['Group'] == grp]
    grp_valid = grp_data['Valid_Gaze_Pct']
    print(f"\n  Group {grp}:")
    print(f"    Mean Valid Gaze:   {grp_valid.mean():.1f}%")
    print(f"    Median Valid Gaze: {grp_valid.median():.1f}%")
    print(f"    Mean Median SR:    {grp_data['Median_SR_Hz'].mean():.1f} Hz")
    print(f"    Flagged entries:   {(grp_data['Flag'] != 'OK').sum()}")

print()
print("--- Per-Phase Summary ---")
for phase in phases:
    ph_data = ok_data[ok_data['Phase'] == phase]
    ph_valid = ph_data['Valid_Gaze_Pct']
    print(f"\n  {phase}:")
    print(f"    Mean Valid Gaze:   {ph_valid.mean():.1f}%")
    print(f"    Median Valid Gaze: {ph_valid.median():.1f}%")
    print(f"    Flagged entries:   {(ph_data['Flag'] != 'OK').sum()}")


In [None]:

# Collapse the successfully loaded recordings to one row per participant so
# that participants with consistently poor data stand out.

print("=" * 120)
print("PER-PARTICIPANT SUMMARY (averaged across all phases)")
print("=" * 120)
print()

participant_summary = (
    ok_data
    .groupby(['Group', 'Participant'])
    .agg({
        'Valid_Gaze_Pct': ['mean', 'min', 'std'],
        'Left_Valid_Pct': 'mean',
        'Right_Valid_Pct': 'mean',
        'Total_Rows': 'sum',
        'Active_Duration_s': 'sum',
        'Num_Gaps': 'sum',
    })
    .round(1)
)

# Replace the two-level column labels produced by agg() with flat names,
# listed in the same order as the aggregations above.
participant_summary.columns = [
    'Valid_Gaze_Mean', 'Valid_Gaze_Min', 'Valid_Gaze_Std',
    'Left_Valid_Mean', 'Right_Valid_Mean',
    'Total_Rows_All', 'Active_Duration_Total_s', 'Total_Gaps',
]

# Build the Note column: one tag per rule that fires, 'OK' when none does.
participant_notes = []
for avg_valid, worst_valid, spread_valid in zip(
    participant_summary['Valid_Gaze_Mean'],
    participant_summary['Valid_Gaze_Min'],
    participant_summary['Valid_Gaze_Std'],
):
    tags = []
    if avg_valid < 85:
        tags.append('LOW_AVG_VALID')
    if worst_valid < 70:
        tags.append('HAS_LOW_PHASE')
    if spread_valid > 15:
        tags.append('HIGH_VARIABILITY')
    participant_notes.append(', '.join(tags) if tags else 'OK')
participant_summary['Note'] = participant_notes

# Worst average validity first.
participant_summary = participant_summary.sort_values('Valid_Gaze_Mean', ascending=True)
print(participant_summary.to_string())
