In [None]:
"""
分析情绪识别可用帧比例，用于回答审稿人关于质量控制的问题
计算每个受试者的可用帧比例，并进行组间比较
"""

import pandas as pd
import numpy as np
from scipy import stats
from pathlib import Path
import os



def _count_usable_frames(emotion_file):
    """Return ``(total_frames, valid_frames)`` for one per-subject emotion CSV.

    The first two lines of the file are skipped as header information; the
    remainder is parsed as CSV and must contain an ``Emotion`` column. A frame
    counts as valid when its ``Emotion`` value is not ``'undefined'``.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the file cannot be decoded or parsed (pandas'
            ``EmptyDataError`` and ``ParserError`` are ``ValueError``
            subclasses).
        KeyError: the parsed table has no ``Emotion`` column.
    """
    with open(emotion_file, 'r', encoding='utf-8-sig') as f:
        # Skip the two leading header lines. The default argument keeps a
        # too-short file from raising StopIteration here; read_csv then
        # reports it as an empty file instead.
        next(f, None)
        next(f, None)
        emotion_df = pd.read_csv(f)

    # NOTE(review): missing (NaN) Emotion values compare unequal to
    # 'undefined' and are therefore counted as valid -- confirm intended.
    is_valid = emotion_df['Emotion'] != 'undefined'
    return len(emotion_df), int(is_valid.sum())


def _cohens_d(group_a, group_b):
    """Return Cohen's d (pooled SD) for two pandas Series, or None if undefined.

    The sample standard deviation is undefined for a single observation, so
    None is returned unless both groups have at least two values. A pooled
    standard deviation that is not positive yields 0.
    """
    n_a, n_b = len(group_a), len(group_b)
    if n_a < 2 or n_b < 2:
        return None
    pooled_std = np.sqrt(((n_a - 1) * group_a.std()**2 +
                          (n_b - 1) * group_b.std()**2) /
                         (n_a + n_b - 2))
    if not pooled_std > 0:
        return 0.0
    return (group_a.mean() - group_b.mean()) / pooled_std


def analyze_emotion_usable_frames():
    """Compute each subject's usable emotion-frame ratio and compare groups.

    Reads ``result/demographics.csv`` (columns ``姓名`` and ``组别``) and, for
    every listed subject, the file ``result/Emotions_smooth2/<姓名>.csv``.
    Subjects whose file does not exist are skipped silently; subjects whose
    file cannot be read or parsed are skipped with a printed warning so the
    loss of data is visible. A ``组别`` value of 1 is labelled ``ASD``; any
    other value is labelled ``TD``.

    Prints per-group mean and SD of the usable-frame percentage, a two-sided
    Mann-Whitney U test and Cohen's d, then writes the per-subject table to
    ``result/QC_Analysis/emotion_usable_frames.csv``.

    Returns:
        ``None`` when no subject could be processed; otherwise the tuple
        ``(results_df, asd_data, td_data)`` where ``results_df`` is the
        per-subject table and the other two are the usable-frame percentages
        of the ASD and TD groups.
    """
    emotion_dir = "result/Emotions_smooth2"
    demo_df = pd.read_csv("result/demographics.csv", encoding='utf-8-sig')

    results = []

    for _, row in demo_df.iterrows():
        subject = row['姓名']
        group_label = 'ASD' if row['组别'] == 1 else 'TD'

        emotion_file = os.path.join(emotion_dir, f"{subject}.csv")
        if not os.path.exists(emotion_file):
            continue

        try:
            total_frames, valid_frames = _count_usable_frames(emotion_file)
        except (OSError, ValueError, KeyError) as exc:
            # Stay best-effort, but report the dropped subject instead of
            # shrinking the sample silently.
            print(f"警告: 跳过 {emotion_file} ({type(exc).__name__}: {exc})")
            continue

        usable_ratio = (valid_frames / total_frames * 100) if total_frames > 0 else 0

        results.append({
            '姓名': subject,
            '组别': group_label,
            '总帧数': total_frames,
            '有效帧数': valid_frames,
            '可用帧比例': usable_ratio
        })

    results_df = pd.DataFrame(results)

    if results_df.empty:
        return None

    # Per-group summary and between-group comparison
    print("="*60)
    print("组间比较结果:")
    print("="*60)

    asd_data = results_df[results_df['组别'] == 'ASD']['可用帧比例'].dropna()
    td_data = results_df[results_df['组别'] == 'TD']['可用帧比例'].dropna()

    print(f"\nASD组 (n={len(asd_data)}):")
    print(f"  平均可用帧比例: {asd_data.mean():.2f} ± {asd_data.std():.2f}%")

    print(f"\nTD组 (n={len(td_data)}):")
    print(f"  平均可用帧比例: {td_data.mean():.2f} ± {td_data.std():.2f}%")

    # The test needs at least one observation in each group
    if len(asd_data) > 0 and len(td_data) > 0:
        stat, p_value = stats.mannwhitneyu(asd_data, td_data, alternative='two-sided')
        cohens_d = _cohens_d(asd_data, td_data)

        print(f"\n组间比较 (Mann-Whitney U检验):")
        print(f"  统计量: {stat:.4f}")
        print(f"  P值: {p_value:.6f}")
        print(f"  显著性: {'有显著差异 (p < 0.05)' if p_value < 0.05 else '无显著差异 (p ≥ 0.05)'}")
        if cohens_d is not None:
            print(f"  效应量 (Cohen's d): {cohens_d:.4f}")

    # Save the per-subject table; parents=True creates missing ancestors too
    output_dir = Path('result/QC_Analysis')
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / 'emotion_usable_frames.csv'
    results_df.to_csv(output_file, index=False, encoding='utf-8-sig')

    return results_df, asd_data, td_data

if __name__ == "__main__":
    # analyze_emotion_usable_frames() returns None when no subject file could
    # be processed, so check before unpacking to avoid a TypeError.
    outcome = analyze_emotion_usable_frames()
    if outcome is None:
        print("未找到可用的情绪数据文件，未生成结果")
    else:
        results_df, asd_data, td_data = outcome



组间比较结果:

ASD组 (n=99):
  平均可用帧比例: 76.16 ± 15.29%

TD组 (n=85):
  平均可用帧比例: 78.07 ± 16.49%

组间比较 (Mann-Whitney U检验):
  统计量: 4017.0000
  P值: 0.597839
  显著性: 无显著差异 (p ≥ 0.05)
