# In [1]:  (Jupyter notebook cell prompt, kept as a comment so the file parses as Python)
import re
import glob
import csv
import statistics
from pathlib import Path

def process_single_file(file_path):
    """Parse one log file and collect the raw summary metrics it reports.

    Every line that starts with ``A_auc`` is treated as a metric line and is
    scanned for ``<name> <number>`` pairs separated by a single space. Values
    belonging to one of the four tracked metrics are multiplied by 100 and
    appended in order of appearance.

    Args:
        file_path: Path of the text log to read.

    Returns:
        A dict with the keys ``'A_auc'``, ``'A_avg'``, ``'A_last'`` and
        ``'F_last'``, each mapping to a list of floats (already scaled by
        100). A list is empty when the metric never appears in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Metric lines are plain ASCII; undecodable bytes elsewhere in the log are
    # replaced instead of aborting the whole file.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    metrics = {
        'A_auc': [],
        'A_avg': [],
        'A_last': [],
        'F_last': [],
    }

    # Accept an optional sign, an optional fractional part and an optional
    # exponent, so negative values (e.g. a negative F_last), integers such as
    # "1" and scientific notation are no longer silently dropped.
    pair_pattern = re.compile(
        r'([A-Za-z_]+) ([-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)'
    )

    # Splitting on the "==== Summary ====" banner and stripping each chunk
    # means a metric line that directly follows the banner on the same line
    # still starts with "A_auc".
    for block in re.split(r'=+ Summary =+', content):
        for line in block.strip().split('\n'):
            if not line.startswith('A_auc'):
                continue
            for key, value in pair_pattern.findall(line):
                if key in metrics:
                    metrics[key].append(float(value) * 100)
    return metrics

def _mean_and_std(values):
    """Return ``(mean, sample std)`` of *values*, using 0.0 where undefined."""
    if not values:
        return 0.0, 0.0
    mean = sum(values) / len(values)
    # The sample standard deviation needs at least two data points.
    std = statistics.stdev(values) if len(values) >= 2 else 0.0
    return mean, std


def batch_process_files(pattern, csv_path="results.csv"):
    """Process every file matching *pattern* and write a statistics CSV.

    For each matched file the raw metrics are obtained from
    ``process_single_file``; the mean and sample standard deviation of every
    metric are computed, rounded to 4 decimals and written as one CSV row.
    The unrounded per-file means are then aggregated across files.

    Args:
        pattern: Glob pattern selecting the input files.
        csv_path: Destination of the CSV report (overwritten if it exists).

    Returns:
        A tuple ``(csv_data, global_stats)``. ``csv_data`` is the list of row
        dicts written to the CSV. ``global_stats`` maps
        ``'<metric>_global_mean'`` / ``'<metric>_global_std'`` to the mean and
        sample standard deviation of the per-file means. A file that reported
        no value for a metric still gets a 0.0 entry in its CSV row but is
        left out of that metric's cross-file statistics.

    Raises:
        OSError: If the CSV file cannot be written.
    """
    metric_names = ('A_auc', 'A_avg', 'A_last', 'F_last')

    headers = ["Filename"]
    for metric in metric_names:
        headers.extend((f"{metric}_mean", f"{metric}_std"))

    # Per-file means of every metric, gathered for the cross-file statistics.
    per_file_means = {metric: [] for metric in metric_names}
    csv_data = []

    # glob returns matches in arbitrary order; sort for a reproducible CSV.
    for file_path in sorted(glob.glob(pattern)):
        try:
            raw_metrics = process_single_file(file_path)
        except Exception as e:
            # Best-effort batch run: report the failing file and move on.
            print(f"处理文件 {file_path} 出错: {str(e)}")
            continue

        csv_row = {"Filename": Path(file_path).name}
        for metric in metric_names:
            values = raw_metrics.get(metric, [])
            mean, std = _mean_and_std(values)
            csv_row[f"{metric}_mean"] = round(mean, 4)
            csv_row[f"{metric}_std"] = round(std, 4)
            # A placeholder 0.0 from a file without data would distort the
            # cross-file mean and spread, so only real means are collected.
            if values:
                per_file_means[metric].append(mean)
        csv_data.append(csv_row)

    # Explicit encoding so non-ASCII file names do not depend on the locale.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writeheader()
        writer.writerows(csv_data)

    global_stats = {}
    for metric in metric_names:
        global_mean, global_std = _mean_and_std(per_file_means[metric])
        global_stats[f"{metric}_global_mean"] = global_mean
        global_stats[f"{metric}_global_std"] = global_std

    return csv_data, global_stats

if __name__ == "__main__":
    # Example run: aggregate every ``*.txt`` log found directly under one
    # results directory and write the report to ``results.csv``.
    # Alternative result directories (switch ``root_path`` to use them):
    #   /home/sunguanglong/DGIL/MISA/results/output/vit_others
    #   /home/sunguanglong/DGIL/MISA/results/output/vit_in21k
    root_path = '/home/sunguanglong/DGIL/MISA/results/output/vit_fly'
    save_path = 'results.csv'
    results, global_stats = batch_process_files(
        pattern=f"{root_path}/*.txt",
        csv_path=save_path,
    )