In [4]:
import json
import os
from typing import Dict

def calculate_model_statistics(evaluation_results_dir: str = "/home/zhangxiaobin/text2image/evaluation_results_grok41") -> Dict:
    # 评估维度
    evaluation_dimensions = ["感知质量", "文本-图像一致性", "菜品完整性", "语义合理性", "摆盘与食品美学","综合评价"]
    
    # 存储所有模型的统计结果
    model_stats = {}
    
    # 遍历每个模型文件夹
    for model_name in os.listdir(evaluation_results_dir):
        model_path = os.path.join(evaluation_results_dir, model_name)
        if not os.path.isdir(model_path):
            continue
        print(f"正在处理模型: {model_name}")
        
        model_scores = {dim: [] for dim in evaluation_dimensions}
        
        # 读取前20个JSON文件
        for i in range(20):
            json_file = os.path.join(model_path, f"evaluation_{i}.json")
            
            if os.path.exists(json_file):
                try:
                    with open(json_file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                    
                    evaluation = data.get("evaluation", {})
                    
                    # 提取各维度得分
                    for dim in evaluation_dimensions:
                        if dim in evaluation:
                            if dim == "综合评价":
                                # 综合评价使用"最终得分"字段
                                score = evaluation[dim].get("最终得分", 0)
                            else:
                                # 其他维度使用"得分"字段
                                score = evaluation[dim].get("得分", 0)
                            model_scores[dim].append(score)
                        else:
                            print(f"警告: {json_file} 中缺少维度 '{dim}'")
                            
                except Exception as e:
                    print(f"错误: 读取文件 {json_file} 时出错: {e}")
            else:
                print(f"警告: 文件 {json_file} 不存在")
        
        # 计算平均分
        model_averages = {}
        for dim in evaluation_dimensions:
            if model_scores[dim]:
                avg_score = sum(model_scores[dim]) / len(model_scores[dim])
                model_averages[dim] = {
                    "平均分": round(avg_score, 2),
                    "样本数": len(model_scores[dim]),
                    "最高分": max(model_scores[dim]),
                    "最低分": min(model_scores[dim])
                }
            else:
                model_averages[dim] = {
                    "平均分": 0,
                    "样本数": 0,
                    "最高分": 0,
                    "最低分": 0
                }
        
        model_stats[model_name] = model_averages
        print(f"模型 {model_name} 处理完成")
    
    return model_stats


def save_statistics_to_csv(model_stats: Dict, output_file: str = "model_evaluation_statistics.csv"):
    """
    将统计结果保存为CSV文件（不依赖pandas）
    """
    evaluation_dimensions = ["感知质量", "文本-图像一致性", "菜品完整性", "语义合理性", "摆盘与食品美学","综合评价"]
    
    with open(output_file, 'w', encoding='utf-8-sig') as f:
        # 写入表头
        f.write("模型,评估维度,平均分,样本数,最高分,最低分\n")
        
        # 写入数据
        for model_name, stats in model_stats.items():
            for dim in evaluation_dimensions:
                dim_stats = stats[dim]
                f.write(f"{model_name},{dim},{dim_stats['平均分']},{dim_stats['样本数']},{dim_stats['最高分']},{dim_stats['最低分']}\n")
    
    print(f"统计结果已保存到: {output_file}")


def main():
    """
    主函数
    """
    print("开始统计模型评分...")
    
    # 计算统计结果
    model_stats = calculate_model_statistics()
    save_statistics_to_csv(model_stats)
    
    return model_stats

if __name__ == "__main__":
    # 运行统计分析
    stats = main()

开始统计模型评分...
正在处理模型: seedream-v4
模型 seedream-v4 处理完成
正在处理模型: wan-25-preview_opz
模型 wan-25-preview_opz 处理完成
正在处理模型: wan-25-preview
模型 wan-25-preview 处理完成
正在处理模型: imagen4-preview_opz
模型 imagen4-preview_opz 处理完成
正在处理模型: qwen-image_opz
模型 qwen-image_opz 处理完成
正在处理模型: imagen4-preview
模型 imagen4-preview 处理完成
正在处理模型: seedream-v4_opz
模型 seedream-v4_opz 处理完成
正在处理模型: qwen-image
模型 qwen-image 处理完成
统计结果已保存到: model_evaluation_statistics.csv
