In [8]:
import json
import pandas as pd
from IPython.display import display, Markdown

def read_experiment_data(file_path):
    """Load the experiment summary stored at *file_path*.

    The file is opened as UTF-8 text and parsed as JSON; the parsed object is
    returned unchanged.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)


def extract_experiment_info(experiments):
    """提取实验信息"""
    results = []

    for exp in experiments:
        config = exp.get("config", {})
        result = exp.get("result", {})

        # 提取所需的参数
        eval_dataset = config.get("eval_dataset", "N/A")
        edge_cloud_bandwidth = config.get("edge_cloud_bandwidth", 0)
        edge_end_bandwidth = config.get("edge_end_bandwidth", 0)
        eval_mode = result.get("eval_mode", "")
        accuracy = result.get("accuracy", "N/A")  # 提取 accuracy
        target_forward_times = result.get("target_forward_times", 0)
        generated_tokens = result.get("generated_tokens", 0)
        wall_time = result.get("wall_time", 0)
        communication_time = result.get("communication_time", 0)
        queuing_time = result.get("queuing_time", 0)
        edge_cloud_data_bytes = result.get("edge_cloud_data_bytes", 0)
        throughput = result.get("throughput", 0)
        comm_energy = result.get("comm_energy", 0)
        
        # 提取模型信息
        little_model = result.get("little_model") or config.get("little_model", "N/A")
        draft_model = result.get("draft_model") or config.get("draft_model", "N/A")
        target_model = result.get("target_model") or config.get("target_model", "N/A")
        
        # 为了美观，可以去掉路径前缀
        def simplify_model_name(name):
            if isinstance(name, str) and "/" in name:
                return name.split("/")[-1]
            return name
            
        little_model = simplify_model_name(little_model)
        draft_model = simplify_model_name(draft_model)
        target_model = simplify_model_name(target_model)

        # 提取接受率相关数据
        little_accepted_tokens = result.get("little_accepted_tokens", 0)
        little_generated_tokens = result.get("little_generated_tokens", 0)
        draft_accepted_tokens = result.get("draft_accepted_tokens", 0)
        draft_generated_tokens = result.get("draft_generated_tokens", 0)

        # 计算接受率
        draft_acc = (
            round(little_accepted_tokens / little_generated_tokens * 100, 2)
            if little_generated_tokens > 0
            else "N/A"
        )
        target_acc = (
            round(draft_accepted_tokens / draft_generated_tokens * 100, 2)
            if draft_generated_tokens > 0
            else "N/A"
        )

        # 兼容传统 Speculation
        if target_acc == "N/A" and "acceptance_rate" in result:
            target_acc = round(float(result["acceptance_rate"]) * 100, 2)

        results.append(
            {
                "数据集": eval_dataset,
                "Edge-Cloud 带宽 (Mbps)": edge_cloud_bandwidth,
                "Edge-End 带宽 (Mbps)": edge_end_bandwidth,
                "方法": eval_mode,
                "Little-Model": little_model,
                "Draft-Model": draft_model,
                "Target-Model": target_model,
                "准确率": accuracy,  # 添加到结果中
                "生成 Token 数": generated_tokens,
                "目标模型 Forward 次数": target_forward_times,
                "Draft 接受率 (%)": draft_acc,
                "Target 接受率 (%)": target_acc,
                "挂钟时间 (s)": round(wall_time, 2),
                "通信时间 (s)": round(communication_time, 2),
                "排队时间 (s)": round(queuing_time, 2),
                "Edge-Cloud 数据传输量 (bytes)": edge_cloud_data_bytes,
                "Token 输出速率 (tokens/s)": round(throughput, 2),
                "通信能量": comm_energy,
            }
        )

    return results

def process_data(experiment_data):
    """Build a DataFrame from the extracted rows and append derived metrics.

    Two columns are added:

    * ``相对 large 算力消耗 (%)`` - target-model forward passes relative to the
      ``large`` run of the same dataset; ``None`` when that dataset has no
      ``large`` run with a positive forward count.
    * ``Generation Offloading Rate (%)`` -
      ``(1 - forward passes / generated tokens) * 100``; ``0`` when no tokens
      were generated.
    """
    forwards_col = '目标模型 Forward 次数'
    tokens_col = '生成 Token 数'

    frame = pd.DataFrame(experiment_data)

    # One baseline per dataset: the first 'large' row encountered.
    is_large = frame['方法'] == 'large'
    baseline = (
        frame.loc[is_large, ['数据集', forwards_col]]
        .drop_duplicates(subset=['数据集'])
        .rename(columns={forwards_col: 'baseline_forwards'})
    )
    frame = frame.merge(baseline, on='数据集', how='left')

    def relative_cost(row):
        base = row['baseline_forwards']
        if pd.isnull(base) or not (base > 0):
            return None
        return round(row[forwards_col] / base * 100, 2)

    def offloading_rate(row):
        if row[tokens_col] > 0:
            return round((1 - row[forwards_col] / row[tokens_col]) * 100, 2)
        return 0

    frame['相对 large 算力消耗 (%)'] = frame.apply(relative_cost, axis=1)
    frame['Generation Offloading Rate (%)'] = frame.apply(offloading_rate, axis=1)

    # The baseline column is only a scratch value for the computation above.
    if 'baseline_forwards' in frame.columns:
        frame = frame.drop(columns=['baseline_forwards'])

    return frame

def generate_markdown_table(df):
    """Render every row of *df* as a single GitHub-style markdown table.

    Rows are ordered by method (ascending) and, within a method, by
    Edge-Cloud bandwidth (descending). The index is not rendered.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]
    ordered = df.sort_values(by=sort_keys, ascending=[True, False])
    return ordered.to_markdown(index=False, tablefmt="github")


def get_pandas_table(df):
    """Return a sorted copy of *df* for direct display.

    Rows are ordered by method (ascending) and, within a method, by
    Edge-Cloud bandwidth (descending). The input frame is left untouched.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]
    return df.sort_values(by=sort_keys, ascending=[True, False])


def generate_grouped_tables(df):
    """Render one GitHub-style markdown table per method.

    Each section consists of a ``## 方法: <method>`` heading followed by that
    method's rows sorted by Edge-Cloud bandwidth in descending order. The
    sections are joined with a newline and returned as one string.
    """

    def render_section(method, rows):
        ordered = rows.sort_values(by="Edge-Cloud 带宽 (Mbps)", ascending=False)
        rendered = ordered.to_markdown(index=False, tablefmt="github")
        return f"## 方法: {method}\n\n{rendered}\n"

    return "\n".join(
        render_section(method, rows) for method, rows in df.groupby("方法")
    )


def generate_dataset_grouped_tables(df):
    """Render one GitHub-style markdown table per dataset.

    Each section consists of a ``## 数据集: <dataset>`` heading followed by
    that dataset's rows sorted by method (ascending) and then by Edge-Cloud
    bandwidth (descending). The sections are joined with a newline and
    returned as one string.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]

    def render_section(dataset, rows):
        ordered = rows.sort_values(by=sort_keys, ascending=[True, False])
        rendered = ordered.to_markdown(index=False, tablefmt="github")
        return f"## 数据集: {dataset}\n\n{rendered}\n"

    return "\n".join(
        render_section(dataset, rows) for dataset, rows in df.groupby("数据集")
    )


def jupyter_display(df):
    """Show the result tables of *df* in a Jupyter notebook.

    Three views are displayed in order, each under a markdown heading:

    1. all rows, sorted by method (ascending) and Edge-Cloud bandwidth
       (descending);
    2. one table per method, sorted by Edge-Cloud bandwidth (descending);
    3. one table per dataset, sorted by method (ascending) and Edge-Cloud
       bandwidth (descending).

    DataFrames are handed to ``display`` directly rather than converted to
    markdown text.
    """
    method_col = "方法"
    bandwidth_col = "Edge-Cloud 带宽 (Mbps)"
    method_then_bandwidth = [method_col, bandwidth_col]

    # --- 1. Single table with a unified ordering ---
    display(Markdown("# 实验结果对比表（按方法和带宽排序）"))
    display(df.sort_values(method_then_bandwidth, ascending=[True, False]))

    # --- 2. One table per method ---
    display(Markdown("# 实验结果对比表（按方法分组）"))
    for method, rows in df.groupby(method_col):
        display(Markdown(f"## 方法: {method}"))
        display(rows.sort_values(bandwidth_col, ascending=False))

    # --- 3. One table per dataset ---
    display(Markdown("# 实验结果对比表（按数据集分组）"))
    for dataset, rows in df.groupby("数据集"):
        display(Markdown(f"## 数据集: {dataset}"))
        display(rows.sort_values(method_then_bandwidth, ascending=[True, False]))


def main(specified_file=None):
    """Load an experiment summary, display its tables and save them as markdown.

    Args:
        specified_file: Path of the JSON summary to process. When omitted (or
            empty), the most recently modified ``experiment_summary_*.json``
            in the current working directory is used.

    Returns:
        None. The function prints a message and returns early when no usable
        input file is found or the file contains no experiments. Otherwise it
        displays the tables and writes ``experiment_results_table.md`` to the
        current working directory.
    """
    import glob
    import os

    if specified_file:
        # 1. An explicitly requested file takes precedence. isfile (rather
        #    than exists) also rejects directories, which cannot be opened.
        if not os.path.isfile(specified_file):
            print(f"指定的文件不存在: {specified_file}")
            return
        file_path = specified_file
    else:
        # 2. Otherwise pick the newest summary. No hard-coded file name is
        #    needed as a fallback: any such name already matches this pattern.
        candidates = glob.glob("experiment_summary_*.json")
        if not candidates:
            print("找不到任何实验总结 JSON 文件")
            return
        file_path = max(candidates, key=os.path.getmtime)

    print(f"处理文件: {file_path}")

    experiments = read_experiment_data(file_path)
    experiment_data = extract_experiment_info(experiments)

    # Without rows the DataFrame has no columns and the table builders would
    # fail on the missing sort keys.
    if not experiment_data:
        print("实验数据为空，无法生成表格")
        return

    # Convert to a DataFrame and add the derived metrics.
    df = process_data(experiment_data)

    unified_table = generate_markdown_table(df)
    method_grouped_tables = generate_grouped_tables(df)
    dataset_grouped_tables = generate_dataset_grouped_tables(df)

    jupyter_display(df)

    # Persist all three views in a single markdown report.
    separator = "\n\n" + "=" * 80 + "\n\n"
    with open("experiment_results_table.md", "w", encoding="utf-8") as f:
        f.write("# 实验结果对比表\n\n")
        f.write("## 1. 统一对比表（按方法和带宽排序）\n\n")
        f.write(unified_table)
        f.write(separator)
        f.write("## 2. 按方法分组表格\n\n")
        f.write(method_grouped_tables)
        f.write(separator)
        f.write("## 3. 按数据集分组表格\n\n")
        f.write(dataset_grouped_tables)

    print("\n表格已保存到 experiment_results_table.md 文件中")


if __name__ == "__main__":
    # To process a different file, edit the next line, e.g.: main("experiment_summary_20260127_053240.json")
    main("qwen_exp.json")

处理文件: qwen_exp.json


# 实验结果对比表（按方法和带宽排序）

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
0,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10478,2423,71.18,69.67,626.0,0.42,121.15,1092258,16.74,0.0,,76.88
2,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10444,2252,70.33,70.83,598.58,0.36,112.6,984080,17.45,0.0,,78.44
3,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20839,5691,66.53,63.46,1409.46,0.72,284.55,2967604,14.79,0.0,,72.69
1,eval/eval_humaneval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10503,1898,59.36,50.56,707.79,0.43,94.9,1151322,14.84,0.0,,81.93
4,eval/eval_gsm8k.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10491,1782,61.0,54.0,679.72,0.37,89.1,1030074,15.43,0.0,,83.01
5,eval/eval_mt_bench_noeval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20906,4661,48.38,41.4,1810.32,0.78,233.05,3195486,11.55,0.0,,77.7


# 实验结果对比表（按方法分组）

## 方法: adaptive_tridecoding

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
0,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10478,2423,71.18,69.67,626.0,0.42,121.15,1092258,16.74,0.0,,76.88
2,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10444,2252,70.33,70.83,598.58,0.36,112.6,984080,17.45,0.0,,78.44
3,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20839,5691,66.53,63.46,1409.46,0.72,284.55,2967604,14.79,0.0,,72.69


## 方法: tridecoding

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
1,eval/eval_humaneval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10503,1898,59.36,50.56,707.79,0.43,94.9,1151322,14.84,0.0,,81.93
4,eval/eval_gsm8k.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10491,1782,61.0,54.0,679.72,0.37,89.1,1030074,15.43,0.0,,83.01
5,eval/eval_mt_bench_noeval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20906,4661,48.38,41.4,1810.32,0.78,233.05,3195486,11.55,0.0,,77.7


# 实验结果对比表（按数据集分组）

## 数据集: eval/eval_gsm8k.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
2,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10444,2252,70.33,70.83,598.58,0.36,112.6,984080,17.45,0.0,,78.44
4,eval/eval_gsm8k.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0125,10491,1782,61.0,54.0,679.72,0.37,89.1,1030074,15.43,0.0,,83.01


## 数据集: eval/eval_humaneval.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
0,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10478,2423,71.18,69.67,626.0,0.42,121.15,1092258,16.74,0.0,,76.88
1,eval/eval_humaneval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,10503,1898,59.36,50.56,707.79,0.43,94.9,1151322,14.84,0.0,,81.93


## 数据集: eval/eval_mt_bench_noeval.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,Little-Model,Draft-Model,Target-Model,准确率,生成 Token 数,目标模型 Forward 次数,Draft 接受率 (%),Target 接受率 (%),挂钟时间 (s),通信时间 (s),排队时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%),Generation Offloading Rate (%)
3,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20839,5691,66.53,63.46,1409.46,0.72,284.55,2967604,14.79,0.0,,72.69
5,eval/eval_mt_bench_noeval.py,34.6,563,tridecoding,Qwen3-0.6B,Qwen3-1.7B,Qwen3-14B,0.0,20906,4661,48.38,41.4,1810.32,0.78,233.05,3195486,11.55,0.0,,77.7



表格已保存到 experiment_results_table.md 文件中
