In [2]:
import json
import pandas as pd
from IPython.display import display, Markdown

def read_experiment_data(file_path):
    """Load and return the JSON content of the experiment summary file.

    Args:
        file_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)


def _round_metric(value, ndigits=2):
    """Round a numeric metric; a missing (``None``) value is passed through."""
    return None if value is None else round(value, ndigits)


def extract_experiment_info(experiments):
    """Flatten raw experiment records into one summary row per experiment.

    Each record is expected to be a dict with optional ``"config"`` and
    ``"result"`` sub-dicts. Missing keys fall back to the defaults used below.
    A sub-dict that is absent or ``None`` (JSON ``null``) is treated as empty,
    and a time/throughput metric that is ``None`` is kept as ``None`` instead
    of raising while rounding.

    Args:
        experiments: Iterable of experiment record dicts.

    Returns:
        A list of dicts keyed by the column labels used in the result tables.
    """
    rows = []

    for exp in experiments:
        # ``or {}`` also covers an explicit null value, which
        # ``exp.get(key, {})`` would hand back as None.
        config = exp.get("config") or {}
        result = exp.get("result") or {}

        rows.append(
            {
                "数据集": config.get("eval_dataset", "N/A"),
                "Edge-Cloud 带宽 (Mbps)": config.get("edge_cloud_bandwidth", 0),
                "Edge-End 带宽 (Mbps)": config.get("edge_end_bandwidth", 0),
                "方法": result.get("eval_mode", ""),
                "准确率": result.get("accuracy", "N/A"),
                "目标模型 Forward 次数": result.get("target_forward_times", 0),
                "挂钟时间 (s)": _round_metric(result.get("wall_time", 0)),
                "通信时间 (s)": _round_metric(result.get("communication_time", 0)),
                "Edge-Cloud 数据传输量 (bytes)": result.get("edge_cloud_data_bytes", 0),
                "Token 输出速率 (tokens/s)": _round_metric(result.get("throughput", 0)),
                "通信能量": result.get("comm_energy", 0),
            }
        )

    return rows

def process_data(experiment_data):
    """Turn the summary rows into a DataFrame and add the relative compute cost.

    The added column expresses each row's target-model forward count as a
    percentage of the forward count of the 'large' method on the same dataset.
    Rows whose dataset has no positive 'large' baseline get a missing value.

    Args:
        experiment_data: Row dicts as built by ``extract_experiment_info``.

    Returns:
        A DataFrame with the original columns plus the relative-cost column.
    """
    forwards_col = '目标模型 Forward 次数'
    frame = pd.DataFrame(experiment_data)

    # One baseline per dataset: the first 'large' row seen for that dataset.
    is_large = frame['方法'] == 'large'
    baselines = (
        frame.loc[is_large, ['数据集', forwards_col]]
        .drop_duplicates(subset=['数据集'])
        .rename(columns={forwards_col: 'baseline_forwards'})
    )

    # Attach the baseline to every row of the same dataset.
    frame = frame.merge(baselines, on='数据集', how='left')

    def relative_cost(row):
        baseline = row['baseline_forwards']
        if pd.isnull(baseline) or not baseline > 0:
            return None
        return round(row[forwards_col] / baseline * 100, 2)

    frame['相对 large 算力消耗 (%)'] = frame.apply(relative_cost, axis=1)

    # The baseline column was only needed for the computation above.
    return frame.drop(columns=['baseline_forwards'], errors='ignore')

def generate_markdown_table(df):
    """Render all rows as a single GitHub-flavoured markdown table.

    Rows are ordered by method (ascending) and, within a method, by
    Edge-Cloud bandwidth (descending). The index is not rendered.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]
    ordered = df.sort_values(by=sort_keys, ascending=[True, False])
    return ordered.to_markdown(index=False, tablefmt="github")


def get_pandas_table(df):
    """Return a copy of *df* ordered by method, then by descending bandwidth.

    The method column is sorted ascending and the Edge-Cloud bandwidth column
    descending; the input frame itself is left unmodified.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]
    directions = [True, False]
    return df.sort_values(by=sort_keys, ascending=directions)


def generate_grouped_tables(df):
    """Render one GitHub-flavoured markdown table per method.

    Each section consists of a heading naming the method followed by that
    method's rows ordered by descending Edge-Cloud bandwidth. Sections are
    joined with a newline and returned as a single string.
    """

    def render(rows):
        ordered = rows.sort_values("Edge-Cloud 带宽 (Mbps)", ascending=False)
        return ordered.to_markdown(index=False, tablefmt="github")

    sections = [
        f"## 方法: {method}\n\n{render(rows)}\n"
        for method, rows in df.groupby("方法")
    ]
    return "\n".join(sections)


def generate_dataset_grouped_tables(df):
    """Render one GitHub-flavoured markdown table per dataset.

    Each section consists of a heading naming the dataset followed by that
    dataset's rows ordered by method (ascending) and then by Edge-Cloud
    bandwidth (descending). Sections are joined with a newline and returned
    as a single string.
    """
    sort_keys = ["方法", "Edge-Cloud 带宽 (Mbps)"]

    def render(rows):
        ordered = rows.sort_values(sort_keys, ascending=[True, False])
        return ordered.to_markdown(index=False, tablefmt="github")

    sections = [
        f"## 数据集: {dataset}\n\n{render(rows)}\n"
        for dataset, rows in df.groupby("数据集")
    ]
    return "\n".join(sections)


def jupyter_display(df):
    """Show the results in a Jupyter notebook as three groups of tables.

    The frame is displayed (1) as one table sorted by method and bandwidth,
    (2) as one table per method, and (3) as one table per dataset. Headings
    are rendered through ``Markdown``; the frames themselves are passed to
    ``display`` directly so the notebook renders them as tables.
    """
    bandwidth = "Edge-Cloud 带宽 (Mbps)"
    # Method ascending, then bandwidth descending.
    method_then_bandwidth = {"by": ["方法", bandwidth], "ascending": [True, False]}

    # --- 1. Single table, uniformly sorted ---
    display(Markdown("# 实验结果对比表（按方法和带宽排序）"))
    display(df.sort_values(**method_then_bandwidth))

    # --- 2. One table per method, bandwidth descending ---
    display(Markdown("# 实验结果对比表（按方法分组）"))
    for method, method_rows in df.groupby("方法"):
        display(Markdown(f"## 方法: {method}"))
        display(method_rows.sort_values(bandwidth, ascending=False))

    # --- 3. One table per dataset, sorted by method then bandwidth ---
    display(Markdown("# 实验结果对比表（按数据集分组）"))
    for dataset, dataset_rows in df.groupby("数据集"):
        display(Markdown(f"## 数据集: {dataset}"))
        display(dataset_rows.sort_values(**method_then_bandwidth))


def main(
    file_path="experiment_summary_20260122_211331.json",
    output_path="experiment_results_table.md",
):
    """Run the whole report pipeline: load, tabulate, display and save.

    Args:
        file_path: JSON experiment summary to read. Defaults to the summary
            file this report was originally written for.
        output_path: Markdown file the rendered tables are written to.

    The tables are shown through ``jupyter_display`` and also written to
    ``output_path`` in three sections (unified, per method, per dataset).
    """
    # Load the raw records and flatten them into table rows.
    experiments = read_experiment_data(file_path)
    experiment_data = extract_experiment_info(experiments)

    # Convert to a DataFrame and add the derived metrics.
    df = process_data(experiment_data)

    unified_table = generate_markdown_table(df)
    method_grouped_tables = generate_grouped_tables(df)
    dataset_grouped_tables = generate_dataset_grouped_tables(df)

    jupyter_display(df)

    # Visual divider placed between the sections of the saved report.
    separator = "\n\n" + "=" * 80 + "\n\n"

    # Save the report.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("# 实验结果对比表\n\n")
        f.write("## 1. 统一对比表（按方法和带宽排序）\n\n")
        f.write(unified_table)
        f.write(separator)
        f.write("## 2. 按方法分组表格\n\n")
        f.write(method_grouped_tables)
        f.write(separator)
        f.write("## 3. 按数据集分组表格\n\n")
        f.write(dataset_grouped_tables)

    print(f"\n表格已保存到 {output_path} 文件中")


# Run the report pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


# 实验结果对比表（按方法和带宽排序）

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
20,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,0.325,2277,449.94,188.54,686608,23.22,0.0,22.24
21,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,0,4476,539.64,0.47,1897368,38.87,0.0,21.86
22,eval/eval_cnndm.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.2836086749786775, 'rouge2': 0.124...",1938,282.61,0.22,892494,37.07,0.0,18.93
23,eval/eval_xsum.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.14652722471333562, 'rouge2': 0.02...",1972,258.76,0.21,839766,40.45,0.0,19.26
24,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,0.0125,2085,439.4,185.91,724452,23.97,0.0,20.36
10,eval/eval_humaneval.py,34.6,563,dist_spec,0.325,2500,1901.72,1655.68,9862202,5.38,0.0,24.41
11,eval/eval_cnndm.py,34.6,563,dist_spec,"{'rouge1': 0.28424497032430085, 'rouge2': 0.12...",2370,270.68,4.78,20671598,37.83,0.0,23.14
12,eval/eval_mt_bench_noeval.py,34.6,563,dist_spec,0,5329,557.39,5.98,25857254,36.74,0.0,26.02
13,eval/eval_gsm8k.py,34.6,563,dist_spec,0.0125,2563,1966.2,1713.42,9584112,5.21,0.0,25.03
14,eval/eval_xsum.py,34.6,563,dist_spec,"{'rouge1': 0.14709188110710264, 'rouge2': 0.02...",2403,272.21,4.3,18618190,37.62,0.0,23.47


# 实验结果对比表（按方法分组）

## 方法: adaptive_tridecoding

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
20,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,0.325,2277,449.94,188.54,686608,23.22,0.0,22.24
21,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,0,4476,539.64,0.47,1897368,38.87,0.0,21.86
22,eval/eval_cnndm.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.2836086749786775, 'rouge2': 0.124...",1938,282.61,0.22,892494,37.07,0.0,18.93
23,eval/eval_xsum.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.14652722471333562, 'rouge2': 0.02...",1972,258.76,0.21,839766,40.45,0.0,19.26
24,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,0.0125,2085,439.4,185.91,724452,23.97,0.0,20.36


## 方法: dist_spec

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
10,eval/eval_humaneval.py,34.6,563,dist_spec,0.325,2500,1901.72,1655.68,9862202,5.38,0.0,24.41
11,eval/eval_cnndm.py,34.6,563,dist_spec,"{'rouge1': 0.28424497032430085, 'rouge2': 0.12...",2370,270.68,4.78,20671598,37.83,0.0,23.14
12,eval/eval_mt_bench_noeval.py,34.6,563,dist_spec,0,5329,557.39,5.98,25857254,36.74,0.0,26.02
13,eval/eval_gsm8k.py,34.6,563,dist_spec,0.0125,2563,1966.2,1713.42,9584112,5.21,0.0,25.03
14,eval/eval_xsum.py,34.6,563,dist_spec,"{'rouge1': 0.14709188110710264, 'rouge2': 0.02...",2403,272.21,4.3,18618190,37.62,0.0,23.47


## 方法: dist_split_spec

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
5,eval/eval_humaneval.py,34.6,563,dist_split_spec,0.325,2500,2738.1,2492.33,547472,3.74,0.0,24.41
6,eval/eval_cnndm.py,34.6,563,dist_split_spec,"{'rouge1': 0.28424497032430085, 'rouge2': 0.12...",2370,263.73,0.09,399188,38.83,0.0,23.14
7,eval/eval_mt_bench_noeval.py,34.6,563,dist_split_spec,0,5329,550.67,0.33,1421306,37.19,0.0,26.02
8,eval/eval_gsm8k.py,34.6,563,dist_split_spec,0.0125,2563,2796.45,2544.34,599474,3.66,0.0,25.03
9,eval/eval_xsum.py,34.6,563,dist_split_spec,"{'rouge1': 0.14709188110710264, 'rouge2': 0.02...",2403,260.77,0.1,411298,39.27,0.0,23.47


## 方法: large

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
0,eval/eval_humaneval.py,34.6,563,large,0.3375,10240,454.75,0.0,0,22.52,0.0,100.0
1,eval/eval_mt_bench_noeval.py,34.6,563,large,0,20480,925.96,0.0,0,22.12,0.0,100.0
2,eval/eval_cnndm.py,34.6,563,large,"{'rouge1': 0.28717835047102713, 'rouge2': 0.12...",10240,494.5,0.0,0,20.71,0.0,100.0
3,eval/eval_gsm8k.py,34.6,563,large,0.0125,10240,454.08,0.0,0,22.55,0.0,100.0
4,eval/eval_xsum.py,34.6,563,large,"{'rouge1': 0.14265407624458998, 'rouge2': 0.02...",10240,484.85,0.0,0,21.12,0.0,100.0


## 方法: uncertainty_decoding

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
15,eval/eval_humaneval.py,34.6,563,uncertainty_decoding,0.3,264,3436.09,3122.1,8222730,2.98,0.0,2.58
16,eval/eval_mt_bench_noeval.py,34.6,563,uncertainty_decoding,0,561,6918.07,6263.68,27151766,2.96,0.0,2.74
17,eval/eval_cnndm.py,34.6,563,uncertainty_decoding,"{'rouge1': 0.2434004871810022, 'rouge2': 0.094...",248,3494.61,3141.87,33177718,2.93,0.0,2.42
18,eval/eval_xsum.py,34.6,563,uncertainty_decoding,"{'rouge1': 0.1497173225813533, 'rouge2': 0.029...",269,3495.74,3150.66,27923540,2.93,0.0,2.63
19,eval/eval_gsm8k.py,34.6,563,uncertainty_decoding,0.0,346,3490.22,3172.72,7443472,2.93,0.0,3.38


# 实验结果对比表（按数据集分组）

## 数据集: eval/eval_cnndm.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
22,eval/eval_cnndm.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.2836086749786775, 'rouge2': 0.124...",1938,282.61,0.22,892494,37.07,0.0,18.93
11,eval/eval_cnndm.py,34.6,563,dist_spec,"{'rouge1': 0.28424497032430085, 'rouge2': 0.12...",2370,270.68,4.78,20671598,37.83,0.0,23.14
6,eval/eval_cnndm.py,34.6,563,dist_split_spec,"{'rouge1': 0.28424497032430085, 'rouge2': 0.12...",2370,263.73,0.09,399188,38.83,0.0,23.14
2,eval/eval_cnndm.py,34.6,563,large,"{'rouge1': 0.28717835047102713, 'rouge2': 0.12...",10240,494.5,0.0,0,20.71,0.0,100.0
17,eval/eval_cnndm.py,34.6,563,uncertainty_decoding,"{'rouge1': 0.2434004871810022, 'rouge2': 0.094...",248,3494.61,3141.87,33177718,2.93,0.0,2.42


## 数据集: eval/eval_gsm8k.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
24,eval/eval_gsm8k.py,34.6,563,adaptive_tridecoding,0.0125,2085,439.4,185.91,724452,23.97,0.0,20.36
13,eval/eval_gsm8k.py,34.6,563,dist_spec,0.0125,2563,1966.2,1713.42,9584112,5.21,0.0,25.03
8,eval/eval_gsm8k.py,34.6,563,dist_split_spec,0.0125,2563,2796.45,2544.34,599474,3.66,0.0,25.03
3,eval/eval_gsm8k.py,34.6,563,large,0.0125,10240,454.08,0.0,0,22.55,0.0,100.0
19,eval/eval_gsm8k.py,34.6,563,uncertainty_decoding,0.0,346,3490.22,3172.72,7443472,2.93,0.0,3.38


## 数据集: eval/eval_humaneval.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
20,eval/eval_humaneval.py,34.6,563,adaptive_tridecoding,0.325,2277,449.94,188.54,686608,23.22,0.0,22.24
10,eval/eval_humaneval.py,34.6,563,dist_spec,0.325,2500,1901.72,1655.68,9862202,5.38,0.0,24.41
5,eval/eval_humaneval.py,34.6,563,dist_split_spec,0.325,2500,2738.1,2492.33,547472,3.74,0.0,24.41
0,eval/eval_humaneval.py,34.6,563,large,0.3375,10240,454.75,0.0,0,22.52,0.0,100.0
15,eval/eval_humaneval.py,34.6,563,uncertainty_decoding,0.3,264,3436.09,3122.1,8222730,2.98,0.0,2.58


## 数据集: eval/eval_mt_bench_noeval.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
21,eval/eval_mt_bench_noeval.py,34.6,563,adaptive_tridecoding,0,4476,539.64,0.47,1897368,38.87,0.0,21.86
12,eval/eval_mt_bench_noeval.py,34.6,563,dist_spec,0,5329,557.39,5.98,25857254,36.74,0.0,26.02
7,eval/eval_mt_bench_noeval.py,34.6,563,dist_split_spec,0,5329,550.67,0.33,1421306,37.19,0.0,26.02
1,eval/eval_mt_bench_noeval.py,34.6,563,large,0,20480,925.96,0.0,0,22.12,0.0,100.0
16,eval/eval_mt_bench_noeval.py,34.6,563,uncertainty_decoding,0,561,6918.07,6263.68,27151766,2.96,0.0,2.74


## 数据集: eval/eval_xsum.py

Unnamed: 0,数据集,Edge-Cloud 带宽 (Mbps),Edge-End 带宽 (Mbps),方法,准确率,目标模型 Forward 次数,挂钟时间 (s),通信时间 (s),Edge-Cloud 数据传输量 (bytes),Token 输出速率 (tokens/s),通信能量,相对 large 算力消耗 (%)
23,eval/eval_xsum.py,34.6,563,adaptive_tridecoding,"{'rouge1': 0.14652722471333562, 'rouge2': 0.02...",1972,258.76,0.21,839766,40.45,0.0,19.26
14,eval/eval_xsum.py,34.6,563,dist_spec,"{'rouge1': 0.14709188110710264, 'rouge2': 0.02...",2403,272.21,4.3,18618190,37.62,0.0,23.47
9,eval/eval_xsum.py,34.6,563,dist_split_spec,"{'rouge1': 0.14709188110710264, 'rouge2': 0.02...",2403,260.77,0.1,411298,39.27,0.0,23.47
4,eval/eval_xsum.py,34.6,563,large,"{'rouge1': 0.14265407624458998, 'rouge2': 0.02...",10240,484.85,0.0,0,21.12,0.0,100.0
18,eval/eval_xsum.py,34.6,563,uncertainty_decoding,"{'rouge1': 0.1497173225813533, 'rouge2': 0.029...",269,3495.74,3150.66,27923540,2.93,0.0,2.63



表格已保存到 experiment_results_table.md 文件中
