## RAWデータのテーブル化

In [32]:
import json
from pathlib import Path
from pprint import pprint

import pandas as pd

# Root directory that holds one sub-directory per model family.
base_path = Path("assets")

# (family directory, model name) pairs, in the order they appear in model_dirs.
_MODEL_SPECS = [
    ("Xwin-LM", "Xwin-LM-7B-V0.1"),
    ("Xwin-LM", "Xwin-LM-13B-V0.1"),
    ("elyza", "ELYZA-japanese-Llama-2-7b-instruct"),
    ("elyza", "ELYZA-japanese-Llama-2-13b-instruct"),
    ("cyberagent", "calm2-7b-chat"),
    ("stabilityai", "japanese-stablelm-instruct-beta-7b"),
    ("rinna", "youri-7b-chat"),
    ("tokyotech-llm", "Swallow-7b-instruct"),
    ("tokyotech-llm", "Swallow-13b-instruct"),
]

# model name -> {"family": <family>, "variants": {<variant>: <directory>}}.
# Each model has an "original" directory and a "q4km" directory whose name is
# the model name followed by "-gguf-q4km-hf".
model_dirs = {
    name: {
        "family": family,
        "variants": {
            "original": base_path / family / name,
            "q4km": base_path / family / f"{name}-gguf-q4km-hf",
        },
    }
    for family, name in _MODEL_SPECS
}

In [33]:
def parse_jsonl_files(preds_path, results_path):
    """Pair up the records of a predictions JSONL file and a results JSONL file.

    The i-th JSON record of ``preds_path`` is combined with the i-th JSON
    record of ``results_path``. Blank lines (e.g. a trailing newline at the
    end of a file) are ignored in both files.

    Args:
        preds_path: Path to a JSONL file whose records have a ``"pred"`` key.
        results_path: Path to a JSONL file whose records have ``"reason"``
            and ``"grade"`` keys.

    Returns:
        A list of dicts, one per record pair, with the keys ``"no"``
        (1-based position), ``"result"`` (the ``"pred"`` value), ``"reason"``
        and ``"grade"``.

    Raises:
        ValueError: If the two files contain a different number of records;
            pairing them positionally would attach grades to the wrong
            predictions or silently drop rows.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the expected keys.
    """
    def _read_records(path):
        # One JSON document per non-blank line.
        with open(path, 'r', encoding='utf-8') as fh:
            return [json.loads(line) for line in fh if line.strip()]

    preds = _read_records(preds_path)
    results = _read_records(results_path)

    if len(preds) != len(results):
        raise ValueError(
            f"Record count mismatch: {preds_path} has {len(preds)} records, "
            f"{results_path} has {len(results)}"
        )

    return [
        {
            "no": no,
            "result": pred_data["pred"],
            "reason": result_data["reason"],
            "grade": result_data["grade"],
        }
        for no, (pred_data, result_data) in enumerate(zip(preds, results), start=1)
    ]

# Flatten every (model, variant) evaluation into one list of row dicts,
# then build the DataFrame once at the end.
records = []

for model_name, info in model_dirs.items():
    model_family = info["family"]

    for variant, path in info["variants"].items():
        # Each variant directory is expected to contain these two JSONL files.
        preds_path = path / "preds.jsonl"
        results_path = path / "result.jsonl"

        for entry in parse_jsonl_files(preds_path, results_path):
            records.append({
                # Use the family read from this model's entry; no other
                # variable in this notebook carries it.
                "family": model_family,
                "model_name": model_name,
                "variant": variant,
                "no": entry["no"],
                "result": entry["result"],
                "reason": entry["reason"],
                "grade": entry["grade"]
            })

# One row per (model, variant, question); the aggregation cell below reads
# this frame under the name `df_raw_results`.
df_raw_results = pd.DataFrame(records)
# Kept as an alias for any cell that still refers to the old name.
df_ = df_raw_results


## モデル別平均スコアの集計

In [34]:
# Mean and variance of `grade` for every (family, model_name, variant) group,
# with the aggregate columns renamed to grade_avg / grade_variance.
grade_by_group = df_raw_results.groupby(['family', 'model_name', 'variant'])['grade']

df_summary_stats = (
    grade_by_group
    .agg(['mean', 'var'])
    .rename(columns={'mean': 'grade_avg', 'var': 'grade_variance'})
    .reset_index()
)

df_summary_stats

Unnamed: 0,family,model_name,variant,grade_avg,grade_variance
0,Xwin-LM,Xwin-LM-13B-V0.1,original,2.43,1.661717
1,Xwin-LM,Xwin-LM-13B-V0.1,q4km,2.54,1.907475
2,Xwin-LM,Xwin-LM-7B-V0.1,original,2.32,1.654141
3,Xwin-LM,Xwin-LM-7B-V0.1,q4km,1.96,1.351919
4,cyberagent,calm2-7b-chat,original,2.26,1.709495
5,cyberagent,calm2-7b-chat,q4km,2.58,1.862222
6,elyza,ELYZA-japanese-Llama-2-13b-instruct,original,2.57,2.08596
7,elyza,ELYZA-japanese-Llama-2-13b-instruct,q4km,2.46,1.806465
8,elyza,ELYZA-japanese-Llama-2-7b-instruct,original,2.3,1.848485
9,elyza,ELYZA-japanese-Llama-2-7b-instruct,q4km,2.2,1.636364


variant を横持ちするようにピボットする

In [35]:
# Reshape df_summary_stats from long to wide: one row per (family, model_name),
# with a separate column per metric and variant.
pivoted = df_summary_stats.pivot_table(
    index=['family', 'model_name'],
    columns='variant',
    values=['grade_avg', 'grade_variance'],
    # 'first' simply takes the first value found for each cell.
    aggfunc='first',
)

# Collapse the two-level (metric, variant) column labels into flat names
# such as "grade_avg_original".
pivoted.columns = [f'{metric}_{variant}' for metric, variant in pivoted.columns]

# Move family / model_name out of the index and back into ordinary columns.
df_summary_wide = pivoted.reset_index()

df_summary_wide

Unnamed: 0,family,model_name,grade_avg_original,grade_avg_q4km,grade_variance_original,grade_variance_q4km
0,Xwin-LM,Xwin-LM-13B-V0.1,2.43,2.54,1.661717,1.907475
1,Xwin-LM,Xwin-LM-7B-V0.1,2.32,1.96,1.654141,1.351919
2,cyberagent,calm2-7b-chat,2.26,2.58,1.709495,1.862222
3,elyza,ELYZA-japanese-Llama-2-13b-instruct,2.57,2.46,2.08596,1.806465
4,elyza,ELYZA-japanese-Llama-2-7b-instruct,2.3,2.2,1.848485,1.636364
5,rinna,youri-7b-chat,1.93,1.81,1.722323,1.428182
6,stabilityai,japanese-stablelm-instruct-beta-7b,1.28,1.49,0.648081,0.959495
7,tokyotech-llm,Swallow-13b-instruct,2.12,1.84,1.763232,1.408485
8,tokyotech-llm,Swallow-7b-instruct,1.91,1.91,1.335253,1.496869


In [31]:
# Print the wide summary as plain CSV text (index column included) so it can
# be copied out of the notebook.
summary_csv = df_summary_wide.to_csv()
print(summary_csv)

,family,model_name,grade_avg_original,grade_avg_q4km,grade_variance_original,grade_variance_q4km
0,Xwin-LM,Xwin-LM-13B-V0.1,2.43,2.54,1.6617171717171726,1.9074747474747487
1,Xwin-LM,Xwin-LM-7B-V0.1,2.32,1.96,1.6541414141414144,1.351919191919192
2,cyberagent,calm2-7b-chat,2.26,2.58,1.7094949494949496,1.862222222222222
3,elyza,ELYZA-japanese-Llama-2-13b-instruct,2.57,2.46,2.085959595959596,1.8064646464646466
4,elyza,ELYZA-japanese-Llama-2-7b-instruct,2.3,2.2,1.848484848484848,1.6363636363636371
5,rinna,youri-7b-chat,1.93,1.81,1.7223232323232323,1.428181818181818
6,stabilityai,japanese-stablelm-instruct-beta-7b,1.28,1.49,0.6480808080808079,0.9594949494949492
7,tokyotech-llm,Swallow-13b-instruct,2.12,1.84,1.7632323232323233,1.4084848484848482
8,tokyotech-llm,Swallow-7b-instruct,1.91,1.91,1.335252525252525,1.4968686868686873



出力された CSV を Excel にコピーし、棒グラフに加工する。

（Matplotlib でもよいが、Excel のほうが加工しやすい...）