# Create results tables

In [None]:
import pandas as pd
from pathlib import Path
from collections import defaultdict

# Models to include in the results tables, in the order their rows should
# appear. `data_to_df` uses this list as the ordered categories when sorting
# rows, and `get_data` / `get_10x_data` skip every "<org>/<model>" directory
# whose name is not listed here.
model_order = [
    "mimir-project/mimir-7b-books",
    "mimir-project/mimir-7b-factual",
    "mimir-project/mimir-7b-fiction",
    "mimir-project/mimir-7b-newspapers",
    "mimir-project/mimir-7b-nonfiction",
    "mimir-project/mimir-7b-rightholders",
    "mimir-project/mimir-7b-translated",
    "mimir-project/mimir-7b-untranslated",
    "mimir-project/mimir-7b-untranslated-withnewspapers",
    "mimir-project/mimir-mistral-7b-base",
    "mimir-project/mimir-mistral-7b-base-instruct",
    "mimir-project/mimir-mistral-7b-base-scratch",
    "mimir-project/mimir-mistral-7b-base-scratch-instruct",
    "mimir-project/mimir-mistral-7b-extended",
    "mimir-project/mimir-mistral-7b-extended-scratch",
    "mimir-project/mimir-mistral-7b-extended-scratch-instruct",
    "mimir-project/mimir-mistral-7b-extended-instruct",
    "mistralai/Mistral-7B-v0.1",
    "mimir-project/mimir-mistral-1b-base-scratch",
    "mimir-project/mimir-mistral-1b-extended-scratch",
    "mimir-project/mimir-mistral-250m-base-scratch",
    "mimir-project/mimir-mistral-250m-extended-scratch",
    "mimir-project/mimir-mistral-7b-core-scratch",
    "mimir-project/mimir-mistral-7b-core",

]

# Score names that are extracted from each results file
# (`results_file_to_scores`) and kept as table columns (`data_to_df`).
# NOTE(review): the "_nob" / "_nno" suffixes presumably denote the Norwegian
# Bokmål / Nynorsk evaluation sets — confirm.
scores_to_keep = [
    "compression_ratio_nob",
    "compression_ratio_nno",
    "lix_score_nob",
    "lix_score_nno",
    "self_bleu_nob",
    "self_bleu_nno",
]


def data_to_df(data: dict[str, list[str | float]]) -> pd.DataFrame:
    """Build the results table from column-wise score data.

    Args:
        data: Mapping from column name to a list of values, one entry per
            model (the shape returned by ``get_data``). It must contain a
            "model" column and every name in ``scores_to_keep``.

    Returns:
        A DataFrame holding the "model" column followed by the
        ``scores_to_keep`` columns. Rows are ordered as in ``model_order``
        and the organisation prefix is dropped from "mimir-project/..."
        model names.

    Raises:
        KeyError: If "model" or one of the ``scores_to_keep`` columns is
            missing from ``data``.
    """
    # Only keep relevant columns. The explicit copy guarantees that the
    # assignments below write to an independent frame, not to a slice.
    df = pd.DataFrame(data)[["model"] + scores_to_keep].copy()

    # An ordered categorical makes sort_values follow model_order instead of
    # alphabetical order. Names missing from model_order become NaN here.
    df["model"] = pd.Categorical(df["model"], categories=model_order, ordered=True)
    df = df.sort_values("model").reset_index(drop=True)

    # Shorten project model names to the part after "<org>/"; names from
    # other organisations are kept in full.
    df["model"] = df.model.apply(
        lambda x: x.split("/")[1] if x.startswith("mimir-project") else x
    )
    return df


def minmax_lix(lix: float, *, lix_min: float = 20, lix_max: float = 60) -> float:
    """Rescale a LIX score to a 0-100 scale on which a lower LIX scores higher.

    ``lix_max`` maps to 0 and ``lix_min`` maps to 100. Values outside
    ``[lix_min, lix_max]`` are not clamped, so the result can fall outside
    0-100.

    Args:
        lix: The LIX score to rescale.
        lix_min: LIX value that maps to 100 (default 20).
        lix_max: LIX value that maps to 0 (default 60).

    Returns:
        The rescaled score.

    Raises:
        ValueError: If ``lix_min`` equals ``lix_max`` (empty range).
    """
    if lix_max == lix_min:
        raise ValueError("lix_min and lix_max must differ")
    return 100 * (lix_max - lix) / (lix_max - lix_min)


def inverse_compression_ratio(comp_ratio: float) -> float:
    """Return the reciprocal of ``comp_ratio`` scaled by 100.

    A larger compression ratio therefore yields a smaller value.
    """
    reciprocal = 1 / comp_ratio
    return 100 * reciprocal


def inverse_self_bleu(self_bleu: float) -> float:
    """Return ``1 - self_bleu`` scaled by 100.

    A larger self-BLEU value therefore yields a smaller result.
    """
    complement = 1 - self_bleu
    return 100 * complement


def normalize_and_rank(df: pd.DataFrame) -> pd.DataFrame:
    """Add normalized score columns, their row-wise mean and a rank.

    Compression ratio and self-BLEU are inverted and LIX is min-max scaled,
    each for the "nob" and "nno" columns. "average" is the mean of the six
    derived columns, and "rank" orders rows by that average, highest first.

    The columns are added to ``df`` in place; the same frame is returned.
    """
    # (derived column, source column, transform), listed in output order.
    derived_columns = (
        ("inverse_compression_nob", "compression_ratio_nob", inverse_compression_ratio),
        ("inverse_compression_nno", "compression_ratio_nno", inverse_compression_ratio),
        ("min_max_lix_nob", "lix_score_nob", minmax_lix),
        ("min_max_lix_nno", "lix_score_nno", minmax_lix),
        ("inverse_sb_nob", "self_bleu_nob", inverse_self_bleu),
        ("inverse_sb_nno", "self_bleu_nno", inverse_self_bleu),
    )
    for target, source, transform in derived_columns:
        df[target] = df[source].apply(transform)

    normalized_names = [target for target, _, _ in derived_columns]
    df["average"] = df[normalized_names].mean(axis=1)

    # ascending=False: the highest average receives rank 1.
    df["rank"] = df["average"].rank(ascending=False)
    return df

## Create greedy table

In [None]:
def results_file_to_scores(results_file: Path) -> dict[str, float]:
    """Read a JSON-lines results file and return the scores worth keeping.

    The file is grouped by its "dataset" column. For each group, the first
    element of the "results" value is read as a mapping of score name to
    value, and only names listed in ``scores_to_keep`` are returned. If the
    same score name occurs in several groups, the last group wins.
    """
    records = pd.read_json(results_file, lines=True)
    kept: dict[str, float] = {}
    for _, group in records.groupby("dataset"):
        # .item() only succeeds when the group holds exactly one row.
        dataset_scores = group["results"].item()[0]
        for name, value in dataset_scores.items():
            if name in scores_to_keep:
                kept[name] = value
    return kept


def get_data(data_path: Path) -> dict[str, list[str | float]]:
    data = defaultdict(list)
    for e in data_path.glob("*/*/"):
        if not e.is_dir:
            continue
        model_name = f"{e.parent.name}/{e.name}"
        if model_name not in model_order:
            continue

        results_file = next(e.glob("evaluate_all/results.jsonl"), None)
        if results_file is None:
            print(f"No results file in {e}")
            continue
        scores = results_file_to_scores(results_file)
        data["model"].append(model_name)
        for score, value in scores.items():
            data[score].append(value)
    return data


def get_10x_data(data_path: Path) -> dict[str, list[str | float]]:
    """Get average scores of 10 runs"""
    data = defaultdict(list)
    for e in data_path.glob("*/*/"):
        if not e.is_dir:
            continue
        model_name = f"{e.parent.name}/{e.name}"
        if model_name not in model_order:
            continue
        results_files = list(e.glob("*/results.jsonl"))
        assert len(results_files) == 10
        scores_sums = defaultdict(float)
        for f in results_files:
            scores = results_file_to_scores(f)
            for k, v in scores.items():
                scores_sums[k] += v
        scores_avg = {k: v / 10 for k, v in scores_sums.items()}
        data["model"].append(model_name)
        for score, value in scores_avg.items():
            data[score].append(value)
    return data


# Build the table for the results under ../results/greedy/ and save it as
# results_table.csv in that same directory.
greedy_p = Path("../results/greedy/")
data = get_data(greedy_p)
df = normalize_and_rank(data_to_df(data))
df.to_csv(greedy_p.joinpath("results_table.csv"), index=False)
df  # last expression of the cell: displayed as the cell output

In [None]:
# Build the table of run-averaged scores for the results under
# ../results/contrastive-10x/ and save it as results_table.csv in that
# same directory.
contrastive_10x_p = Path("../results/contrastive-10x/")
data = get_10x_data(contrastive_10x_p)
df = normalize_and_rank(data_to_df(data))
df.to_csv(contrastive_10x_p.joinpath("results_table.csv"), index=False)
df  # last expression of the cell: displayed as the cell output