# Create results tables

In [1]:
import pandas as pd
from pathlib import Path
from collections import defaultdict

# Models to include in the results tables, as "<organisation>/<model>" ids.
# The list is used in two ways elsewhere in this notebook:
#   * get_data / get_10x_data skip every result directory whose
#     "<parent dir>/<dir>" name is not listed here;
#   * data_to_df uses it as the category order when sorting the table rows,
#     so the order of the entries below is the row order of the tables.
model_order = [
    "mimir-project/mimir-7b-books",
    "mimir-project/mimir-7b-factual",
    "mimir-project/mimir-7b-fiction",
    "mimir-project/mimir-7b-newspapers",
    "mimir-project/mimir-7b-nonfiction",
    "mimir-project/mimir-7b-rightholders",
    "mimir-project/mimir-7b-translated",
    "mimir-project/mimir-7b-untranslated",
    "mimir-project/mimir-7b-untranslated-withnewspapers",
    "mimir-project/mimir-mistral-7b-base",
    "mimir-project/mimir-mistral-7b-base-instruct",
    "mimir-project/mimir-mistral-7b-base-scratch",
    "mimir-project/mimir-mistral-7b-base-scratch-instruct",
    "mimir-project/mimir-mistral-7b-core-scratch",
    "mimir-project/mimir-mistral-7b-core",
    "mimir-project/mimir-mistral-7b-core-scratch-instruct",
    "mimir-project/mimir-mistral-7b-extended",
    "mimir-project/mimir-mistral-7b-extended-scratch",
    "mimir-project/mimir-mistral-7b-extended-scratch-instruct",
    "mimir-project/mimir-mistral-7b-extended-instruct",
    "mistralai/Mistral-7B-v0.1",
    "mimir-project/mimir-mistral-1b-base-scratch",
    "mimir-project/mimir-mistral-1b-extended-scratch",
    "mimir-project/mimir-mistral-250m-base-scratch",
    "mimir-project/mimir-mistral-250m-extended-scratch",
]

# Metric names kept in the results tables. results_file_to_scores drops every
# metric key that is not listed here, and data_to_df selects exactly these
# columns (after "model"), in this order.
# NOTE(review): the _nob / _nno suffixes presumably denote the two evaluated
# language variants (Norwegian Bokmål / Nynorsk) — confirm.
scores_to_keep = [
    "compression_ratio_nob",
    "compression_ratio_nno",
    "lix_score_nob",
    "lix_score_nno",
    "self_bleu_nob",
    "self_bleu_nno",
]


def data_to_df(data: dict[str, list[str | float]]) -> pd.DataFrame:
    """Build the results table from per-column value lists.

    Args:
        data: Mapping of column name to a list of values, one entry per
            model (the shape returned by ``get_data`` / ``get_10x_data``).
            It must contain a "model" column and every column named in
            ``scores_to_keep``; any other column is dropped.

    Returns:
        A DataFrame with "model" followed by the ``scores_to_keep`` columns.
        Rows are sorted by the position of the model in ``model_order`` and
        re-indexed from 0. Models of the "mimir-project" organisation are
        shown without the organisation prefix; other names are kept as is.

    Raises:
        KeyError: If "model" or one of the ``scores_to_keep`` columns is
            missing from ``data``.
    """
    wanted_columns = ["model", *scores_to_keep]
    table = pd.DataFrame(data)[wanted_columns]

    # An ordered categorical makes sort_values follow model_order instead of
    # alphabetical order.
    table["model"] = pd.Categorical(
        table["model"], categories=model_order, ordered=True
    )
    table = table.sort_values("model").reset_index(drop=True)

    # Strip the organisation prefix, but only for mimir-project models.
    table["model"] = table["model"].apply(
        lambda name: name.split("/")[1] if name.startswith("mimir-project") else name
    )
    return table


def minmax_lix(lix: float, lix_min: float = 20, lix_max: float = 60) -> float:
    """Rescale a LIX score to an inverted 0-100 scale.

    ``lix_min`` maps to 100 and ``lix_max`` maps to 0, so a lower LIX score
    gives a higher result. Scores outside ``[lix_min, lix_max]`` are not
    clipped and therefore fall outside 0-100.

    Args:
        lix: The LIX score to rescale.
        lix_min: LIX value mapped to 100 (default 20).
        lix_max: LIX value mapped to 0 (default 60).

    Returns:
        ``100 * (lix_max - lix) / (lix_max - lix_min)``.

    Raises:
        ZeroDivisionError: If ``lix_min == lix_max``.
    """
    return 100 * (lix_max - lix) / (lix_max - lix_min)


def inverse_compression_ratio(comp_ratio: float) -> float:
    """Return the reciprocal of ``comp_ratio`` expressed as a percentage.

    A lower compression ratio gives a higher result (ratio 1 -> 100,
    ratio 4 -> 25). A ratio of 0 raises ``ZeroDivisionError``.
    """
    reciprocal = 1 / comp_ratio
    return 100 * reciprocal


def inverse_self_bleu(self_bleu: float) -> float:
    """Return ``1 - self_bleu`` expressed as a percentage.

    A lower self-BLEU gives a higher result (0 -> 100, 1 -> 0).
    """
    complement = 1 - self_bleu
    return 100 * complement


def normalize_and_rank(df: pd.DataFrame) -> pd.DataFrame:
    """Add normalized score columns, their average and a rank to ``df``.

    For each raw score column a derived column is added in which higher is
    better: the compression ratios are inverted, the LIX scores are min-max
    rescaled and the self-BLEU scores are inverted. "average" is the row-wise
    mean of the six derived columns and "rank" orders the rows by that
    average, with the highest average ranked 1.

    The columns are added to ``df`` itself, and that same object is returned.
    """
    # (derived column, raw column, transform), in the order the derived
    # columns are added to the frame.
    derived_specs = (
        ("inverse_compression_nob", "compression_ratio_nob", inverse_compression_ratio),
        ("inverse_compression_nno", "compression_ratio_nno", inverse_compression_ratio),
        ("min_max_lix_nob", "lix_score_nob", minmax_lix),
        ("min_max_lix_nno", "lix_score_nno", minmax_lix),
        ("inverse_sb_nob", "self_bleu_nob", inverse_self_bleu),
        ("inverse_sb_nno", "self_bleu_nno", inverse_self_bleu),
    )

    derived_columns = []
    for derived_name, raw_name, transform in derived_specs:
        df[derived_name] = df[raw_name].apply(transform)
        derived_columns.append(derived_name)

    df["average"] = df[derived_columns].mean(axis=1)

    # ascending=False: the best (highest) average gets rank 1.
    df["rank"] = df["average"].rank(ascending=False)
    return df

## Create greedy table

In [2]:
def results_file_to_scores(results_file: Path) -> dict[str, float]:
    """Read a JSON-lines results file and return the scores to keep.

    The file is grouped by its "dataset" field. For every dataset the first
    element of its "results" value is read as a mapping of metric name to
    value, and only metrics listed in ``scores_to_keep`` are returned. If the
    same metric occurs for several datasets, the one from the last group wins.

    Raises:
        ValueError: If a dataset occurs on more than one line (``.item()``
            requires exactly one row per dataset).
    """
    records = pd.read_json(results_file, lines=True)

    scores = {}
    for _, dataset_rows in records.groupby("dataset"):
        metrics = dataset_rows.results.item()[0]
        for metric, value in metrics.items():
            if metric in scores_to_keep:
                scores[metric] = value
    return scores


def get_data(data_path: Path) -> dict[str, list[str | float]]:
    data = defaultdict(list)
    for e in data_path.glob("*/*/"):
        if not e.is_dir:
            continue
        model_name = f"{e.parent.name}/{e.name}"
        if model_name not in model_order:
            continue

        results_file = next(e.glob("evaluate_all/results.jsonl"), None)
        if results_file is None:
            print(f"No results file in {e}")
            continue
        scores = results_file_to_scores(results_file)
        data["model"].append(model_name)
        for score, value in scores.items():
            data[score].append(value)
    return data


def get_10x_data(data_path: Path) -> dict[str, list[str | float]]:
    """Get average scores of 10 runs"""
    data = defaultdict(list)
    for e in data_path.glob("*/*/"):
        if not e.is_dir:
            continue
        model_name = f"{e.parent.name}/{e.name}"
        if model_name not in model_order:
            continue
        results_files = list(e.glob("*/results.jsonl"))
        assert len(results_files) == 10
        scores_sums = defaultdict(float)
        for f in results_files:
            scores = results_file_to_scores(f)
            for k, v in scores.items():
                scores_sums[k] += v
        scores_avg = {k: v / 10 for k, v in scores_sums.items()}
        data["model"].append(model_name)
        for score, value in scores_avg.items():
            data[score].append(value)
    return data


# Build the table for the results stored under ../results/greedy/.
greedy_p = Path("../results/greedy/")
data = get_data(greedy_p)
df = normalize_and_rank(data_to_df(data))

# Save the table next to the raw results; the bare `df` renders it as the
# cell output.
df.to_csv(greedy_p / "results_table.csv", index=False)
df

Unnamed: 0,model,compression_ratio_nob,compression_ratio_nno,lix_score_nob,lix_score_nno,self_bleu_nob,self_bleu_nno,inverse_compression_nob,inverse_compression_nno,min_max_lix_nob,min_max_lix_nno,inverse_sb_nob,inverse_sb_nno,average,rank
0,mimir-7b-books,7.35,6.095,30.905883,23.46447,0.502069,0.328384,13.605442,16.406891,72.735292,91.338824,49.7931,67.161586,51.840189,15.0
1,mimir-7b-factual,6.11,5.493,28.712526,25.425102,0.506507,0.318005,16.366612,18.204988,78.218685,86.437246,49.349286,68.199541,52.79606,8.0
2,mimir-7b-fiction,6.391,5.575,23.240108,20.28289,0.509943,0.31501,15.647004,17.93722,91.899731,99.292774,49.005688,68.499037,57.046909,1.0
3,mimir-7b-newspapers,5.624,5.187,28.995241,27.083251,0.508379,0.342004,17.780939,19.278967,77.511897,82.291873,49.162143,65.799593,51.970902,14.0
4,mimir-7b-nonfiction,6.911,5.191,31.434708,26.054407,0.495569,0.31021,14.469686,19.264111,71.413229,84.863982,50.443098,68.978979,51.572181,19.0
5,mimir-7b-rightholders,6.309,5.382,30.136096,25.055535,0.498363,0.328599,15.850372,18.580453,74.65976,87.361162,50.163679,67.140133,52.292593,13.0
6,mimir-7b-translated,6.09,4.949,26.196948,23.950209,0.49259,0.327957,16.420361,20.206102,84.507629,90.124477,50.740975,67.204259,54.867301,2.0
7,mimir-7b-untranslated,6.918,6.075,31.648874,24.906518,0.477005,0.3168,14.455045,16.460905,70.877816,87.733704,52.29946,68.320042,51.691162,18.0
8,mimir-7b-untranslated-withnewspapers,6.72,5.563,27.999311,25.61284,0.504478,0.338717,14.880952,17.975912,80.001723,85.967899,49.552171,66.128274,52.417822,10.0
9,mimir-mistral-7b-base,4.79,4.28,31.521102,27.25665,0.479894,0.301469,20.876827,23.364486,71.197246,81.858374,52.010603,69.853118,53.193442,5.0


In [3]:
# Build the table for the results stored under ../results/contrastive-10x/,
# where the scores of each model are averaged over its 10 runs.
contrastive_10x_p = Path("../results/contrastive-10x/")
data = get_10x_data(contrastive_10x_p)
df = normalize_and_rank(data_to_df(data))

# Save the table next to the raw results; the bare `df` renders it as the
# cell output.
df.to_csv(contrastive_10x_p / "results_table.csv", index=False)
df

Unnamed: 0,model,compression_ratio_nob,compression_ratio_nno,lix_score_nob,lix_score_nno,self_bleu_nob,self_bleu_nno,inverse_compression_nob,inverse_compression_nno,min_max_lix_nob,min_max_lix_nno,inverse_sb_nob,inverse_sb_nno,average,rank
0,mimir-7b-books,2.5505,2.4145,35.240286,29.356574,0.289581,0.227093,39.207998,41.416442,61.899285,76.608564,71.041897,77.290711,61.244149,4.0
1,mimir-7b-factual,2.4969,2.3764,35.056581,30.862024,0.28162,0.225679,40.049662,42.080458,62.358548,72.844941,71.838022,77.432137,61.100628,6.0
2,mimir-7b-fiction,2.4991,2.4087,30.050718,25.989698,0.30789,0.246371,40.014405,41.516171,74.873206,85.025756,69.210971,75.36285,64.333893,1.0
3,mimir-7b-newspapers,2.4586,2.3799,34.923326,31.581992,0.278964,0.234453,40.673554,42.018572,62.691684,71.04502,72.103571,76.55471,60.847852,9.0
4,mimir-7b-nonfiction,2.5666,2.3913,37.195785,31.06348,0.278809,0.214162,38.962051,41.818258,57.010538,72.3413,72.119117,78.583808,60.139179,14.0
5,mimir-7b-rightholders,2.4844,2.3794,34.803518,30.619289,0.282562,0.228682,40.251167,42.027402,62.991205,73.451778,71.743791,77.131798,61.26619,3.0
6,mimir-7b-translated,2.5178,2.3942,33.667913,28.181469,0.302444,0.237206,39.717213,41.767605,65.830218,79.546326,69.755595,76.279396,62.149392,2.0
7,mimir-7b-untranslated,2.5538,2.4059,36.265242,29.994066,0.276205,0.224568,39.157334,41.564487,59.336896,75.014835,72.379455,77.543185,60.832699,10.0
8,mimir-7b-untranslated-withnewspapers,2.4987,2.3926,35.105983,30.813779,0.282606,0.230568,40.020811,41.795536,62.235043,72.965552,71.739376,76.943229,60.949924,7.0
9,mimir-mistral-7b-base,2.4732,2.3665,37.211602,31.925394,0.258074,0.209464,40.433447,42.256497,56.970995,70.186515,74.192632,79.053621,60.515618,11.0
