In [1]:
import pandas as pd
import os

In [2]:
# Both tables are tab-separated despite the .csv extension; the first column
# of each file becomes the row index.
read_options = {"index_col": 0, "sep": "\t"}
results = pd.read_csv("results.csv", **read_options)
finetuned_results = pd.read_csv("finetuned_results.csv", **read_options)

In [3]:
def format_names_for_macros(latex_str):
    r"""Replace plain model names in a LaTeX string with macro invocations.

    Args:
        latex_str: LaTeX source text (e.g. a rendered table) that may contain
            the model names ``SimCSE25``, ``SimCSE21``, ``GreBerta``,
            ``LaBerta``, ``SPhilBERTa`` or ``PhilBERTa``.

    Returns:
        The text with every occurrence of a known model name replaced by its
        macro call (for example ``GreBerta`` becomes ``\greberta{}``).
    """
    # Raw strings keep each backslash literal instead of relying on Python
    # passing unknown escapes such as "\s" or "\g" through unchanged, which
    # triggers a SyntaxWarning on recent interpreter versions.
    # Order matters: "SPhilBERTa" must be replaced before its substring
    # "PhilBERTa", otherwise the result would be "S\philberta{}".
    macro_by_name = (
        ("SimCSE25", r"\simcseuno{}"),
        ("SimCSE21", r"\simcsedue{}"),
        ("GreBerta", r"\greberta{}"),
        ("LaBerta", r"\laberta{}"),
        ("SPhilBERTa", r"\sphilberta{}"),
        ("PhilBERTa", r"\philberta{}"),
    )
    for name, macro in macro_by_name:
        latex_str = latex_str.replace(name, macro)
    return latex_str

In [4]:
# Reshape the raw results into a LaTeX-ready table: one row per run, one
# column per metric.
t_results = results.T.copy()
# Each row label is split on "_" and its first five pieces are stored as the
# columns lang / split / setting / author / model. The split is computed once
# because adding columns does not change the index.
run_name_parts = t_results.index.str.split("_")
for position, level in enumerate(["lang", "split", "setting", "author", "model"]):
    t_results[level] = [parts[position] for parts in run_name_parts]
t_results = (
    t_results
    .reset_index(drop=True)
    .set_index(["lang", "split", "setting", "model"])
    .drop("author", axis=1)
)
# Wrap every metric name in \rotatebox{90}{...} and swap "_" for "-".
t_results.columns = ["\\rotatebox{{90}}{{{col_name}}}".format(col_name=col.replace("_", "-")) for col in t_results.columns]
# Drop every column whose (renamed) header contains "num".
t_results = t_results.loc[:, ~t_results.columns.str.contains("num")]
# Four left-aligned index columns followed by one centred column per metric.
col_format = "l" * 4 + "c" * (len(t_results.columns))
# Insert a vertical rule before the last 18 columns and another before the
# last 9 columns (presumably the P@k and NDCG@k groups -- confirm if the
# metric set changes).
col_format = col_format[:-18] + "|" + col_format[-18:-9] + "|" + col_format[-9:]
# Per-column cell formatters. The three groups are merged in order, so a later
# group overrides an earlier one for any column that matches both.
formatters = {
    # Fallback: plain two-decimal number for columns that are neither P@k nor NDCG.
    **{
        col: lambda x: f"{x:.2f}"
        for col in t_results.columns if "P@" not in col and "NDCG" not in col
    },
    # Wrapped in \gradient{...}; the raw string keeps the backslash literal
    # without relying on "\g" being an unrecognised escape sequence.
    **{
        col: lambda x: r"\gradient{" + f"{x:.2f}" + "}"
        for col in t_results.columns
        if any(tag in col for tag in ("P@", "map", "recip-rank", "bpref", "Rprec"))
    },
    # NDCG columns are wrapped in \newgradient{...}.
    **{
        col: lambda x: r"\newgradient{" + f"{x:.2f}" + "}"
        for col in t_results.columns if "NDCG" in col
    },
}

def save_to_latex(results, file_name, col_format, formatters):
    r"""Render ``results`` as a LaTeX table and write it to ``tables/<file_name>``.

    The text produced by ``DataFrame.to_latex`` is post-processed before it is
    saved:

    * ``\toprule`` is removed and every ``cline`` is renamed to ``cmidrule``;
    * a line containing ``rule`` is dropped when the following line also
      contains ``rule``;
    * a line containing ``\cmidrule`` is cut down to ``\cmidrule`` plus the
      text between its first occurrence and the next one (anything before the
      first occurrence and any later ``\cmidrule`` is discarded);
    * model names are replaced by macros via ``format_names_for_macros``.

    The ``tables`` directory is created if it does not exist.

    Args:
        results: DataFrame to export; its index is written as well.
        file_name: Name of the file to create inside ``tables``.
        col_format: LaTeX column specification, passed as ``column_format``.
        formatters: Per-column formatting callables, passed to ``to_latex``.
    """
    latex_str = results.to_latex(
        index=True,
        float_format="%.3f",
        column_format=col_format,
        formatters=formatters,
    )
    latex_str = latex_str.replace(r"\toprule", "").replace("cline", "cmidrule")

    lines = latex_str.split("\n")
    new_lines = []
    # Pair every line with its successor. The final element is never emitted:
    # it only serves as look-ahead (it is "" when the text ends with "\n").
    for line, next_line in zip(lines, lines[1:]):
        if "rule" in line and "rule" in next_line:
            continue
        # Raw strings: "\c" is not a valid escape sequence in a normal literal.
        if r"\cmidrule" in line:
            line = (r"\cmidrule" + line.split(r"\cmidrule")[1]).strip()
        new_lines.append(line)
    latex_str = format_names_for_macros("\n".join(new_lines))

    os.makedirs("tables", exist_ok=True)
    # Explicit encoding so the written file does not depend on the platform's
    # locale default.
    with open(f"tables/{file_name}", "w", encoding="utf-8") as f:
        f.write(latex_str)

# Export the complete table to tables/results.tex.
save_to_latex(
    results=t_results,
    file_name="results.tex",
    col_format=col_format,
    formatters=formatters,
)

In [5]:
# Substrings identifying the metric columns to keep. The braces anchor the
# match inside "\rotatebox{90}{...}": "{map" skips "gm-map" and "P@5}" skips
# "P@500}".
metric_tags = ["{map", "recip-rank", "P@5}", "P@10}", "NDCG@5}", "NDCG@10}"]
col_to_keep = [col for col in t_results.columns if any(tag in col for tag in metric_tags)]
# Selecting a single language drops the first index level, leaving three
# index columns in front of the metric columns.
lang_col_format = "l" * 3 + "c" * (len(col_to_keep))

# One table per language configuration (first index level).
latin_pretrained_results = t_results.loc["latin", col_to_keep]
greek_pretrained_results = t_results.loc["greek", col_to_keep]
latin_greek_pretrained_results = t_results.loc["latin+greek", col_to_keep]
greek_latin_pretrained_results = t_results.loc["greek+latin", col_to_keep]

# Side-by-side tables: the metric columns appear twice, so the "c" part of the
# column format is repeated.
multilingual_pretrained_results = pd.concat(
    [greek_pretrained_results, latin_pretrained_results], axis=1
)
crosslingual_pretrained_results = pd.concat(
    [latin_greek_pretrained_results, greek_latin_pretrained_results], axis=1
)

pretrained_exports = [
    (latin_pretrained_results, "latin_pretrained_results.tex", lang_col_format),
    (greek_pretrained_results, "greek_pretrained_results.tex", lang_col_format),
    (latin_greek_pretrained_results, "latin_greek_pretrained_results.tex", lang_col_format),
    (greek_latin_pretrained_results, "greek_latin_pretrained_results.tex", lang_col_format),
    (multilingual_pretrained_results, "multilingual_pretrained_results.tex", lang_col_format + lang_col_format[3:]),
    (crosslingual_pretrained_results, "crosslingual_pretrained_results.tex", lang_col_format + lang_col_format[3:]),
]
for table, table_file_name, table_col_format in pretrained_exports:
    save_to_latex(table, table_file_name, table_col_format, formatters)

In [6]:
# Reshape the fine-tuned results: one row per run, one column per metric.
t_finetuned_results = finetuned_results.T.copy()
# Row labels are split on "_"; the first five pieces become the columns below.
finetuned_name_parts = t_finetuned_results.index.str.split("_")
for position, level in enumerate(["lang", "split", "setting", "author", "model"]):
    t_finetuned_results[level] = [parts[position] for parts in finetuned_name_parts]
t_finetuned_results = (
    t_finetuned_results
    .reset_index(drop=True)
    .set_index(["lang", "split", "setting", "model"])
    .drop("author", axis=1)
)
# Rotated headers, with "_" swapped for "-" in the metric names.
t_finetuned_results.columns = [
    "\\rotatebox{{90}}{{{col_name}}}".format(col_name=metric.replace("_", "-"))
    for metric in t_finetuned_results.columns
]

# Keep only the columns whose header does not contain "num".
header_has_num = t_finetuned_results.columns.str.contains("num")
t_finetuned_results = t_finetuned_results.loc[:, ~header_has_num]
# Four index columns plus one centred column per metric, with a vertical rule
# before the last 18 columns and another before the last 9.
col_format = "l" * 4 + "c" * (len(t_finetuned_results.columns))
col_format = "|".join([col_format[:-18], col_format[-18:-9], col_format[-9:]])
save_to_latex(t_finetuned_results, "finetuned_results.tex", col_format=col_format, formatters=formatters)

In [7]:
# Fine-tuned table restricted to the columns in col_to_keep. All four index
# levels are kept, hence four "l" columns.
selected_finetuned_results = t_finetuned_results.loc[:, col_to_keep]
selected_col_format = "l" * 4 + "c" * len(col_to_keep)
save_to_latex(
    selected_finetuned_results,
    "selected_finetuned_results.tex",
    col_format=selected_col_format,
    formatters=formatters,
)

In [8]:
# Stack the fine-tuned and the original runs, keep only the three listed
# models, and restore the four-level index sorted in descending order.
benchmark_models = ["SPhilBERTa", "SimCSE25", "SimCSE21"]
all_runs = pd.concat([t_finetuned_results, t_results]).reset_index()
benchmark_finetuned_results = (
    all_runs.loc[all_runs["model"].isin(benchmark_models)]
    .set_index(["lang", "split", "setting", "model"])
    .sort_index(ascending=False)
)

In [25]:
# Inspect the "map" column of t_results for the latin+greek runs.
map_column = "\\rotatebox{90}{map}"
t_results.loc["latin+greek", map_column]

  t_results.loc["latin+greek", "\\rotatebox{90}{map}"]


split   setting        model     
silver  target         GreBerta      0.076761
                       LaBerta       0.081383
                       PhilBERTa     0.513198
                       SPhilBERTa    0.928204
        target+random  GreBerta      0.058572
                       LaBerta       0.490596
                       PhilBERTa     0.957747
                       SPhilBERTa    0.903178
gold    target         GreBerta      0.240748
                       LaBerta       0.145098
                       PhilBERTa     0.440205
                       SPhilBERTa    0.753014
        target+random  GreBerta      0.144141
                       LaBerta       0.151594
                       PhilBERTa     0.607761
                       SPhilBERTa    0.714042
Name: \rotatebox{90}{map}, dtype: float64

In [23]:
# Inspect the "map" column of benchmark_finetuned_results for the latin+greek runs.
map_column = "\\rotatebox{90}{map}"
benchmark_finetuned_results.loc["latin+greek", map_column]

  benchmark_finetuned_results.loc["latin+greek", "\\rotatebox{90}{map}"]


split   setting        model     
silver  target+random  SimCSE25      0.942972
                       SimCSE21      0.942617
                       SPhilBERTa    0.903178
        target         SimCSE25      0.927215
                       SimCSE21      0.931312
                       SPhilBERTa    0.928204
gold    target+random  SimCSE25      0.725032
                       SimCSE21      0.851541
                       SPhilBERTa    0.714042
        target         SimCSE25      0.741883
                       SimCSE21      0.768847
                       SPhilBERTa    0.753014
Name: \rotatebox{90}{map}, dtype: float64

In [9]:
# One table per language configuration, restricted to the columns in col_to_keep.
latin_finetuned_results = benchmark_finetuned_results.loc["latin", col_to_keep]
greek_finetuned_results = benchmark_finetuned_results.loc["greek", col_to_keep]
latin_greek_finetuned_results = benchmark_finetuned_results.loc["latin+greek", col_to_keep]
greek_latin_finetuned_results = benchmark_finetuned_results.loc["greek+latin", col_to_keep]

# NOTE(review): this "multilingual" table pairs latin+greek with greek+latin,
# the pairing the pretrained tables call "crosslingual" -- confirm the name.
multilingual_finetuned_results = pd.concat(
    [latin_greek_finetuned_results, greek_latin_finetuned_results],
    axis=1,
)

finetuned_exports = [
    (latin_finetuned_results, "latin_finetuned_results.tex", lang_col_format),
    (greek_finetuned_results, "greek_finetuned_results.tex", lang_col_format),
    (latin_greek_finetuned_results, "latin_greek_finetuned_results.tex", lang_col_format),
    (greek_latin_finetuned_results, "greek_latin_finetuned_results.tex", lang_col_format),
    # Two metric blocks side by side: repeat the "c" part of the format.
    (multilingual_finetuned_results, "multilingual_finetuned_results.tex", lang_col_format + lang_col_format[3:]),
]
for table, table_file_name, table_col_format in finetuned_exports:
    save_to_latex(table, table_file_name, table_col_format, formatters)

In [12]:
# Two stacked blocks: greek | latin on top, latin+greek | greek+latin below.
single_language_block = pd.concat([greek_finetuned_results, latin_finetuned_results], axis=1)
language_pair_block = pd.concat([latin_greek_finetuned_results, greek_latin_finetuned_results], axis=1)
arranged_finetuned_results = pd.concat([single_language_block, language_pair_block], axis=0)
save_to_latex(
    arranged_finetuned_results,
    "arranged_finetuned_results.tex",
    lang_col_format + lang_col_format[3:],
    formatters,
)

In [11]:
# Display the reshaped results table (rows indexed by lang / split / setting /
# model, one rotated-header column per metric) for a visual check.
t_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,\rotatebox{90}{map},\rotatebox{90}{gm-map},\rotatebox{90}{bpref},\rotatebox{90}{Rprec},\rotatebox{90}{recip-rank},\rotatebox{90}{P@5},\rotatebox{90}{P@10},\rotatebox{90}{P@15},\rotatebox{90}{P@20},\rotatebox{90}{P@30},...,\rotatebox{90}{P@1000},\rotatebox{90}{NDCG@5},\rotatebox{90}{NDCG@10},\rotatebox{90}{NDCG@15},\rotatebox{90}{NDCG@20},\rotatebox{90}{NDCG@30},\rotatebox{90}{NDCG@100},\rotatebox{90}{NDCG@200},\rotatebox{90}{NDCG@500},\rotatebox{90}{NDCG@1000}
lang,split,setting,model,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
latin,silver,target,GreBerta,0.687870,0.623790,0.6400,0.632,0.967833,0.632,0.365,0.255333,0.1970,0.136667,...,0.00500,0.708254,0.762394,0.778590,0.787486,0.799011,0.827544,0.838476,0.846151,0.846151
latin,silver,target,LaBerta,0.893462,0.862516,0.8692,0.864,0.973333,0.864,0.458,0.313333,0.2385,0.160000,...,0.00500,0.891897,0.920337,0.931134,0.936762,0.938981,0.946937,0.947924,0.950034,0.950034
latin,silver,target,PhilBERTa,0.932090,0.921851,0.9152,0.892,0.995000,0.892,0.478,0.321333,0.2430,0.164000,...,0.00500,0.919942,0.956212,0.959791,0.962946,0.967284,0.969879,0.970794,0.971614,0.971614
latin,silver,target,SPhilBERTa,0.954387,0.947895,0.9376,0.922,0.995000,0.922,0.486,0.327333,0.2470,0.165667,...,0.00500,0.942664,0.970423,0.974940,0.977382,0.979598,0.980808,0.981306,0.981306,0.981306
latin,silver,target+random,GreBerta,0.824218,0.774044,0.7884,0.764,0.972833,0.764,0.432,0.296000,0.2265,0.154667,...,0.00499,0.816769,0.872811,0.883414,0.890607,0.898542,0.909840,0.914519,0.915362,0.916802
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
greek+latin,gold,target,SPhilBERTa,0.727876,0.684286,0.6520,0.640,0.900000,0.640,0.405,0.306667,0.2375,0.163333,...,0.00500,0.688217,0.783032,0.832913,0.845038,0.856242,0.862517,0.862517,0.862517,0.862517
greek+latin,gold,target+random,GreBerta,0.010138,0.008755,0.0000,0.000,0.006543,0.000,0.000,0.000000,0.0000,0.000000,...,0.00500,0.000000,0.000000,0.000000,0.000000,0.000000,0.015833,0.081214,0.154297,0.207769
greek+latin,gold,target+random,LaBerta,0.023282,0.015476,0.0000,0.000,0.021624,0.000,0.005,0.006667,0.0100,0.016667,...,0.00500,0.000000,0.004902,0.009632,0.017623,0.039216,0.082231,0.126938,0.213108,0.238533
greek+latin,gold,target+random,PhilBERTa,0.149055,0.094126,0.0680,0.100,0.202525,0.100,0.120,0.090000,0.0775,0.076667,...,0.00500,0.095861,0.171155,0.184317,0.200599,0.254824,0.364078,0.388248,0.398414,0.401974
