In [24]:
import pandas as pd
import os

In [25]:
# Load both result tables with identical options: the files are tab-separated
# (despite the .csv extension) and their first column becomes the index.
results, finetuned_results = (
    pd.read_csv(path, index_col=0, sep="\t")
    for path in ("results.csv", "finetuned_results.csv")
)

In [26]:
# Reshape the raw results into a table that renders well in LaTeX.
# After transposing, every row label is split on "_" and its first five parts
# are read as lang, split, setting, author and model (assumes the labels in
# results.csv follow that layout — TODO confirm).
t_results = results.T.copy()
label_parts = t_results.index.str.split("_")  # split once, reused for every field
for position, field in enumerate(["lang", "split", "setting", "author", "model"]):
    t_results[field] = [parts[position] for parts in label_parts]

# Index by the run description; "author" is extracted above but not shown.
t_results = t_results.reset_index(drop=True).set_index(["lang", "split", "setting", "model"])
t_results = t_results.drop("author", axis=1)

# Rotate the column headers by 90 degrees and replace "_" (special in LaTeX) with "-".
t_results.columns = [
    "\\rotatebox{{90}}{{{col_name}}}".format(col_name=col.replace("_", "-"))
    for col in t_results.columns
]
# Drop every column whose header contains "num".
t_results = t_results.loc[:, ~t_results.columns.str.contains("num")]

# Four left-aligned index columns followed by one centred column per remaining column.
col_format = "l" * 4 + "c" * (len(t_results.columns))
# Insert a vertical rule before the last 18 and before the last 9 column specifiers.
col_format = col_format[:-18] + "|" + col_format[-18:-9] + "|" + col_format[-9:]

# Per-column cell formatters. Later groups override earlier ones for the same
# column, so the order of the three groups matters.
formatters = {
        # Default: plain two-decimal number.
        **{
            i: lambda x: f"{x:.2f}" for i in t_results.columns if "P@" not in i and "NDCG" not in i
            },
        # Wrapped in the \gradient LaTeX macro. A raw string is used because "\g"
        # is not a valid Python escape sequence and triggers a SyntaxWarning on
        # recent Python versions.
        **{
            i: lambda x: r"\gradient{" + f"{x:.2f}" + "}"
            for i in t_results.columns if "P@" in i or "map" in i or "recip-rank" in i
            or "bpref" in i or "Rprec" in i
            },
        # NDCG columns are wrapped in the \newgradient LaTeX macro instead.
        **{
            i: lambda x: r"\newgradient{" + f"{x:.2f}" + "}"
            for i in t_results.columns if "NDCG" in i
            },
    }

def save_to_latex(results, file_name, col_format, formatters):
    r"""Render ``results`` as LaTeX, tidy its rule lines and write ``tables/<file_name>``.

    The text returned by ``results.to_latex`` is post-processed as follows:

    * ``\toprule`` is removed and every ``cline`` is renamed to ``cmidrule``;
    * a line containing "rule" is dropped when the following line also
      contains "rule", so consecutive rules collapse to the last one;
    * a line containing ``\cmidrule`` is reduced to ``\cmidrule`` plus the text
      that follows its first occurrence (up to any second occurrence), stripped
      of surrounding whitespace.

    The written file has no trailing newline.

    Args:
        results: Object exposing ``to_latex`` (a pandas DataFrame in this notebook).
        file_name: Name of the file to create inside the ``tables`` directory.
        col_format: LaTeX column specification, passed as ``column_format``.
        formatters: Per-column formatting callables, passed as ``formatters``.
    """
    latex_str = results.to_latex(
        index=True,
        float_format="%.3f",
        column_format=col_format,
        formatters=formatters,
    )
    latex_str = latex_str.replace(r"\toprule", "").replace("cline", "cmidrule")

    lines = latex_str.split("\n")
    # Only discard the empty string that split() yields after a trailing newline.
    # Dropping the last element unconditionally would lose the final line of the
    # table whenever the rendered text does not end with a newline.
    if lines and lines[-1] == "":
        lines.pop()

    new_lines = []
    for idx, line in enumerate(lines):
        next_line = lines[idx + 1] if idx + 1 < len(lines) else ""
        # Collapse consecutive rule lines: keep only the last one of a run.
        if "rule" in line and "rule" in next_line:
            continue
        if r"\cmidrule" in line:
            # Discard anything preceding the rule command on the same line.
            line = (r"\cmidrule" + line.split(r"\cmidrule")[1]).strip()
        new_lines.append(line)
    latex_str = "\n".join(new_lines)

    os.makedirs("tables", exist_ok=True)
    # Explicit encoding so the output does not depend on the platform default.
    with open(f"tables/{file_name}", "w", encoding="utf-8") as f:
        f.write(latex_str)

# Write the complete reshaped results table to tables/results.tex.
save_to_latex(
    t_results,
    "results.tex",
    col_format=col_format,
    formatters=formatters,
)

In [None]:
# Keep only the columns whose header contains one of these fragments; the
# braces anchor the match to the start or end of the \rotatebox{90}{...} argument.
col_to_keep = [
    header
    for header in t_results.columns
    if any(
        fragment in header
        for fragment in ("{map", "recip-rank", "P@5}", "P@10}", "NDCG@5}", "NDCG@10}")
    )
]
# Selecting a single language drops that index level, leaving three index columns.
lang_col_format = "lll" + "c" * len(col_to_keep)

# One table per language configuration, each written right after it is selected.
latin_pretrained_results = t_results.loc["latin", col_to_keep]
save_to_latex(
    latin_pretrained_results,
    "latin_pretrained_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

greek_pretrained_results = t_results.loc["greek", col_to_keep]
save_to_latex(
    greek_pretrained_results,
    "greek_pretrained_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

latin_greek_pretrained_results = t_results.loc["latin+greek", col_to_keep]
save_to_latex(
    latin_greek_pretrained_results,
    "latin_greek_pretrained_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

greek_latin_pretrained_results = t_results.loc["greek+latin", col_to_keep]
save_to_latex(
    greek_latin_pretrained_results,
    "greek_latin_pretrained_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

# Both mixed-language tables side by side, hence twice as many value columns.
multilingual_pretrained_results = pd.concat(
    [latin_greek_pretrained_results, greek_latin_pretrained_results],
    axis=1,
)
save_to_latex(
    multilingual_pretrained_results,
    "multilingual_pretrained_results.tex",
    col_format=lang_col_format + lang_col_format[3:],
    formatters=formatters,
)

In [28]:
# Reshape the fine-tuned results for LaTeX: transpose, split each row label on
# "_" into lang / split / setting / author / model, and index by all but author.
t_finetuned_results = finetuned_results.T.copy()
run_label_parts = t_finetuned_results.index.str.split("_")
for position, field in enumerate(["lang", "split", "setting", "author", "model"]):
    t_finetuned_results[field] = [parts[position] for parts in run_label_parts]
t_finetuned_results = (
    t_finetuned_results
    .reset_index(drop=True)
    .set_index(["lang", "split", "setting", "model"])
    .drop("author", axis=1)
)

# Rotated headers, with "_" replaced by "-".
t_finetuned_results.columns = [
    "\\rotatebox{{90}}{{{col_name}}}".format(col_name=header.replace("_", "-"))
    for header in t_finetuned_results.columns
]

# Discard every column whose header contains "num".
header_has_num = t_finetuned_results.columns.str.contains("num")
t_finetuned_results = t_finetuned_results.loc[:, ~header_has_num]

# Four index columns plus one centred column per remaining column, with vertical
# rules inserted before the last 18 and before the last 9 column specifiers.
col_format = "llll" + "c" * len(t_finetuned_results.columns)
col_format = "|".join([col_format[:-18], col_format[-18:-9], col_format[-9:]])
save_to_latex(
    t_finetuned_results,
    "finetuned_results.tex",
    col_format=col_format,
    formatters=formatters,
)

In [29]:
# Fine-tuned table restricted to the selected columns; all four index levels
# are kept, so the specification starts with four left-aligned columns.
save_to_latex(
    t_finetuned_results.loc[:, col_to_keep],
    "selected_finetuned_results.tex",
    formatters=formatters,
    col_format="llll" + len(col_to_keep) * "c",
)

In [None]:
# One fine-tuned table per language configuration, each written right after it
# is selected. Column selection and layout come from col_to_keep / lang_col_format.
latin_finetuned_results = t_finetuned_results.loc["latin", col_to_keep]
save_to_latex(
    latin_finetuned_results,
    "latin_finetuned_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

greek_finetuned_results = t_finetuned_results.loc["greek", col_to_keep]
save_to_latex(
    greek_finetuned_results,
    "greek_finetuned_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

latin_greek_finetuned_results = t_finetuned_results.loc["latin+greek", col_to_keep]
save_to_latex(
    latin_greek_finetuned_results,
    "latin_greek_finetuned_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

greek_latin_finetuned_results = t_finetuned_results.loc["greek+latin", col_to_keep]
save_to_latex(
    greek_latin_finetuned_results,
    "greek_latin_finetuned_results.tex",
    col_format=lang_col_format,
    formatters=formatters,
)

# Both mixed-language tables side by side, hence twice as many value columns.
multilingual_finetuned_results = pd.concat(
    [latin_greek_finetuned_results, greek_latin_finetuned_results],
    axis=1,
)
save_to_latex(
    multilingual_finetuned_results,
    "multilingual_finetuned_results.tex",
    col_format=lang_col_format + lang_col_format[3:],
    formatters=formatters,
)

In [37]:
# 2x2 arrangement of the fine-tuned tables: the single-language tables side by
# side in the upper block, the mixed-language tables side by side below them.
arranged_finetuned_results = pd.concat(
    [
        pd.concat([latin_finetuned_results, greek_finetuned_results], axis="columns"),
        pd.concat([latin_greek_finetuned_results, greek_latin_finetuned_results], axis="columns"),
    ],
    axis="index",
)
save_to_latex(
    arranged_finetuned_results,
    "arranged_finetuned_results.tex",
    col_format=lang_col_format + lang_col_format[3:],
    formatters=formatters,
)