In [None]:
import os
import random
import pandas as pd
from scipy import stats
from statsmodels.stats import multitest

In [None]:
# Column groups shared by the cells below.
METRICS = ["MRR", "H@1", "H@3", "H@10"]
ETA = ["prop", "subevent", "role", "causation"]
COLUMNS = ETA + ["syntax"] + METRICS
SEED = 23

# Read one results table per method, keep only the columns of interest and tag
# every row with the method it came from, then stack the tables into `df`.
data = []
for method in ["ultra", "simkgc", "ilp"]:
    # .assign returns a new frame, so the column-sliced result of read_csv is
    # never mutated in place (avoids SettingWithCopyWarning on older pandas).
    data.append(
        pd.read_csv(f"../stats/syntax_{method}.csv", index_col=0)[COLUMNS].assign(method=method)
    )
df = pd.concat(data)

print(f"{df.syntax.unique().shape[0]} syntaxes: {df.syntax.unique()}")
# Seeding the stdlib RNG is kept for any code that relies on `random`, but it does
# not influence pandas: DataFrame.sample draws from NumPy's RNG, so the seed has
# to be passed explicitly for the preview to be reproducible.
random.seed(SEED)
df.sample(5, random_state=SEED)

In [None]:
# Aggregation spec reused by later cells: average every metric and count the
# non-missing `prop` entries as the per-syntax row count.
agg = dict.fromkeys(METRICS, "mean")
agg["prop"] = "count"
df.groupby("syntax").agg(agg)

In [None]:
# Rank the rows inside every (eta configuration, syntax, method) group by MRR,
# best first, and keep the rank on `df` for the cells below.
#
# method="first" breaks ties by row order, so each group with a non-missing MRR
# has exactly one rank-1 row. With method="max" and ascending=False, rows tied
# for the best MRR all received the *highest* rank of the tie (e.g. 2, 2), so
# no row in that group had rank 1 and the group silently vanished from every
# `rank == 1` selection.
ranks = df.groupby(ETA + ["syntax", "method"])["MRR"].rank(method="first", ascending=False)
df["rank"] = ranks
# Per-syntax summary restricted to the best-ranked row of each group.
df[df["rank"] == 1][ETA + ["syntax", "method"] + METRICS].groupby("syntax").agg(agg)

In [None]:
# Write the per-syntax summary of the rank-1 rows to disk. `syntax` is turned
# back into a regular column before saving.
best_columns = ETA + ["syntax", "method"] + METRICS
best_runs = df.loc[df["rank"] == 1, best_columns]
syntax_summary = best_runs.groupby("syntax").agg(agg).reset_index()
syntax_summary.to_csv("../stats/syntax_mean.csv")

In [None]:
# Kruskal-Wallis H-test per metric: compares the rank-1 rows of the four
# syntaxes listed below against each other.
kruskal_syntaxes = (
    "hyper_relational_rdf_star",
    "simple_rdf_prop",
    "simple_rdf_reification",
    "simple_rdf_sp",
)
top_ranked = df[df["rank"] == 1]

for metric in METRICS:
    print(f"--- {metric} ---")
    # One sample per syntax, in the order given above.
    samples = [
        top_ranked.loc[top_ranked.syntax == name, metric].values
        for name in kruskal_syntaxes
    ]
    print(stats.kruskal(*samples))
    print("--- ---")

In [None]:
# Pairwise Mann-Whitney U tests on one metric between every unordered pair of
# syntaxes, restricted to rank-1 rows. The p-values are collected in `pvals`
# in pair order for the multiple-testing correction in the next cell.
metric = "H@1"
syntaxes = df.syntax.unique().tolist()
top_ranked = df[df["rank"] == 1]
pvals = []
for pos, syntax1 in enumerate(syntaxes):
    first_sample = top_ranked.loc[top_ranked.syntax == syntax1, metric].values
    # Only pair with syntaxes that come later in the list, so each pair is tested once.
    for syntax2 in syntaxes[pos + 1:]:
        second_sample = top_ranked.loc[top_ranked.syntax == syntax2, metric].values
        print(f"{syntax1} vs {syntax2}")
        res = stats.mannwhitneyu(first_sample, second_sample, method="asymptotic")
        print(res)
        pvals.append(res.pvalue)
        print("==========================")

In [None]:
# Holm step-down correction of the pairwise p-values gathered in `pvals`.
# alpha is statsmodels' default family-wise error rate, spelled out for readers.
hb_correction = multitest.multipletests(
    pvals=pvals,
    alpha=0.05,
    method="holm",
)
hb_correction