# ILP experiments

In [None]:
import pandas as pd
from scipy import stats

In [None]:
def add_static_info(df):
    """Tag ``df`` in place with the constant columns describing this run.

    Adds (or overwrites) the ``method``, ``td`` and ``syntax`` columns, each
    holding the same fixed string in every row.

    Args:
        df: DataFrame to annotate; it is modified in place.

    Returns:
        The very same DataFrame object that was passed in.
    """
    static_columns = {
        "method": "ILP",
        "td": "simple-triple+text+hr-triple",
        "syntax": "hyper_relational_rdf_star",
    }
    for column, value in static_columns.items():
        df[column] = value
    return df

In [None]:
# Location of the raw experiment results.
PATH = "results/results.csv"
# Primary metric, spelled as the column appears in the CSV (it is renamed to
# "AMR" when the frame is loaded below).
MAIN_METRIC = "validation.both.realistic.adjusted_arithmetic_mean_rank"
# Columns used as grouping keys by the cells below.
ETA = ["prop", "subevent", "role", "causation"]
# Hyperparameter columns that are correlated against the metrics.
HP = ["lr", "embedding_dim", "batch_size"]

# (CSV column name, short label) pairs.
METRICS = [
    ("validation.both.realistic.adjusted_arithmetic_mean_rank", "AMR"),  # lower is better
    ("validation.both.realistic.inverse_harmonic_mean_rank", "MRR"),
    ("validation.both.realistic.hits_at_1", "H@1"),
    ("validation.both.realistic.hits_at_3", "H@3"),
    ("validation.both.realistic.hits_at_5", "H@5"),
    ("validation.both.realistic.hits_at_10", "H@10"),
]

# Load the results and swap the long metric column names for their short labels.
df = pd.read_csv(PATH, index_col=0).rename(columns=dict(METRICS))
# From here on METRICS holds only the short labels.
METRICS = [label for _, label in METRICS]
print(f"# of experiments: {len(df)}")
df.head(3)

In [None]:
# Number of experiments recorded for each ETA configuration.
eta_counts = df.groupby(ETA).size().reset_index(name="exp_count")
# Drop any exp_count column left over from a previous run of this cell first;
# otherwise the merge would produce exp_count_x / exp_count_y and later
# selections of "exp_count" would fail.
df = df.drop(columns="exp_count", errors="ignore").merge(eta_counts, on=ETA, how="left")
eta_counts


In [None]:
# Export only the runs with causation == 1, tagged with the static run info.
# The selection is copied first: add_static_info assigns columns in place, and
# doing that on a filtered selection of df can trigger pandas'
# SettingWithCopyWarning.
add_static_info(df[df.causation == 1].copy()).to_csv("results/results_syntax.csv")

In [None]:
def print_corr(df, cols_param, cols_metric):
    """Print the Spearman correlation of every parameter/metric column pair.

    One line is printed per pair, parameters in the outer order and metrics
    in the inner order, showing the correlation statistic and its p-value
    with four decimals each.

    Args:
        df: DataFrame holding all referenced columns.
        cols_param: Names of the parameter columns.
        cols_metric: Names of the metric columns.
    """
    pairs = [(param, metric) for param in cols_param for metric in cols_metric]
    for param, metric in pairs:
        outcome = stats.spearmanr(df[param], df[metric])
        line = f"{param.upper()}:\t vs. {metric.upper()}: {outcome.statistic:.4f}, p={outcome.pvalue:.4f}"
        print(line)

In [None]:
# Correlate the "prop" and "subevent" columns with every metric over all runs.
print("Spearman correlations: ALL", "Semantic--", sep="\n")
print_corr(df, ["prop", "subevent"], METRICS)

In [None]:
# For every ETA configuration, correlate each hyperparameter with each metric
# (Spearman) and collect one row per (configuration, hyperparameter, metric).
data = []
for eta, group in df.groupby(ETA):
    for hp in HP:
        for m in METRICS:
            res = stats.spearmanr(group[hp], group[m])
            data.append([*eta, hp, m, res.statistic, res.pvalue])

# Attach the number of experiments behind each configuration.
df_corr_hp_metric = pd.DataFrame(
    data, columns=ETA + ["hp", "metric", "corr", "pval"]
).merge(eta_counts, how="left", on=ETA)

# add_static_info tags the frame in place, so the preview below shows the
# static columns as well.
add_static_info(df_corr_hp_metric).to_csv("results/corr_hp_metric_per_eta.csv")
df_corr_hp_metric.head(3)

In [None]:
# Show only the hyperparameter/metric correlations with p < 0.05.
df_corr_hp_metric.loc[df_corr_hp_metric["pval"] < 0.05]

In [None]:
# Rank the runs inside every ETA configuration by AMR: rank 1 is the lowest
# AMR, and tied runs share the smallest rank (method="min").
ranks = df.groupby(ETA)["AMR"].rank(method="min", ascending=True)
df["rank"] = ranks
# Export the hyperparameters of the rank-1 run(s) of each configuration.
add_static_info(
    df[df["rank"].eq(1)][ETA + HP + ["exp_count"]]
).to_csv("results/best_hp_per_eta.csv")
df[df["rank"].eq(1)][ETA + HP]

In [None]:
# Export the metric values of the rank-1 run(s) of each ETA configuration.
add_static_info(
    df[df["rank"].eq(1)][ETA + METRICS + ["exp_count"]]
).to_csv("results/best_metric_per_eta.csv")
df[df["rank"].eq(1)][ETA + METRICS]

In [None]:
# Spearman correlation between each ETA column and each metric, computed over
# all experiments in df.
data = []
for eta in ETA:
    for m in METRICS:
        # Correlate the eta column itself with the metric on the full frame.
        res = stats.spearmanr(df[eta], df[m])
        data.append([eta, m, res.statistic, res.pvalue])
df_corr_eta_metric = pd.DataFrame(
    data,
    columns=["eta", "metric", "corr", "pval"],
)
# add_static_info tags the frame in place, so the display below includes the
# static columns as well.
add_static_info(df_corr_eta_metric).to_csv("results/corr_eta_metric.csv")
df_corr_eta_metric

In [None]:
# Print the Spearman correlation of every ETA column with every metric.
print_corr(df=df, cols_param=ETA, cols_metric=METRICS)