In [1]:
import os
from pprint import pprint
from typing import List, Dict

import pandas as pd

from kbc_pul.amie.amie_output_rule_extraction import get_amie_rules_from_rule_tsv_file
from artificial_bias_experiments.amie_rule_learning import get_amie_rule_tsv_filename
from artificial_bias_experiments.images_paper_joint.pretty_rule_string import \
    get_paper_like_rule_string_from_prolog_str

from kbc_pul.data_structures.rule_wrapper import RuleWrapper

from kbc_pul.data_structures.rule_wrapper import is_pylo_rule_recursive
from kbc_pul.data_structures.rule_wrapper import create_amie_dataframe_from_rule_wrappers
from kbc_pul.project_info import project_dir as kbc_e_metrics_project_dir

from kbc_pul.project_info import data_dir

# List of rules in LaTeX table

## Load rules

In [2]:
# NOTE(review): this import sits outside the notebook's top import cell; consider moving it there.
from kbc_pul.experiments_utils.file_utils import print_file_exists

# Dataset whose AMIE rules are tabulated, and the minimum standard confidence
# that is passed on to get_amie_rule_tsv_filename below.
dataset_name="yago3_10"
amie_min_std_confidence: float = 0.1

# Path of the ground-truth file: <data_dir>/<dataset_name>/cleaned_csv/train.csv
filename_ground_truth_dataset: str = os.path.join(
    data_dir, dataset_name, 'cleaned_csv', 'train.csv'
)
separator_ground_truth_dataset = "\t"

# Look up the name of the AMIE rule TSV file for this dataset and threshold,
# then print whether that file exists.
amie_rule_tsv_filename = get_amie_rule_tsv_filename(
    filename_ground_truth_dataset=filename_ground_truth_dataset,
    dataset_name=dataset_name,
    min_std_confidence=amie_min_std_confidence
)
print_file_exists(amie_rule_tsv_filename)

? file exists: /home/joschout/Documents/Repos/KUL-PUL/data/artificial_bias_experiments/yago3_10/amie/yago3_10_amie_rules_min_std_conf0.1.tsv
-> True


In [3]:
# Read the AMIE rule TSV file into a list of RuleWrapper objects.
amie_rule_wrappers: List[RuleWrapper] = get_amie_rules_from_rule_tsv_file(
    amie_rule_tsv_filename
)

## Only consider the non-recursive rules

In [4]:

# Keep only the wrappers whose rule is not flagged by is_pylo_rule_recursive,
# then print "<kept>  /  <total>".
non_recursive_rules: List[RuleWrapper] = list(
    filter(
        lambda wrapper: not is_pylo_rule_recursive(wrapper.rule),
        amie_rule_wrappers,
    )
)
print(len(non_recursive_rules), " / ", len(amie_rule_wrappers))

49  /  149


In [5]:
# Collect the AMIE metrics of the non-recursive rules into one DataFrame
# (one row per rule, judging by the preview below).
df_amie_metrics_non_recursive_rules: pd.DataFrame = create_amie_dataframe_from_rule_wrappers(non_recursive_rules)
df_amie_metrics_non_recursive_rules.head()

Unnamed: 0,Rule,Head Coverage,Std Confidence,PCA Confidence,Positive Examples,Body size,PCA Body size,Functional variable
0,"hasneighbor(A,B) :- dealswith(A,B)",0.295495,0.12596,0.164493,164,1302,997,?b
1,"hasneighbor(A,B) :- dealswith(B,A)",0.297297,0.126728,0.174788,165,1302,944,?b
2,"imports(A,B) :- exports(A,B)",0.152672,0.15873,0.17094,60,378,351,?a
3,"ispoliticianof(A,B) :- isleaderof(A,B)",0.064755,0.14629,0.457516,140,957,306,?a
4,"livesin(A,B) :- iscitizenof(A,B)",0.139262,0.120185,0.471056,415,3453,881,?a


In [6]:
# Order the rows by their rule string; the reassignment replaces the unsorted frame.
df_amie_metrics_non_recursive_rules = df_amie_metrics_non_recursive_rules.sort_values(by=["Rule"])
df_amie_metrics_non_recursive_rules.head()

Unnamed: 0,Rule,Head Coverage,Std Confidence,PCA Confidence,Positive Examples,Body size,PCA Body size,Functional variable
14,"actedin(A,B) :- directed(A,B)",0.017355,0.101806,0.104007,558,5481,5365,?b
46,"created(A,B) :- actedin(A,B),directed(A,B)",0.030578,0.379928,0.566845,212,558,374,?b
13,"created(A,B) :- directed(A,B)",0.173374,0.219303,0.326364,1202,5481,3683,?b
6,"dealswith(A,B) :- hasneighbor(A,B)",0.12596,0.295495,0.337449,164,555,486,?a
35,"dealswith(A,B) :- hasneighbor(A,H),hasneighbor...",0.177419,0.128906,0.147887,231,1792,1562,?a


In [7]:
# Rewrite every Prolog-style rule string into the paper-like notation.
# NOTE: the "Rule" column is overwritten in place, so re-running this cell
# feeds already-converted strings back into the converter.
df_amie_metrics_non_recursive_rules["Rule"] = (
    df_amie_metrics_non_recursive_rules["Rule"]
    .apply(get_paper_like_rule_string_from_prolog_str)
)

In [8]:
def replace_functional_variable(old_str: str) -> str:
    """Translate an AMIE functional-variable marker into its LaTeX symbol.

    Args:
        old_str: the marker to translate, either ``"?a"`` or ``"?b"``.

    Returns:
        ``"$s$"`` for ``"?a"`` and ``"$o$"`` for ``"?b"``.

    Raises:
        ValueError: if ``old_str`` is neither of the two known markers.
            ``ValueError`` derives from ``Exception``, so callers that
            catch ``Exception`` keep working.
    """
    if old_str == "?a":
        return "$s$"
    if old_str == "?b":
        return "$o$"
    # Fail loudly on anything else rather than letting an unknown marker
    # slip into the published table.
    raise ValueError(f"unexpected value {old_str}")
# Replace the ?a / ?b markers in the "Functional variable" column with their
# LaTeX symbols. The column is overwritten in place, so re-running this cell
# after a successful run raises on the already-converted values.
df_amie_metrics_non_recursive_rules["Functional variable"] = (
    df_amie_metrics_non_recursive_rules["Functional variable"]
    .apply(replace_functional_variable)
)

# Rename columns

In [9]:
# Maps the AMIE metric column names to the LaTeX headers used in the table.
# Raw strings keep every LaTeX backslash literal; the earlier non-raw literals
# only worked because "\l" and "\m" are *invalid* escape sequences that Python
# passes through unchanged, which newer interpreters warn about.
column_map: Dict[str, str] = {
    "Rule": r"$R$",
    "Std Confidence": r"$\textit{CWA}(R)$",
    "PCA Confidence": r"$\textit{PCA}(R)$",
    "Positive Examples": r"$\left| \mathbf{R}^{l}\right|$",
    "Body size": r"$\left| \mathbf{R}\right|$",
    "PCA Body size": r"$\left| \mathbf{R_s^l}\right|$",
    "Functional variable": "PCA domain",
}
pprint(column_map)

{'Body size': '$\\left| \\mathbf{R}\\right|$',
 'Functional variable': 'PCA domain',
 'PCA Body size': '$\\left| \\mathbf{R_s^l}\\right|$',
 'PCA Confidence': '$\\textit{PCA}(R)$',
 'Positive Examples': '$\\left| \\mathbf{R}^{l}\\right|$',
 'Rule': '$R$',
 'Std Confidence': '$\\textit{CWA}(R)$'}


In [10]:
# Apply column_map to obtain the paper-style headers; columns without an entry
# in the map (e.g. "Head Coverage") keep their original name.
df_pretty: pd.DataFrame = df_amie_metrics_non_recursive_rules.rename(
    columns=column_map
)
df_pretty.head()

Unnamed: 0,$R$,Head Coverage,$\textit{CWA}(R)$,$\textit{PCA}(R)$,$\left| \mathbf{R}^{l}\right|$,$\left| \mathbf{R}\right|$,$\left| \mathbf{R_s^l}\right|$,PCA domain
14,"$\langle s, directed, o \rangle \Rightarrow \l...",0.017355,0.101806,0.104007,558,5481,5365,$o$
46,"$\langle s, actedin, o \rangle \wedge \langle ...",0.030578,0.379928,0.566845,212,558,374,$o$
13,"$\langle s, directed, o \rangle \Rightarrow \l...",0.173374,0.219303,0.326364,1202,5481,3683,$o$
6,"$\langle s, hasneighbor, o \rangle \Rightarrow...",0.12596,0.295495,0.337449,164,555,486,$s$
35,"$\langle s, hasneighbor, h \rangle \wedge \lan...",0.177419,0.128906,0.147887,231,1792,1562,$s$


In [11]:
# Number of rows in the final table.
len(df_pretty)

49

In [12]:
# df_pretty_part1 = df_pretty.iloc[:50]
# df_pretty_part2 = df_pretty.iloc[50:]


In [13]:
# To file

In [14]:
# Directory that receives the generated table files.
dir_latex_table: str = os.path.join(
    kbc_e_metrics_project_dir,
    "paper_latex_tables",
    "amie-rules"
)
# exist_ok=True replaces the separate os.path.exists() check and so avoids the
# check-then-create race between the test and the directory creation.
os.makedirs(dir_latex_table, exist_ok=True)

# NOTE(review): "ammie" looks like a typo for "amie" (compare the .tex name
# below); left unchanged because the existing file name may be relied upon.
filename_tsv_table: str = os.path.join(
    dir_latex_table,
    "ammie-rules-non-recursive.tsv"
)

filename_latex_table: str = os.path.join(
    dir_latex_table,
    "amie-rules-non-recursive.tex"
)
# This cell only prepares the output locations. The TSV and LaTeX files are
# written by the cell that follows, which previously rewrote the identical TSV
# a second time.



In [15]:
# Render the LaTeX table *before* opening the output file, so that a failure
# in to_latex() cannot leave a truncated, empty .tex file behind.
# escape=False keeps the LaTeX markup in the headers and cells intact.
with pd.option_context("display.max_colwidth", 1000):
    latex_table: str = df_pretty.to_latex(
        index=False,
        float_format="{:0.3f}".format,
        escape=False,
        multicolumn=True,
    )
with open(filename_latex_table, "w") as latex_ofile:
    latex_ofile.write(latex_table)

# Let pandas open the TSV file itself instead of writing the to_csv() string
# through a text-mode handle, which would translate line endings a second
# time on platforms where os.linesep is not "\n".
df_amie_metrics_non_recursive_rules.to_csv(
    filename_tsv_table,
    sep="\t",
    index=False,
)

print(filename_latex_table)



/home/joschout/Documents/Repos/KUL-PUL/paper_latex_tables/amie-rules/amie-rules-non-recursive.tex
