In [1]:
import pandas as pd 
import numpy as np
import os
from subprocess import check_output
import shutil

In [2]:
# Directory from which "train.txt" is read.
root_source = "./../../FB15k-237-num/"
# File that receives a verbatim copy of the training lines and is then passed to AMIE.
store_data = "./../../Data_FB/FB15K_baseline.tsv"

In [3]:
# Copy the training file line by line into `store_data` and, in the same pass,
# collect every distinct predicate (second tab-separated field of each line).
predicate_to_find = set()

# Context managers close both files even when a malformed line (fewer than two
# tab-separated fields) raises in the middle of the loop.
with open(root_source+"train.txt", "r") as data, open(store_data, "w") as f:
    for line in data:
        f.write(line)
        predicate_to_find.add(line.split("\n")[0].split("\t")[1])

In [4]:
# Comma-separated list of all collected predicates (set iteration order);
# an empty set yields an empty string.
htr = ",".join(predicate_to_find)

In [5]:
def parse_amie(res_rules_raw):
    """Parse raw AMIE output into a set of ``Rule`` objects.

    Only lines whose first character is ``?`` are treated as rules; every
    other line is ignored. A rule line is tab-separated: the first field is
    the rule text (``hypotheses => conclusion`` with atom parts separated by
    two spaces) and the remaining fields are handed to ``Rule`` as its
    metrics.

    Parameters
    ----------
    res_rules_raw : bytes or str
        Output of the AMIE process. Bytes are decoded as UTF-8; an already
        decoded string is used as is.

    Returns
    -------
    set
        The parsed rules (empty when no line starts with ``?``).
    """
    if isinstance(res_rules_raw, (bytes, bytearray)):
        text = res_rules_raw.decode("utf-8")
    else:
        text = res_rules_raw

    rules = set()
    for line in text.split("\n"):
        if not line.startswith("?"):
            continue
        parts = line.split("\t")
        sides = parts[0].split("=>")

        # The conclusion side starts with the space that follows "=>"; drop it.
        conclusion_raw = sides[1].split("  ")
        conclusion_raw[0] = conclusion_raw[0][1:]
        conclusion = Atom(conclusion_raw)

        # The hypothesis side leaves one trailing fragment after its last
        # atom, hence the ``len(...) - 1`` bound; atoms are consecutive
        # groups of three tokens.
        hypotheses_raw = sides[0].split("  ")
        hypotheses = [
            Atom(hypotheses_raw[i:i+3])
            for i in range(0, len(hypotheses_raw)-1, 3)
        ]

        rules.add(Rule(hypotheses, conclusion, parts[1:]))
    return rules

In [6]:
class Atom:
    """One (subject, predicate, object) atom of a rule.

    Atoms are hashable and compare equal when their three components are
    equal, so they can be stored in sets and used as dictionary keys.
    """

    def __init__(self, atom_raw):
        """Build an atom from an indexable whose items 0, 1 and 2 are the
        subject, the predicate and the object."""
        self._subject = atom_raw[0]
        self._predicate = atom_raw[1]
        self._objectD = atom_raw[2]

    def __hash__(self):
        return hash((self._subject, self._predicate, self._objectD))

    def __repr__(self):
        return f"{self.subject} {self.predicate} {self.objectD}"

    def __eq__(self, other):
        # Comparing with a non-Atom (None, a string, ...) must answer "not
        # equal" instead of raising AttributeError on the missing properties.
        if not isinstance(other, Atom):
            return NotImplemented
        return (
            self.subject == other.subject
            and self.predicate == other.predicate
            and self.objectD == other.objectD
        )

    @property
    def subject(self):
        """First component of the atom."""
        return self._subject

    @property
    def predicate(self):
        """Second component of the atom."""
        return self._predicate

    @property
    def objectD(self):
        """Third component of the atom."""
        return self._objectD
    
class Rule:
    """A rule ``hypotheses => conclusion`` together with its metrics.

    Two rules are equal when they share the same conclusion and the same
    *set* of hypotheses, whatever the order of the hypotheses. The hash is
    order-insensitive as well, so equal rules collapse to a single entry in
    sets and dictionaries.
    """

    # Metric names, in the order ``__init__`` reads them from ``otherRes``.
    _METRIC_KEYS = (
        "headCoverage",
        "stdConfidence",
        "pcaConfidence",
        "precision_train",
        "precision_test",
    )

    def __init__(self, hypotheses, conclusion, otherRes, fromDict=False):
        """Create a rule.

        Parameters
        ----------
        hypotheses : iterable
            Body atoms; stored as a tuple.
        conclusion : object
            Head atom.
        otherRes : sequence
            Items 0-2 are head coverage, standard confidence and PCA
            confidence (anything ``float`` accepts). When ``fromDict`` is
            true, items 3 and 4 are the train and test precision.
        fromDict : bool, optional
            Whether ``otherRes`` also carries the two precision values.
            When false both precisions start as ``None``.
        """
        if not isinstance(hypotheses, tuple):
            hypotheses = tuple(hypotheses)
        self._hypotheses = hypotheses
        self._conclusion = conclusion
        self._size_hypotheses = len(hypotheses)
        self._headCoverage = float(otherRes[0])
        self._stdConfidence = float(otherRes[1])
        self._pcaConfidence = float(otherRes[2])
        if fromDict:
            self._precision_train = Rule._optional_float(otherRes[3])
            self._precision_test = Rule._optional_float(otherRes[4])
        else:
            self._precision_train = None
            self._precision_test = None

    @staticmethod
    def _optional_float(value):
        """Convert ``value`` to float, keeping ``None`` (precision not set)."""
        return None if value is None else float(value)

    def __hash__(self):
        # frozenset makes the hash independent of hypothesis order, matching
        # __eq__; hashing the ordered tuple let equal rules hash differently.
        return hash((frozenset(self._hypotheses), self._conclusion))

    def __repr__(self):
        body = " & ".join(str(atom) for atom in self.hypotheses)
        return body + " => " + str(self.conclusion)

    def __eq__(self, other):
        if not isinstance(other, Rule):
            return False
        return (self.conclusion == other.conclusion) and (set(self.hypotheses) == set(other.hypotheses))

    @property
    def hypotheses(self):
        """Tuple of body atoms."""
        return self._hypotheses

    @property
    def conclusion(self):
        """Head atom."""
        return self._conclusion

    @property
    def size_hypotheses(self):
        """Number of body atoms."""
        return self._size_hypotheses

    @property
    def headCoverage(self):
        return self._headCoverage

    @property
    def stdConfidence(self):
        return self._stdConfidence

    @property
    def pcaConfidence(self):
        return self._pcaConfidence

    @property
    def precision_train(self):
        """Train precision, or ``None`` until it has been set."""
        return self._precision_train

    @property
    def precision_test(self):
        """Test precision, or ``None`` until it has been set."""
        return self._precision_test

    def setPrecisionTrain(self, precision):
        self._precision_train = precision

    def setPrecisionTest(self, precision):
        self._precision_test = precision

    def toDict(self):
        """Return the rule as a flat dict (one entry per stored attribute)."""
        return {
            "hypotheses": self._hypotheses,
            "conclusion": self._conclusion,
            "size_hypothese": self._size_hypotheses,
            "headCoverage": self._headCoverage,
            "stdConfidence": self._stdConfidence,
            "pcaConfidence": self._pcaConfidence,
            "precision_train": self._precision_train,
            "precision_test": self._precision_test,
        }

    @staticmethod
    def fromDict(row):
        """Rebuild a rule from a mapping or row shaped like ``toDict()``.

        Metrics are looked up by name. Slicing ``row.values[2:]`` on the
        ``toDict`` layout starts at ``size_hypothese`` and therefore shifts
        every metric by one position. The positional slice is kept only as a
        fallback for rows that do not carry the metric names.
        """
        if all(key in row for key in Rule._METRIC_KEYS):
            metrics = [row[key] for key in Rule._METRIC_KEYS]
        else:
            # NOTE(review): legacy positional layout — confirm which callers
            # still rely on it.
            metrics = row.values[2:]
        return Rule(row["hypotheses"], row["conclusion"], metrics, True)
            
def limit_by_threshold(X, feature, threshold):
    """Tell whether the value stored under ``feature`` in ``X`` reaches ``threshold``.

    Returns the result of ``X[feature] >= threshold``.
    """
    value = X[feature]
    return value >= threshold

def add_parameters(amie_responses, parameters):
    """Derive filtered rule tables from AMIE rule sets.

    For every rule set and every parameter set, the rules are put into a
    DataFrame (one row per rule, columns from ``toDict()``) and only the rows
    whose features reach all the given thresholds are kept.

    Parameters
    ----------
    amie_responses : mapping
        Name -> mapping of key -> object exposing ``toDict()``.
    parameters : iterable
        Each item is a sequence of ``(feature, threshold)`` pairs applied
        one after the other.

    Returns
    -------
    dict
        Maps ``name`` followed by one ``"\\n<tag>=<threshold>"`` suffix per
        pair to the filtered DataFrame. The tag is ``stdC`` for
        ``stdConfidence``, ``pcaC`` for ``pcaConfidence`` and ``hC`` for any
        other feature.
    """
    new_responses = {}
    for amie_response in amie_responses:
        rules = amie_responses[amie_response]
        # The unfiltered table does not depend on the parameter set, so it is
        # built once per rule set instead of once per (rule set, parameters).
        base_frame = pd.DataFrame.from_dict(
            {key: rules[key].toDict() for key in rules}, orient="index"
        )

        for para in parameters:
            # Independent copy so that returned frames never share data.
            new_response = base_frame.copy()
            name = amie_response
            for sub_para in para:
                feature, threshold = sub_para[0], sub_para[1]
                # An empty frame may have no columns at all; skip filtering.
                if len(new_response) != 0:
                    new_response = new_response.loc[new_response[feature] >= threshold]
                if feature == "stdConfidence":
                    name += "\n"+"stdC"+"="+str(threshold)
                elif feature == "pcaConfidence":
                    name += "\n"+"pcaC"+"="+str(threshold)
                else :
                    name += "\n"+"hC"+"="+str(threshold)
            new_responses[name] = new_response
    return new_responses


def save_sets_rule(root, set_rules):
    """Write every rule table of ``set_rules`` into a fresh ``<root>/save`` directory.

    Any existing ``<root>/save`` directory is deleted first, so only the
    files of the current call remain.

    Parameters
    ----------
    root : str
        Existing directory in which the ``save`` folder is (re)created.
    set_rules : mapping
        Name -> object exposing ``to_csv(path)``; each one is written to
        ``<root>/save/<name>.tsv``.
    """
    save_dir = root+"/save"
    # `os.path` is used explicitly: the bare name `path` is never imported.
    if os.path.isdir(save_dir):
        shutil.rmtree(save_dir)
    os.mkdir(save_dir)
    for set_rule in set_rules:
        # NOTE(review): to_csv is called without a `sep` argument although the
        # file is named .tsv — confirm the delimiter that readers expect.
        set_rules[set_rule].to_csv(save_dir+"/"+set_rule+".tsv")

In [7]:
# Run AMIE on the copied training file, passing the collected predicates via -htr.
# The command is given as an argument list without shell=True, so characters in
# the predicate list or the path are never interpreted by a shell.
res = check_output(["java", "-jar", "./../amie3.jar", "-htr", htr, store_data])

res_parsed = parse_amie(res)

In [8]:
# Number of distinct rules returned by parse_amie.
len(res_parsed)

7860

In [9]:
# NOTE(review): this displays the entire rule set; consider showing only a small slice.
res_parsed

{?f /organization/organization/headquarters./location/mailing_address/country ?b & ?a /organization/organization_member/member_of./organization/organization_membership/organization ?f => ?a /organization/organization/headquarters./location/mailing_address/country ?b,
 ?e /business/job_title/people_with_this_title./business/employment_tenure/company ?a & ?e /sports/sports_position/players./sports/sports_team_roster/position ?b => ?a /american_football/football_team/current_roster./sports/sports_team_roster/position ?b,
 ?b /award/award_nominee/award_nominations./award/award_nomination/nominated_for ?a & ?a /award/award_winning_work/awards_won./award/award_honor/award_winner ?b => ?a /tv/tv_program/program_creator ?b,
 ?a /award/award_winner/awards_won./award/award_honor/award_winner ?b & ?b /base/popstra/celebrity/dated./base/popstra/dated/participant ?a => ?a /base/popstra/celebrity/breakup./base/popstra/breakup/participant ?b,
 ?a /award/award_winning_work/awards_won./award/award_hono