In [1]:
import json
import itertools
import numpy as np
import pandas as pd

import rule_application as ra
from grapher import Grapher
from temporal_walk import Temporal_Walk
from rule_learning import Rule_Learner, verbalize_rule, rules_statistics
from score_functions import score_12

In [2]:
# Dataset name and the learned-rules file to inspect.
dataset = "icews14"
rules_file = "101221144026_r[1,2,3]_n200_exp_s12_rules.json"
# Sub-directory (with trailing slash) appended to the data/output roots below.
dir_path = f"{dataset}/"

In [3]:
# Load the dataset; the Grapher exposes train/valid/test index arrays.
data = Grapher("../data/" + dir_path)
# Print the shape of every split on one line, then a peek at the first training rows.
print("Shapes: ", *(np.shape(split) for split in (data.train_idx, data.valid_idx, data.test_idx)))
print(data.train_idx[:5])

Grapher initialized.
Shapes:  (127370, 4) (27646, 4) (26444, 4)
[[ 146  178 2665    0]
 [ 265  156 1183    0]
 [ 382  127 2100    0]
 [ 467  224 1176    0]
 [ 467  229 6495    0]]


In [4]:
# Load the learned rules from disk. The context manager guarantees the file
# handle is closed (the previous `json.load(open(...))` left it to the GC).
with open("../output/" + dir_path + rules_file) as rules_fh:
    # JSON object keys are always strings; convert them back to integer relation ids.
    rules_dict = {int(rel): rules for rel, rules in json.load(rules_fh).items()}
rules_statistics(rules_dict)

Number of relations with rules:  427
Total number of rules:  26881
Number of rules by length:  [(1, 7847), (2, 7328), (3, 11706)]


In [5]:
# Walk sampler over the training edges. "exp" selects the transition
# distribution (presumably exponential weighting — confirm in temporal_walk).
temporal_walk = Temporal_Walk(
    data.train_idx,
    data.inv_relation_id,
    transition_distr="exp",
)
# Rule learner that works on the edges prepared by the walk sampler.
rl = Rule_Learner(
    temporal_walk.edges,
    data.id2relation,
    data.inv_relation_id,
    dataset,
)

#### Rule Learning

In [7]:
# Start from an empty rule store so that index 0 below is the rule created here.
rl.rules_dict = {}
length = 2
rel_idx = 42

# Keep sampling until the sampler reports a successful walk.
# The walk has one more step than the rule length (length + 1).
while True:
    walk_successful, walk = temporal_walk.sample_walk(length + 1, rel_idx)
    if walk_successful:
        break

rl.create_rule(walk)
print("Walk: ", walk)
try:
    # A missing key means create_rule did not store a rule for rel_idx.
    learned_rule = rl.rules_dict[rel_idx][0]
    print("Rule: ", learned_rule)
    print(verbalize_rule(learned_rule, data.id2relation))
except KeyError:
    print("No rule created because of missing body or rule support.")

Walk:  {'entities': [1161, 2442, 1540, 1161], 'relations': [42, 103, 236], 'timestamps': [201, 198, 170]}
Rule:  {'head_rel': 42, 'body_rels': [6, 333], 'var_constraints': [], 'conf': 0.006803, 'rule_supp': 1, 'body_supp': 147}
0.006803     1   147  Bring_lawsuit_against(X0,X2,T2) <- Accuse(X0,X1,T0), _Engage_in_negotiation(X1,X2,T1)


#### Rule Application

In [8]:
# Settings for the rule-application demo:
#   learn_edges - edges handed to ra.get_window_edges together with `window`
#   query_idx   - row of data.test_idx used as the test query
#   window      - window argument for ra.get_window_edges
#   top_k       - number of distinct top scores after which rule application stops
learn_edges = data.train_idx
query_idx, window, top_k = 42, 0, 20

In [9]:
# Pick the test quadruple and show it both as raw ids and in readable form.
test_query = data.test_idx[query_idx]
print(test_query)
query_subject, query_relation, query_object, query_ts = test_query
print(
    data.id2entity[query_subject],
    data.id2relation[query_relation],
    data.id2entity[query_object],
    data.id2ts[query_ts],
)

[ 956   53 4702  314]
Catherine_Ashton Consult Mohammad_Javad_Zarif 2014-11-11


In [10]:
# Apply the learned rules for the query relation and collect answer candidates.
#
# all_walks  - one DataFrame per applied rule, in rule order (empty if no walk matched)
# cands_dict - list with one candidate dict per parameter set in `args`
# candidates - candidates of the first parameter set, sorted by score (descending);
#              stays empty when no rule exists or no rule yields a candidate
all_walks = []
score_func = score_12
args = [[0.1, 0.5]]  # one parameter set for score_func (value semantics: see score_functions — TODO confirm)
cands_dict = [dict() for _ in range(len(args))]
dicts_idx = list(range(len(args)))
cur_ts = test_query[3]
edges = ra.get_window_edges(data.all_idx, cur_ts, learn_edges, window)

candidates = {}
if test_query[1] in rules_dict:
    for rule in rules_dict[test_query[1]]:
        walk_edges = ra.match_body_relations_complete(rule, edges, test_query[0])
        rule_walks = pd.DataFrame()
        # Only build walks when every body relation has at least one matching edge.
        if 0 not in [len(x) for x in walk_edges]:
            rule_walks = ra.get_walks_complete(rule, walk_edges)
            if rule['var_constraints']:
                rule_walks = ra.check_var_constraints(rule['var_constraints'], rule_walks)
        # Appended for every rule so that all_walks[i] lines up with rules_dict[...][i].
        all_walks.append(rule_walks)
        if not rule_walks.empty:
            cands_dict = ra.get_candidates(rule, rule_walks, cur_ts, cands_dict,
                                           score_func, args, dicts_idx)
            top_k_scores = [v for _, v in sorted(cands_dict[0].items(), key=lambda item: item[1],
                            reverse=True)][:top_k]
            # groupby collapses runs of equal scores in the sorted list.
            unique_scores = list(scores for scores, _ in itertools.groupby(top_k_scores))
            # Stop early once top_k distinct scores have been collected.
            if len(unique_scores) >= top_k:
                break
    # cands_dict is a non-empty list and therefore always truthy; the actual
    # "any candidates?" check has to look at the dict that is used below.
    if cands_dict[0]:
        candidates = dict(sorted(cands_dict[0].items(), key=lambda item: item[1], reverse=True))
    else:
        print("No candidates found after rule application.")
else:
    print("No rules exist for this query relation.")

In [11]:
# Inspect rule 0 for the query relation. all_walks[i] belongs to rule i because
# the rule-application loop appends exactly one frame per rule, in rule order.
i = 0  # Walks match the rule body, but none survive the time/variable constraints
print(rules_dict[test_query[1]][i])
# Last expression: rendered as the cell output.
all_walks[i]

{'head_rel': 53, 'body_rels': [127, 286, 196], 'var_constraints': [[0, 2]], 'conf': 1.0, 'rule_supp': 4, 'body_supp': 4}


Unnamed: 0,entity_0,relation_0,entity_1,timestamp_0,relation_1,entity_2,timestamp_1,relation_2,entity_3,timestamp_2


In [12]:
# Inspect rule 1 for the query relation; its entry in all_walks is the
# placeholder empty DataFrame created when a body relation has no matching edge.
i = 1  # There exist no walks that match the rule body
print(rules_dict[test_query[1]][i])
# Last expression: rendered as the cell output.
all_walks[i]

{'head_rel': 53, 'body_rels': [392, 443, 357], 'var_constraints': [[0, 2]], 'conf': 1.0, 'rule_supp': 1, 'body_supp': 1}


In [13]:
# Inspect rule 3 for the query relation together with the walks stored for it.
i = 3  # index into the rule list of the query relation
print(rules_dict[test_query[1]][i])
# Last expression: rendered as the cell output.
all_walks[i]

{'head_rel': 53, 'body_rels': [162, 333, 392], 'var_constraints': [[0, 2]], 'conf': 0.625, 'rule_supp': 25, 'body_supp': 40}


Unnamed: 0,entity_0,relation_0,entity_1,timestamp_0,relation_1,entity_2,timestamp_1,relation_2,entity_3,timestamp_2
56,956,162,4702,48,333,956,67,392,3341,96
57,956,162,4702,48,333,956,67,392,6293,125
58,956,162,4702,48,333,956,67,392,4702,145
59,956,162,4702,48,333,956,67,392,3155,146
60,956,162,4702,48,333,956,67,392,4702,146
...,...,...,...,...,...,...,...,...,...,...
1721,956,162,3155,153,333,956,292,392,3341,305
1722,956,162,3155,153,333,956,292,392,3341,312
1723,956,162,3155,153,333,956,292,392,4702,312
1724,956,162,3155,153,333,956,292,392,3341,313


In [14]:
# Readable form of the first walk stored for rule 3.
first_walk = all_walks[3].iloc[0]
print(ra.verbalize_walk(first_walk, data))

Catherine_Ashton	Meet_at_a_'third'_location	Mohammad_Javad_Zarif	2014-02-18	_Engage_in_negotiation	Catherine_Ashton	2014-03-09	_Meet_at_a_'third'_location	John_Kerry	2014-04-07


In [15]:
# Show the candidate dict from the rule-application cell (sorted by score, descending).
candidates

{4702: [0.7681121,
  0.72186536,
  0.7608129,
  0.49829265,
  0.6516088,
  0.63959795,
  0.22397095,
  0.21621622,
  0.20286654,
  0.25540158,
  0.19099829,
  0.43011084,
  0.42923483,
  0.20551877,
  0.5414048,
  0.5711301,
  0.61069965,
  0.19086082,
  0.6087973,
  0.15365991,
  0.1375,
  0.117024675,
  0.14674145,
  0.5654622,
  0.4831542,
  0.52172494,
  0.11160942,
  0.5190428,
  0.33336014,
  0.107144386,
  0.5109914,
  0.5078502,
  0.097586185,
  0.095694035,
  0.33415124,
  0.085304424,
  0.53206474,
  0.07882883,
  0.07492403,
  0.071431965,
  0.07038601,
  0.06944672,
  0.066535234,
  0.09464927,
  0.06045594,
  0.058943342,
  0.058825053,
  0.054979254,
  0.047514707,
  0.046184737,
  0.07957977,
  0.045454595,
  0.044354837,
  0.0437556,
  0.44816804,
  0.03197676,
  0.306015,
  0.33463788,
  0.031100478,
  0.030844156,
  0.035959903,
  0.10370993,
  0.4807206,
  0.027227722,
  0.02719686,
  0.47958168,
  0.027027065,
  0.026960785,
  0.026785715,
  0.4786284,
  0.025821596