In [2]:
import pickle

import networkx as nx

# Load the pre-built ASER graph (pickled NetworkX graph).
# `nx.read_gpickle` was deprecated in NetworkX 2.6 and removed in 3.0; the
# documented replacement is the standard-library `pickle` module, which also
# works on older NetworkX versions.
# SECURITY NOTE: unpickling can execute arbitrary code -- only load this file
# from a trusted source.
G_ASER_PATH = "/home/data/tfangaa/CKGP/data/ASER_raw_data/G_aser_norm_nodefilter_di_with_pattern.pickle"
with open(G_ASER_PATH, "rb") as graph_file:
    G_aser = pickle.load(graph_file)

# All commonsense relations for which result slots are created.
cs_rels = [
    "oEffect", "xEffect", "general Effect",
    "oWant", "xWant", "general Want",
    "oReact", "xReact", "general React",
    "xAttr", "xNeed", "xIntent",
    "isBefore", "isAfter", "HinderedBy",
    "Causes", "xReason", "HasSubEvent",
]

# Subset of relations that are actually matched against ASER edges below.
atomic_rels = [
    "oEffect", "xEffect",
    "oWant", "xWant",
    "oReact", "xReact",
    "xAttr", "xNeed", "xIntent",
    "isBefore", "isAfter", "HinderedBy",
]

# Relation -> rule category. The category selects the edge and node rules.
cs_rel2cat = {
    "oEffect": "Effects",
    "xEffect": "Effects",
    "general Effect": "Effects",
    "oWant": "Effects",
    "xWant": "Effects",
    "general Want": "Effects",
    "oReact": "Reacts",
    "xReact": "Reacts",
    "general React": "Reacts",
    "xAttr": "Stative",
    "xNeed": "CausedBy",
    "xIntent": "CausedBy",
    "isBefore": "Effects",
    "isAfter": "CausedBy",
    "HinderedBy": "HinderedBy",
    "Causes": "Effects",
    "xReason": "CausedBy",
    "HasSubEvent": "HasSubEvent",
}


def _edge_rule(out=(), into=(), both=()):
    """Build one category's edge rule; every direction gets its own fresh list."""
    return {"out": list(out), "in": list(into), "both_dir": list(both)}


_FORWARD = ("Result", "Precedence")
_BACKWARD = ("Condition", "Succession", "Reason")

# Category -> ASER edge labels accepted per direction:
#   "out"      : labels accepted on an edge read head -> tail
#   "in"       : labels accepted on an edge read tail -> head (inverse)
#   "both_dir" : labels accepted in either direction
cat_edge_rules = {
    "Effects": _edge_rule(out=_FORWARD, into=_BACKWARD),
    "Reacts": _edge_rule(out=_FORWARD, into=_BACKWARD),
    "CausedBy": _edge_rule(out=_BACKWARD, into=_FORWARD),
    "Stative": _edge_rule(
        both=("Synchronous", "Reason", "Result", "Condition", "Conjunction", "Restatement"),
    ),
    "HinderedBy": _edge_rule(both=("Concession", "Alternative")),
    "HasSubEvent": _edge_rule(both=("Synchronous", "Conjunction")),
}

# Category -> node patterns required on the tail side of a candidate pair.
# An empty list means "no pattern constraint".
cat_node_rules = {
    "Effects": [],
    "Reacts": ["s-v-a", "s-be-a", "s-v"],
    "CausedBy": [],
    "Stative": ["s-v-a", "s-be-a", "spass-v"],
    "HinderedBy": [],
    "HasSubEvent": [],
}

|relation|ASER edges types|ASER nodes types|
|:--:|:--:|:--:|
|Effect | Result, Precedence, Condition^-1, Succession^-1, Reason^-1 | - |
|Reaction| Result, Precedence, Condition^-1, Succession^-1, Reason^-1 |s-v/be-a/o, s-v-be-a/o, s-v, spass-v |
|CausedBy | Condition, Succession, Reason, Result^-1, Precedence^-1 | - |
|Stative | Synchronous^±1, Reason^±1, Result^±1, Condition^±1, Conjunction^±1, Restatement^±1 |s-v/be-a/o, s-v-be-a/o, s-v, spass-v|
|HinderedBy| Concession^±1, Alternative^±1 | - |
|HasSubEvent| Synchronous^±1, Conjunction^±1 | -|

In [3]:
# 1. test set heads

def check_relation(aser_rel_list, candi_rel_list):
    """
      Return True when at least one relation from aser_rel_list also
      occurs in candi_rel_list; return False otherwise (including when
      aser_rel_list is empty).
    """
    for aser_rel in aser_rel_list:
        if aser_rel in candi_rel_list:
            return True
    return False
def check_node_pattern(patterns, selected_patterns=None):
    """
      Check whether a node satisfies a pattern constraint.

      patterns: a list of patterns of a node.
      selected_patterns: the accepted patterns. If None or empty, there is
        no constraint and the function returns True regardless of `patterns`.

      Returns True if there is no constraint, or if at least one entry of
      `patterns` is contained in `selected_patterns`; False otherwise.
    """
    # `None` replaces the former mutable `[]` default; both mean "no constraint".
    if selected_patterns is None or len(selected_patterns) == 0:
        return True
    return any(p in selected_patterns for p in patterns)

from tqdm import tqdm
# Collect candidate (head, tail) pairs for every atomic relation by matching
# each ASER edge against the edge/node rules of the relation's category.
candidates = {r: [] for r in cs_rels}

# Relations that share a category have identical rules, so each category is
# evaluated once per edge and the result is fanned out to all of its relations.
rels_by_cat = {}
for r in atomic_rels:
    rels_by_cat.setdefault(cs_rel2cat[r], []).append(r)

# Hoisted out of the edge loop: these lookups and list concatenations do not
# depend on the edge, and the loop below runs once per ASER edge.
cat_checks = [
    (
        rels,
        cat_edge_rules[cat]["out"] + cat_edge_rules[cat]["both_dir"],  # accepted head -> tail
        cat_edge_rules[cat]["in"] + cat_edge_rules[cat]["both_dir"],   # accepted tail -> head
        cat_node_rules[cat],
    )
    for cat, rels in rels_by_cat.items()
]

for head, tail, feat in tqdm(G_aser.edges.data()):
    # Relation labels present on this edge (keys of the edge's "relation" mapping).
    edge_rels = feat["relation"].keys()
    for rels, forward_rels, backward_rels, node_rules in cat_checks:
        # Forward: the edge as stored (head -> tail) matches the category's
        # edge rules and the tail node satisfies the node-pattern rule.
        if check_relation(edge_rels, forward_rels) \
                and check_node_pattern(G_aser.nodes[tail]["patterns"], node_rules):
            pair = (head, tail)
            for r in rels:
                candidates[r].append(pair)
        # Backward: the edge matches an inverse rule, so the roles are swapped
        # and the original head (the new tail) must satisfy the node rule.
        if check_relation(edge_rels, backward_rels) \
                and check_node_pattern(G_aser.nodes[head]["patterns"], node_rules):
            pair = (tail, head)
            for r in rels:
                candidates[r].append(pair)

100%|██████████| 69002035/69002035 [30:24<00:00, 37815.03it/s]


In [9]:
# Drop duplicate (head, tail) pairs. `dict.fromkeys` keeps the first occurrence
# of each pair in its original position, so the result is deterministic across
# runs (iteration order of a `set` of strings depends on the per-process hash seed).
for r in atomic_rels:
    candidates[r] = list(dict.fromkeys(candidates[r]))

# Post-process filter:
# remove triples that don't follow the definition of the relations.
#   x* relations (e.g. xEffect): head and tail must both start with "PersonX".
#   o* relations (e.g. oEffect): head must start with "PersonX", tail with "PersonY".
#   every other relation: no constraint, all candidates are kept.
filtered_triples = {r: [] for r in cs_rels}
for r in atomic_rels:
    if r.startswith("x"):
        tail_prefix = "PersonX"
    elif r.startswith("o"):
        tail_prefix = "PersonY"
    else:
        # No subject constraint; this stores the same list object as candidates[r].
        filtered_triples[r] = candidates[r]
        continue
    filtered_triples[r] = [
        (head, tail)
        for head, tail in tqdm(candidates[r])
        if head.startswith("PersonX") and tail.startswith(tail_prefix)
    ]

100%|██████████| 13766390/13766390 [00:51<00:00, 267082.51it/s]
100%|██████████| 13766390/13766390 [00:16<00:00, 834940.90it/s]
100%|██████████| 13766390/13766390 [00:15<00:00, 908696.15it/s]
100%|██████████| 13766390/13766390 [00:14<00:00, 935796.71it/s]
100%|██████████| 8370927/8370927 [00:08<00:00, 1006224.82it/s]
100%|██████████| 8370927/8370927 [00:08<00:00, 964141.37it/s] 
100%|██████████| 21625813/21625813 [00:23<00:00, 929434.52it/s] 
100%|██████████| 13766390/13766390 [00:13<00:00, 990868.42it/s] 
100%|██████████| 13766390/13766390 [00:13<00:00, 998891.64it/s] 


In [12]:
# Number of filtered candidate triples per atomic relation.
for r in atomic_rels:
    print(r, len(filtered_triples[r]))

# Total across all atomic relations. Kept as the cell's last expression so that
# Jupyter displays it; a bare expression earlier in the cell is evaluated and
# then silently discarded.
sum(len(filtered_triples[r]) for r in atomic_rels)

oEffect 901681
xEffect 1784749
oWant 901681
xWant 1784749
oReact 541084
xReact 1055518
xAttr 2549305
xNeed 1784749
xIntent 1784749
isBefore 13766390
isAfter 13766390
HinderedBy 2832829


In [11]:
import numpy as np
# Persist the filtered candidates. `filtered_triples` is a dict, so NumPy
# stores it as a pickled object array; per the NumPy docs it has to be read
# back with `np.load(path, allow_pickle=True).item()`.
filtered_triples_path = "../data/DISCOS_infer_candidates_filter"
np.save(filtered_triples_path, filtered_triples)