In [1]:
import os
import json
import collections

from glob import glob
from tqdm import tqdm

In [2]:
# Input directory: every entry is opened as a text file whose lines are parsed as
# `arg0 arg1 rel cnt` (whitespace-separated) by the processing loop in this notebook.
temp_agg_dir = '../data/cgw/temporal/lome'
# Output directory: for each input file, a knowledge file with the same filename is written here.
# NOTE(review): the directory name suggests the files are meant for Tetrad's causal-cmd — confirm.
knowledge_dir = '../data/cgw/temporal/causal-cmd'

In [4]:
def _read_agg_temp_rels(path):
    """Read one aggregated temporal-relation file into normalized pair counts.

    Every non-blank line must contain four whitespace-separated fields,
    ``arg0 arg1 rel cnt``, where ``rel`` is 'before' or 'after' and ``cnt`` is an integer.
    Rows whose two arguments are identical are ignored.

    Returns:
        A ``defaultdict(dict)`` keyed by the sorted pair ``(a, b)`` with ``a < b``. The inner
        dict maps '->' to the count for "a before b" and '<-' to the count for "b before a".

    Raises:
        ValueError: if a non-blank line does not have exactly four fields, or ``cnt`` is not
            an integer.
        AssertionError: if ``rel`` is neither 'before' nor 'after'.
    """
    rels = collections.defaultdict(dict)

    with open(path) as fin:
        for line in fin:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines instead of failing to unpack
            arg0, arg1, rel, cnt = fields
            if arg0 == arg1:
                continue  # a self-pair carries no ordering information
            assert rel in ('before', 'after'), f'unexpected relation {rel!r} in {path}'
            if arg0 < arg1:
                arg_tuple = (arg0, arg1)
                arrow = '->' if rel == 'before' else '<-'
            else:
                # The pair is stored in sorted order, so the direction must be flipped too.
                arg_tuple = (arg1, arg0)
                arrow = '<-' if rel == 'before' else '->'
            # NOTE(review): a later row for the same normalized pair and direction replaces the
            # earlier count rather than adding to it — confirm the input never lists one pair in
            # both orientations with different counts.
            rels[arg_tuple][arrow] = int(cnt)

    return rels


def _select_edges(agg_temp_rels, support_threshold=30, conf_threshold=3):
    """Pick the dominant direction of each pair and forbid the opposite one.

    Counts are add-one smoothed so the ratio is always defined. ``support`` is the larger
    smoothed count and ``conf`` is the larger divided by the smaller. A pair is kept only when
    ``support > support_threshold`` and ``conf > conf_threshold`` (both strict).

    Returns:
        ``(possible_edges, forbidden_edges)``: a dict mapping each dominant edge ``(u, v)`` to
        ``(support, conf)``, and the set of the reversed edges ``(v, u)``.
    """
    possible_edges = {}
    forbidden_edges = set()

    for (arg0, arg1), cnts in agg_temp_rels.items():
        cnt_before = cnts.get('->', 0) + 1  # smooth to avoid zeros
        cnt_after = cnts.get('<-', 0) + 1
        support = max(cnt_before, cnt_after)
        conf = support / min(cnt_before, cnt_after)
        if support > support_threshold and conf > conf_threshold:
            # Equal counts give conf == 1, so with the default threshold a tie never gets here.
            edge = (arg0, arg1) if cnt_before > cnt_after else (arg1, arg0)
            possible_edges[edge] = (support, conf)
            forbidden_edges.add((edge[1], edge[0]))

    return possible_edges, forbidden_edges


def _write_knowledge(path, forbidden_edges):
    """Write the forbidden edges to ``path`` as a '/knowledge' file with a 'forbiddirect' section.

    Edges are written in sorted order: iterating a set of strings directly would produce a
    different line order from one kernel run to the next.
    """
    with open(path, 'w') as fout:
        fout.write('/knowledge\n\nforbiddirect\n')
        for u, v in sorted(forbidden_edges):
            fout.write(f'{u} {v}\n')


# The output directory may not exist yet on a fresh checkout.
os.makedirs(knowledge_dir, exist_ok=True)

for filename in sorted(os.listdir(temp_agg_dir)):
    temp_agg_path = f'{temp_agg_dir}/{filename}'
    knowledge_path = f'{knowledge_dir}/{filename}'

    if not os.path.isfile(temp_agg_path):
        continue  # os.listdir also returns sub-directories, which cannot be opened as files

    # Read aggregated temporal relations
    agg_temp_rels = _read_agg_temp_rels(temp_agg_path)

    # Construct a set of possible and forbidden edges, according to aggregated temporal ordering stats
    possible_edges, forbidden_edges = _select_edges(agg_temp_rels)

    # Output knowledge file
    _write_knowledge(knowledge_path, forbidden_edges)

---