In [1]:
import pickle
import sys

import networkx as nx
import numpy as np
from tqdm import trange

sys.path.append('../')
from utils.aser_to_glucose import generate_aser_to_glucose_dict

In [2]:
# Read the two pickled dictionaries this notebook works from:
#   * glucose_matching -- indexed below as glucose_matching[list_id][index], each record
#     exposing 'total_head', 'total_tail', 'both', 'head_id' and 'tail_id'
#   * fail_index       -- indexed below as fail_index['head' | 'tail'][list_id]; records
#     whose index appears there are skipped
def _load_pickled_dict(path):
    """Load a 0-d object array saved with np.save and return the object it wraps."""
    # NOTE(review): allow_pickle=True unpickles arbitrary objects -- only use on trusted files.
    return np.load(path, allow_pickle=True).item()


glucose_matching = _load_pickled_dict('../process_dataset/Final_Version/glucose_final_matching.npy')
fail_index = _load_pickled_dict('./unusable_index.npy')

In [3]:
# Accumulators filled by the aggregation cell below.
# total_*: every matched head / tail; both_*: heads / tails taken from the 'both' pairs.
total_head = []
total_tail = []
both_head = []
both_tail = []

In [4]:
# Walk the GLUCOSE lists 1..10 and pool the matched heads and tails of every record
# whose index is not flagged in fail_index (neither as a failed head nor a failed tail).
for i in trange(1, 11):
    for ind, record in glucose_matching[i].items():
        if ind in fail_index['head'][i] or ind in fail_index['tail'][i]:
            continue
        total_head.extend(record['total_head'])
        total_tail.extend(record['total_tail'])
        # Each element of 'both' is indexed as (head, tail).
        both_head.extend(pair[0] for pair in record['both'])
        both_tail.extend(pair[1] for pair in record['both'])

# Collapse each pool to its sorted unique values (np.unique), stored back as plain lists.
total_head, total_tail, both_head, both_tail = (
    list(np.unique(pool)) for pool in (total_head, total_tail, both_head, both_tail)
)
print(
    "There are total {} unique heads and {} unique tails.\nAmong which, {} heads and {} tails contribute are connected by edges.".format(
        len(total_head), len(total_tail), len(both_head), len(both_tail)))

100%|██████████| 10/10 [00:02<00:00,  4.13it/s]


There are total 123267 unique heads and 127853 unique tails.
Among which, 97922 heads and 100624 tails contribute are connected by edges.


In [5]:
# Build the directed GLUCOSE graph.
#
# For every usable record (index not flagged in fail_index) of lists 1..10:
#   1. each (head, tail) pair in record['both'] becomes a 'Cause' edge;
#   2. every head in 'total_head' and every tail in 'total_tail' becomes a node, and a
#      head/tail combination additionally becomes a 'Cause' edge when both strings start
#      with the same space-separated token (presumably the subject -- TODO confirm).
#
# Fix: the original guard was `(u, v) not in G_Glucose`, which tests *node* membership.
# Nodes here are strings, so a tuple is never found, the guard was always true, and
# add_edge() kept overwriting the attributes of already-present edges. has_edge() is the
# edge-membership test, so the first record that creates an edge now keeps its attributes.
# The sets of edges and nodes are unaffected by this change.
G_Glucose = nx.DiGraph()
for i in trange(1, 11):
    for ind in glucose_matching[i].keys():
        if ind in fail_index['head'][i] or ind in fail_index['tail'][i]:
            continue

        record = glucose_matching[i][ind]

        for h, t in record['both']:
            # The third positional argument (True) is only passed for 'both' pairs; its
            # meaning is defined in utils.aser_to_glucose and is not visible here.
            _, re_h, re_t, _ = generate_aser_to_glucose_dict(h, t, True)
            if not G_Glucose.has_edge(re_h, re_t):
                G_Glucose.add_edge(re_h, re_t, dataset='GLUCOSE', relation='Cause', list_id=i,
                                   hid=record['head_id'], tid=record['tail_id'])

        for head in record['total_head']:
            for tail in record['total_tail']:
                _, re_head, re_tail, _ = generate_aser_to_glucose_dict(head, tail)
                # add_node() on an existing node updates its attributes, so the most
                # recently processed record determines list_id / hid / tid of a node.
                G_Glucose.add_node(re_head, dataset='GLUCOSE', list_id=i, hid=record['head_id'])
                G_Glucose.add_node(re_tail, dataset='GLUCOSE', list_id=i, tid=record['tail_id'])
                # Only link combinations whose raw strings share the same first token.
                if head.split(' ')[0] == tail.split(' ')[0]:
                    if not G_Glucose.has_edge(re_head, re_tail):
                        G_Glucose.add_edge(re_head, re_tail, dataset='GLUCOSE', relation='Cause', list_id=i,
                                           hid=record['head_id'],
                                           tid=record['tail_id'])

100%|██████████| 10/10 [02:07<00:00, 12.79s/it]


In [6]:
# Spot-check the graph: show entries 10..29 of the edge list and of the node list,
# each with its attribute dict, alternating edge / node.
edge = list(G_Glucose.edges(data=True))
node = list(G_Glucose.nodes(data=True))
for i in range(10, 30):
    print(edge[i], node[i], sep='\n')

('PersonX go to game', 'PersonX give up a home run', {'dataset': 'GLUCOSE', 'relation': 'Cause', 'list_id': 6, 'hid': 11080, 'tid': 11080})
('PersonX hold back', {'dataset': 'GLUCOSE', 'list_id': 1, 'tid': 7})
('PersonX go to game', 'PersonX watch the game', {'dataset': 'GLUCOSE', 'relation': 'Cause', 'list_id': 6, 'hid': 1981, 'tid': 1981})
('PersonX hold back tear', {'dataset': 'GLUCOSE', 'list_id': 1, 'tid': 7})
('PersonX go to game', 'PersonX feel disappointed', {'dataset': 'GLUCOSE', 'relation': 'Cause', 'list_id': 7, 'hid': 11080, 'tid': 11080})
('PersonY hold back', {'dataset': 'GLUCOSE', 'list_id': 1, 'tid': 7})
('PersonX go to game', 'PersonX be at home', {'dataset': 'GLUCOSE', 'relation': 'Cause', 'list_id': 8, 'hid': 11080, 'tid': 11080})
('PersonY hold back tear', {'dataset': 'GLUCOSE', 'list_id': 1, 'tid': 7})
('PersonX go to game', 'PersonY have be', {'dataset': 'GLUCOSE', 'relation': 'Cause', 'list_id': 10, 'hid': 11080, 'tid': 11080})
('PersonX like to grab it', {'datas

In [7]:
# Persist the graph next to the notebook and in the shared dataset directory.
# nx.write_gpickle was deprecated in NetworkX 2.6 and removed in 3.0; it was a thin
# wrapper around pickle.dump with the highest protocol, so calling pickle directly
# produces an equivalent file and works on every NetworkX version.
for out_path in ('./G_Glucose.pickle', '../../dataset/G_Glucose.pickle'):
    with open(out_path, 'wb') as fout:
        pickle.dump(G_Glucose, fout, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
# Report the graph size using the built-in counters instead of materialising a list of
# ones per edge / node just to sum it.
print("Total Edges in Glucose: {}".format(G_Glucose.number_of_edges()))
print("Total Nodes in Glucose: {}".format(G_Glucose.number_of_nodes()))

Total Edges in Glucose: 236323
Total Nodes in Glucose: 92729


In [10]:
# Sanity check: print, for each sample phrase, whether it exists as a node in the graph.
nodes = list(G_Glucose.nodes.keys())
for phrase in (
    'PersonX eat something',
    'PersonX have a something',
    'PersonX have a it',
    'PersonX eat a something',
    'PersonX eat a it',
    'PersonX wait for it',
    'PersonX wait for something',
):
    print(phrase in nodes)

True
True
True
True
True
True
True
