In [18]:
import bisect
import copy
import gc
import json
import os
import pickle
import shutil
import time
from collections import defaultdict, Counter

from tqdm import tqdm

from aser.database.base import SqliteDBConnection, MongoDBConnection
from aser.database.kg_connection import CHUNKSIZE
from aser.database.kg_connection import EVENTUALITY_TABLE_NAME, EVENTUALITY_COLUMNS, EVENTUALITY_COLUMN_TYPES
from aser.database.kg_connection import RELATION_TABLE_NAME, RELATION_COLUMNS, RELATION_COLUMN_TYPES
from aser.concept.concept_extractor import ASERConceptExtractor
from aser.concept.concept_connection import ASERConceptConnection
from aser.concept.concept_connection import CONCEPT_TABLE_NAME, CONCEPT_COLUMNS, CONCEPT_COLUMN_TYPES
from aser.concept.concept_connection import CONCEPTINSTANCEPAIR_TABLE_NAME, CONCEPTINSTANCEPAIR_COLUMNS, CONCEPTINSTANCEPAIR_COLUMN_TYPES
from aser.extract.aser_extractor import DiscourseASERExtractor
from aser.eventuality import Eventuality
from aser.relation import Relation, relation_senses

In [6]:
def convert_row_to_eventuality(row):
    """Rebuild an ``Eventuality`` from one eventuality-table row.

    Args:
        row: mapping with the keys ``"info"``, ``"_id"``, ``"frequency"``
            and ``"pattern"``.

    Returns:
        The object returned by ``Eventuality().decode(row["info"])``, with
        its ``eid``, ``frequency`` and ``pattern`` attributes overwritten by
        the values stored in the row's own columns.
    """
    decoded = Eventuality().decode(row["info"])
    # Column values take precedence over whatever the decoded payload carried.
    for attribute, column in (("eid", "_id"), ("frequency", "frequency"), ("pattern", "pattern")):
        setattr(decoded, attribute, row[column])
    return decoded

def convert_row_to_relation(row):
    """Build a ``Relation`` from one relation-table row.

    Args:
        row: mapping with ``"hid"`` and ``"tid"`` keys plus any number of
            other columns.

    Returns:
        ``Relation(hid, tid, weights)`` where ``weights`` keeps only the
        columns whose value is a ``float`` strictly greater than zero.
    """
    positive_weights = {}
    for column, value in row.items():
        # Non-float columns (ids, etc.) and zero weights are dropped.
        if isinstance(value, float) and value > 0.0:
            positive_weights[column] = value
    return Relation(row["hid"], row["tid"], positive_weights)

def build_concept_instance_table_from_aser_kg(aser_conceptualizer, erows):
    """Conceptualize every eventuality row and collect concepts and pairs.

    Args:
        aser_conceptualizer: object whose ``conceptualize(eventuality)``
            yields ``(concept, score)`` pairs.
        erows: iterable of rows accepted by ``convert_row_to_eventuality``.

    Returns:
        A 3-tuple ``(cid2concept, concept_instance_pairs, cid_to_filter_score)``:
        * ``cid2concept`` maps each concept id to one shallow copy of the
          first concept object seen with that id;
        * ``concept_instance_pairs`` lists ``(concept, eventuality, score)``
          for every conceptualization result, duplicates included;
        * ``cid_to_filter_score`` sums ``score * eventuality.frequency`` over
          the instances that were newly added to each concept.
    """
    concepts_by_cid = {}
    pairs = []
    filter_score_by_cid = {}
    for row in tqdm(erows):
        eventuality = convert_row_to_eventuality(row)
        for candidate, score in aser_conceptualizer.conceptualize(eventuality):
            cid = candidate.cid
            canonical = concepts_by_cid.get(cid)
            if canonical is None:
                # copy.copy is shallow: the copy shares attribute objects
                # (e.g. the instances list) with the candidate it came from.
                canonical = concepts_by_cid[cid] = copy.copy(candidate)
            instance = (eventuality.eid, eventuality.pattern, score)
            if instance not in canonical.instances:
                canonical.instances.append(instance)
                # Only first-time instances contribute to the filter score.
                filter_score_by_cid[cid] = (
                    filter_score_by_cid.get(cid, 0.0) + score * eventuality.frequency
                )
            pairs.append((canonical, eventuality, score))
    return concepts_by_cid, pairs, filter_score_by_cid

def build_concept_relation_table_from_aser_kg(aser_concept_conn, rrows):
    """Lift eventuality-level relations to concept-level relations.

    For every concept ``h_cid`` and each of its instance eventualities, the
    outgoing eventuality relations are looked up; every concept of the tail
    eventuality then receives the relation weights scaled by
    ``instance_score * tail_concept_score``.

    Args:
        aser_concept_conn: object exposing ``cids``,
            ``get_eventualities_given_concept(cid)`` (yielding
            ``(eid, pattern, score)`` triples) and
            ``get_concepts_given_eventuality(eid)`` (yielding
            ``(concept, score)`` pairs).
        rrows: iterable of relation rows with ``"hid"`` and ``"tid"`` keys,
            accepted by ``convert_row_to_relation``.

    Returns:
        dict mapping ``Relation.generate_rid(h_cid, t_cid)`` to a ``Relation``
        between the two concepts. Self-relations (``h_cid == t_cid``) are
        skipped. Scaled weights are passed to ``Relation.update``
        (presumably additive — confirm against the Relation class).
    """
    rid2relation = dict()

    # Index outgoing relations by head eventuality id.
    hid2related_events = defaultdict(list)
    for rrow in rrows:
        relation = convert_row_to_relation(rrow)
        hid2related_events[rrow["hid"]].append((rrow["tid"], relation))

    for h_cid in tqdm(aser_concept_conn.cids):
        instances = aser_concept_conn.get_eventualities_given_concept(h_cid)
        for h_eid, _, instance_score in instances:
            # eid -> event -> related eids -> related events, relations -> related concepts, relations
            # Use .get(): subscripting the defaultdict would insert an empty
            # list for every instance without outgoing relations and grow
            # the index for no benefit.
            for t_eid, relation in hid2related_events.get(h_eid, ()):
                concept_score_pairs = aser_concept_conn.get_concepts_given_eventuality(t_eid)
                for t_concept, score in concept_score_pairs:
                    t_cid = t_concept.cid
                    if h_cid == t_cid:
                        continue
                    rid = Relation.generate_rid(h_cid, t_cid)
                    if rid not in rid2relation:
                        rid2relation[rid] = Relation(h_cid, t_cid)
                    rid2relation[rid].update(
                        {k: v * instance_score * score for k, v in relation.relations.items()})
    return rid2relation

In [7]:
# Load the whole eventuality/relation KG into memory, keyed by id.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
kg_path = "/home/xliucr/ASER/database/core_2.0/all/KG.db"

kg_conn = SqliteDBConnection(kg_path, CHUNKSIZE)
eid2eventuality = dict()
rid2relation = dict()

try:
    for erow in kg_conn.get_columns(EVENTUALITY_TABLE_NAME, EVENTUALITY_COLUMNS):
        eventuality = convert_row_to_eventuality(erow)
        eid2eventuality[eventuality.eid] = eventuality

    for rrow in kg_conn.get_columns(RELATION_TABLE_NAME, RELATION_COLUMNS):
        relation = convert_row_to_relation(rrow)
        rid2relation[relation.rid] = relation
finally:
    # Release the database handle even if decoding a row fails midway.
    kg_conn.close()

# Last expression of the cell: the number of unreachable objects collected.
gc.collect()

216

In [None]:
# `pickle` is already imported in the first cell; the repeat only lets this
# cell run on its own.
import pickle

# NOTE(review): unpickling can execute arbitrary code — only load this file
# when it comes from a trusted source.
# Judging by the name, this maps relation ids to sentence ids — TODO confirm.
with open("/home/xliucr/ASER/database/core_2.0/all/rid2sids.pkl", "rb") as f:
    rid2sids = pickle.load(f)

In [8]:
# Extractor used by the cells below to turn raw text into eventualities.
# NOTE(review): corenlp_path="x" looks like a placeholder — presumably an
# already-running CoreNLP server on port 15000 is expected; confirm.
aser_extractor = DiscourseASERExtractor(corenlp_path="x", corenlp_port=15000)



In [9]:
# Probase-backed conceptualizer; its conceptualize() is called on extracted
# eventualities further down.
# probase_topk=5 is passed straight to the extractor — presumably the number
# of Probase concepts kept per instance; TODO confirm.
aser_conceptualizer = ASERConceptExtractor(
    method="probase",
    # Alternative Probase dump locations (other machines), kept for reference:
    # probase_path="/data/hjpan/probase/data-concept-instance-relations-yq.txt",
    # probase_path=r"D:\Data\probase\data-concept-instance-relations-yq.txt",
    probase_path="/home/data/corpora/probase/data-concept-instance-relations-demo.txt",
    probase_topk=5)

[probase-concept] Loading Probase files...


  0%|          | 79/33377320 [00:01<134:37:51, 68.87it/s]

[probase-concept] Building index...


100%|██████████| 33377320/33377320 [02:02<00:00, 272556.59it/s]


[probase-concept] Loading data finished in 138.20 s


In [None]:
# Probe the extractor on a template sentence to see what it produces.
r = aser_extractor.extract_from_text("PERSON feel hungry.")

In [None]:
# Dump every attribute of the [0][0][0] element of the extraction result.
r[0][0][0].__dict__

In [None]:
# skeleton_words is the attribute the filtering loop below matches on.
r[0][0][0].skeleton_words

In [None]:
# Same probe for a second template sentence; note that `r` is overwritten.
r = aser_extractor.extract_from_text("PERSON eat food.")

In [None]:
r[0][0][0].__dict__

In [None]:
# Eids of KG eventualities whose pattern and trailing skeleton words match
# one of the phrases of interest.
be_hungry_list = []
feel_hungry_list = []
eat_food_list = []
have_breakfast_list = []
have_lunch_list = []
have_dinner_list = []

# (pattern, second-to-last skeleton word, last skeleton word) -> target list
eid_buckets = {
    ("s-be-a", "be", "hungry"): be_hungry_list,
    ("s-v-a", "feel", "hungry"): feel_hungry_list,
    ("s-v-o", "eat", "food"): eat_food_list,
    ("s-v-o", "have", "breakfast"): have_breakfast_list,
    ("s-v-o", "have", "lunch"): have_lunch_list,
    ("s-v-o", "have", "dinner"): have_dinner_list,
}

for eid, e in tqdm(eid2eventuality.items()):
    # Only these three patterns can match any bucket.
    if e.pattern not in ("s-be-a", "s-v-a", "s-v-o"):
        continue
    skeleton_words = e.skeleton_words
    bucket = eid_buckets.get((e.pattern, skeleton_words[-2], skeleton_words[-1]))
    if bucket is not None:
        bucket.append(eid)

In [None]:
# Sizes of the six eid buckets, space-separated on one line.
print(*(len(eids) for eids in (
    be_hungry_list,
    feel_hungry_list,
    eat_food_list,
    have_breakfast_list,
    have_lunch_list,
    have_dinner_list,
)))

In [None]:
def _extract_eid(text):
    """Return the ``eid`` of the ``[0][0][0]`` element extracted from ``text``.

    That element is presumably the first extracted eventuality — confirm
    against the extractor's return structure.
    """
    return aser_extractor.extract_from_text(text)[0][0][0].eid


# Empty relations that the aggregation cell below fills in.
# The head phrase is extracted once instead of once per relation; this
# assumes extraction is deterministic, so the eid is the same either way.
be_hungry_eid = _extract_eid("PERSON be hungry.")

be_hungry_to_eat_food = Relation(be_hungry_eid, _extract_eid("PERSON eat food."))

be_hungry_to_have_breakfast = Relation(be_hungry_eid, _extract_eid("PERSON have breakfast."))

be_hungry_to_have_lunch = Relation(be_hungry_eid, _extract_eid("PERSON have lunch."))

be_hungry_to_have_dinner = Relation(be_hungry_eid, _extract_eid("PERSON have dinner."))


In [None]:
# Fold every KG relation from a "be hungry" eventuality to one of the tail
# groups into the matching aggregate relation.
# NOTE: the aggregates are updated in place, so re-running this cell without
# re-creating them feeds the same relations in a second time.
tail_groups = (
    (eat_food_list, be_hungry_to_eat_food),
    (have_breakfast_list, be_hungry_to_have_breakfast),
    (have_lunch_list, be_hungry_to_have_lunch),
    (have_dinner_list, be_hungry_to_have_dinner),
)

for head_eid in tqdm(be_hungry_list):
    for tail_eids, aggregate in tail_groups:
        for tail_eid in tail_eids:
            # Distinct local name: `r` holds an extraction result in earlier cells.
            found = rid2relation.get(Relation.generate_rid(head_eid, tail_eid))
            if found:
                aggregate.update(found.relations)

In [None]:
# Display the aggregated "be hungry" -> "eat food" relation.
be_hungry_to_eat_food

In [None]:
# Display the aggregated "be hungry" -> "have breakfast" relation.
be_hungry_to_have_breakfast

In [None]:
# Display the aggregated "be hungry" -> "have lunch" relation.
be_hungry_to_have_lunch

In [None]:
# Display the aggregated "be hungry" -> "have dinner" relation.
be_hungry_to_have_dinner

In [None]:
# Reverse direction: "eat food" as head, "be hungry" as tail. The relation is
# created without weights and printed as-is.
reverse_head_eid = aser_extractor.extract_from_text("PERSON eat food.")[0][0][0].eid
reverse_tail_eid = aser_extractor.extract_from_text("PERSON be hungry.")[0][0][0].eid
eat_food_to_be_hungry = Relation(reverse_head_eid, reverse_tail_eid)
print(eat_food_to_be_hungry)

In [None]:
# List every stored relation from a "be hungry" eventuality to an
# "eat food" eventuality, one per paragraph.
for head_eid in be_hungry_list:
    for tail_eid in eat_food_list:
        pair_rid = Relation.generate_rid(head_eid, tail_eid)
        pair_relation = rid2relation.get(pair_rid)
        if not pair_relation:
            continue
        print(pair_rid, pair_relation)
        print()

In [None]:
# Spot-check individual relations by their hard-coded rid: show the rid2sids
# entry next to the relation itself. Each lookup raises KeyError if the rid is
# missing from either dict.
rid2sids["54e31deec8120df8a64a11f7c6b734179a45b828"], rid2relation["54e31deec8120df8a64a11f7c6b734179a45b828"]

In [None]:
rid2sids["e16aaaf2d465d4dbcb24fd446c4b384d16a029f5"], rid2relation["e16aaaf2d465d4dbcb24fd446c4b384d16a029f5"]

In [None]:
rid2sids["4a213852913f255a88efbe9844a6e969efc8d367"], rid2relation["4a213852913f255a88efbe9844a6e969efc8d367"]

In [None]:
rid2sids["106766755d0d3c301b02ee4db9961b8f524f23d7"], rid2relation["106766755d0d3c301b02ee4db9961b8f524f23d7"]

In [None]:
def query_relation(text1, text2, exact_match=True):
    """Look up the KG relation between the eventualities of two texts.

    The ``[0][0]`` element of ``extract_eventualities_from_text`` is used for
    each text as head (``text1``) and tail (``text2``).

    Args:
        text1: text describing the head eventuality.
        text2: text describing the tail eventuality.
        exact_match: when True, return the stored relation whose rid is
            generated from the two extracted eids. When False, build a new
            relation that merges every stored relation whose head is exactly
            the extracted head and whose tail shares the extracted tail's
            pattern and skeleton words, or vice versa.

    Returns:
        A ``Relation``. In the non-exact mode it may carry no weights.

    Raises:
        KeyError: in exact mode, if no relation with that rid is stored.
        IndexError: if nothing is extracted from one of the texts.
    """
    e1 = aser_extractor.extract_eventualities_from_text(text1)[0][0]
    e2 = aser_extractor.extract_eventualities_from_text(text2)[0][0]
    if exact_match:
        return rid2relation[Relation.generate_rid(e1.eid, e2.eid)]

    def _same_shape(candidate, reference):
        # A candidate missing from eid2eventuality (None) never matches.
        return (
            candidate is not None
            and candidate.pattern == reference.pattern
            and candidate.skeleton_words == reference.skeleton_words
        )

    relation = Relation(e1.eid, e2.eid)
    for r in rid2relation.values():
        head_side = r.hid == e1.eid and _same_shape(eid2eventuality.get(r.tid), e2)
        tail_side = r.tid == e2.eid and _same_shape(eid2eventuality.get(r.hid), e1)
        # A single update per relation: the exact (e1, e2) relation satisfies
        # both sides and must not be counted twice.
        if head_side or tail_side:
            relation.update(r.relations)
    return relation

In [None]:
# Exact lookup: raises KeyError if no relation with this exact rid is stored.
query_relation("I sleep", "I am tired")

In [None]:
# Relaxed lookup: also merges relations whose other end only shares the
# pattern and skeleton words.
query_relation("I sleep", "I am tired", exact_match=False)

In [None]:
# Same two lookups for a different head phrase.
query_relation("I want to sleep", "I am tired")

In [None]:
query_relation("I want to sleep", "I am tired", exact_match=False)

In [63]:
# e1 = aser_extractor.extract_from_text("I am hungry.")[0][0][0]
# e2 = aser_extractor.extract_from_text("I am thirsty.")[0][0][0]
# e3 = aser_extractor.extract_from_text("I am full.")[0][0][0]
# e4 = aser_extractor.extract_from_text("He orders beef")[0][0][0]
e5 = aser_extractor.extract_from_text("He gives me beef")[0][0][0]

In [80]:
# c1 = aser_conceptualizer.conceptualize(e1)[0][0]
# c2 = aser_conceptualizer.conceptualize(e2)[0][0]
# c3 = aser_conceptualizer.conceptualize(e3)[0][0]
# c4 = aser_conceptualizer.conceptualize(e4)[0][0]
c5 = aser_conceptualizer.conceptualize(e5)[2][0]

In [17]:
# Open the pre-built concept database that the lookup cells below query.
# NOTE(review): this connection is not closed in any of the visible cells.
concept_kg_path = "/home/data/corpora/aser/concept/0.3/5/concept.db"
concept_conn = SqliteDBConnection(concept_kg_path, CHUNKSIZE)

In [34]:
c1_e1 = json.loads(list(concept_conn._conn.execute("SELECT %s FROM %s WHERE _id == \"%s\"; " % (",".join(CONCEPT_COLUMNS), CONCEPT_TABLE_NAME, c1.cid)))[0][-1])

In [35]:
c2_e2 = json.loads(list(concept_conn._conn.execute("SELECT %s FROM %s WHERE _id == \"%s\"; " % (",".join(CONCEPT_COLUMNS), CONCEPT_TABLE_NAME, c2.cid)))[0][-1])

In [36]:
c3_e3 = json.loads(list(concept_conn._conn.execute("SELECT %s FROM %s WHERE _id == \"%s\"; " % (",".join(CONCEPT_COLUMNS), CONCEPT_TABLE_NAME, c3.cid)))[0][-1])

In [61]:
c4_e4 = json.loads(list(concept_conn._conn.execute("SELECT %s FROM %s WHERE _id == \"%s\"; " % (",".join(CONCEPT_COLUMNS), CONCEPT_TABLE_NAME, c4.cid)))[0][-1])

In [82]:
c5_e5 = json.loads(list(concept_conn._conn.execute("SELECT %s FROM %s WHERE _id == \"%s\"; " % (",".join(CONCEPT_COLUMNS), CONCEPT_TABLE_NAME, c5.cid)))[0][-1])

In [98]:
# Total of the third field (index 2) over all instances stored for c4.
sum(instance[2] for instance in c4_e4["instances"])

27.70513914380411

In [99]:
# Fetch the stored concept-level relation c1 -> c4 and pair each column name
# with its value.
concept_relation_query = "SELECT %s FROM %s WHERE _id == \"%s\"; " % (
    ",".join(RELATION_COLUMNS),
    RELATION_TABLE_NAME,
    Relation.generate_rid(c1.cid, c4.cid),
)
concept_relation_row = list(concept_conn._conn.execute(concept_relation_query))[0]
list(zip(RELATION_COLUMNS, concept_relation_row))

[('_id', 'c0448909c96a4fce90c6d30ce034376db5d45683'),
 ('hid', '7669465a3de773b48e3869b597f21275bc9cb4ee'),
 ('tid', '1887f3678c49b6ea75c70dec6e5f0e91ac722d5e'),
 ('Precedence', 0.0),
 ('Succession', 0.0),
 ('Synchronous', 0.0),
 ('Reason', 0.0),
 ('Result', 0.07749597423510467),
 ('Condition', 0.0),
 ('Contrast', 0.0),
 ('Concession', 0.0),
 ('Conjunction', 0.05319645356976201),
 ('Instantiation', 0.0),
 ('Restatement', 0.0),
 ('ChosenAlternative', 0.0),
 ('Alternative', 0.0),
 ('Exception', 0.0),
 ('Co_Occurrence', 0.19096707443217031)]

In [101]:
# Show the instance-level evidence behind the concept-level "Result" weight:
# every (c1 instance, c4 instance) pair that has a stored relation with a
# non-zero "Result" entry.
for x in c1_e1["instances"]:
    for y in c4_e4["instances"]:
        instance_relation = rid2relation.get(Relation.generate_rid(x[0], y[0]))
        if instance_relation is None:
            continue
        result_weight = instance_relation.relations.get("Result", 0.0)
        if result_weight == 0.0:
            continue
        print(
            " ".join(eid2eventuality[x[0]].words),
            "(%.3f)" % (x[2]),
            "Result (%.3f)" % (result_weight),
            " ".join(eid2eventuality[y[0]].words),
            "(%.3f)" % (y[2]),
        )

i be hungry (1.000) Result (0.125) i order orange chicken (0.069)
i be not too hungry (1.000) Result (1.000) i order the fried chicken (0.069)


In [88]:
# Leftover loop variable from a loop over concept instances; this display
# depends on whichever cell last assigned `x` in the running kernel.
x

['5fe81d2719c163f4ddf33d2ac133d47e27a0adaf', 's-v-o', 0.06888531043120415]

In [85]:
# Display the concept selected for "He gives me beef".
c5

__PERSON__0 give __PERSON__1 red-meat