In [None]:
import re
import csv
import sys
import json
import math
import spacy
import textacy
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from taxonerd import TaxoNERD
from fastcoref import spacy_component
from spacy.matcher import Matcher, DependencyMatcher, PhraseMatcher
%run "./Main.ipynb"

In [None]:
# Load the baseline dataset; the rows are expected to provide "Title" and "Abstract" columns.
df = pd.read_csv("../Week 16/Datasets/Baseline-1.csv")

# Row of the record under inspection; change `i` to look at a different one.
i = 0
title = df.loc[i, "Title"]
abstract = df.loc[i, "Abstract"]

print(f"Title: {title}")
print(f"Abstract: {abstract}")

# NOTE(review): `Main` is not defined in this notebook; it is presumably brought in
# by the `%run "./Main.ipynb"` line of the import cell. Later cells read the shared
# `main` object (main.sp_doc, main.coref_map, main.parts, ...), so this cell has to
# run before them.
main = Main()
main.update_text(abstract)

In [None]:
def extend_token(doc, token, sent_i):
    """Expand a single token into the set of tokens it stands for.

    Three expansions are applied in order, all driven by lookups on the
    notebook-level `main` object:

    1. A pronoun found in `main.coref_map` is replaced by the tokens mapped
       to it (presumably its coreference antecedent -- confirm in Main.ipynb).
    2. If a candidate token lies inside an entity of sentence `sent_i` whose
       label is `Entity.LIST`, the whole candidate list is replaced by the
       tokens of that entity.
    3. Every remaining candidate is widened with the tokens mapped to it in
       `main.noun_chunk_map` and `main.ent_map`. Tokens added in this step
       are not expanded again.

    Args:
        doc: Parsed document; sliced by token position for the LIST expansion.
        token: The token to expand.
        sent_i: Index into `main.parts.reg` selecting the entities of the
            sentence that `token` belongs to.

    Returns:
        A de-duplicated list of tokens in arbitrary order (it is built from a
        set); callers that need document order must sort it themselves.
    """
    # Step 1: pronoun -> mapped tokens, anything else stays as-is.
    if token.pos_ == "PRON" and token in main.coref_map:
        ext = [*main.coref_map[token]]
    else:
        ext = [token]

    # Step 2: LIST expansion. `ext` is swapped for the entity's tokens inside
    # the loop, so it can become shorter than the length recorded in `size`.
    # The second bound keeps `ext[i]` from running past the end of the new
    # list, which previously raised IndexError.
    i = 0
    size = len(ext)
    while i < size and i < len(ext):
        ext_token = ext[i]
        for ent_pos, ent in main.parts.reg[sent_i].items():
            # ent_pos is an inclusive (first, last) pair of token positions.
            if ent_pos[0] <= ext_token.i <= ent_pos[1] and ent.label == Entity.LIST:
                ext = [*doc[ent_pos[0]:ent_pos[1]+1]]
                break
        i += 1

    # Step 3: widen with noun chunks and entities. Iterating over a snapshot
    # keeps the expansion one level deep: newly added tokens are not revisited.
    for ext_token in list(ext):
        if ext_token in main.noun_chunk_map:
            ext.extend([*main.noun_chunk_map[ext_token]])

        if ext_token in main.ent_map:
            ext.extend([*main.ent_map[ext_token]])

    return list(set(ext))

In [None]:
class Node:
    """A graph node holding a group of tokens and its outgoing edges.

    Attributes are plain lists that callers fill in after construction:

    * tokens    -- the tokens that make up this node.
    * neighbors -- outgoing edges, each a pair whose first item is the node
                   for the connecting verb and whose second item is the
                   target node.
    """

    def __init__(self):
        self.tokens = []
        self.neighbors = []

    def __str__(self):
        """Render one "tokens-->verb tokens-->target tokens" line per edge.

        Returns an empty string for a node without edges.
        """
        # join() places the separator by position. The earlier value-based
        # "is this the last neighbor?" comparison dropped the newline for any
        # edge equal to the final one, fusing lines together.
        return "\n".join(
            f"{self.tokens}-->{edge[0].tokens}-->{edge[1].tokens}"
            for edge in self.neighbors
        )

# def graph_part(doc, l_i, r_i):
#     rel_tokens = doc[l_i:r_i+1]
#     action_tokens = [*main.cause.tokens, *main.change.tokens]

#     rel_action_tokens = set(rel_tokens).intersection(action_tokens)
#     rel_action_tokens = list(rel_action_tokens)
    
#     if not rel_action_tokens:
#         return None

#     rel_action_tokens = sorted(rel_action_tokens, key=lambda t: t.i)
    
#     partition_i = rel_action_tokens[-1].i

#     sub_node = Node()
#     sub_node.tokens = [*list(doc[l_i:partition_i+1])]

#     obj_node = Node()
#     obj_node.tokens = [*list(doc[partition_i+1:r_i+1])]

#     sub_node.neighbors.append((Node(), obj_node))
    
#     return sub_node

def graph_ent(doc, entities):
    """Run `graph_bar` over the tokens of every entity.

    Args:
        doc: Parsed document, indexed by token position.
        entities: List of objects exposing `l` and `r`, the inclusive first
            and last token positions of the entity within `doc`.

    Returns:
        None. Any output comes from `graph_bar`.
    """
    for span in entities:
        # Both bounds are inclusive, hence the +1 on the right edge.
        span_tokens = [doc[pos] for pos in range(span.l, span.r + 1)]
        graph_bar(doc, span_tokens)

def graph_bar(doc, tokens):
    """Print a subject-->verb-->object edge built around the first verb in `tokens`.

    The stretch of `doc` from `tokens[0]` up to the verb and the stretch from
    just after the verb up to the next noun, proper noun or pronoun found in
    `tokens` become the two sides of the edge. Both sides are read from `doc`
    as contiguous position ranges, so they may contain tokens that are not in
    `tokens` itself.

    The sides are swapped when the verb directly follows an auxiliary or is
    directly followed by "by" (presumably to handle passive constructions).

    Nothing is printed when `tokens` has no verb, when no noun-like token
    follows the verb, or when either side of the edge is empty.

    Args:
        doc: Parsed document that the tokens belong to, indexed by position.
        tokens: List of tokens from `doc`. The callers in this notebook pass
            them in document order.

    Returns:
        None. The edge is only printed, not returned.
    """
    verbs = [token for token in tokens if token.pos_ == "VERB"]
    if not verbs:
        return

    verb = verbs[0]

    # l: document position where the left side starts.
    # r: first an index into `tokens`, advanced to the next noun-like token
    #    after the verb, then converted to a document position.
    l = tokens[0].i
    r = tokens.index(verb) + 1

    while r < len(tokens) and tokens[r].pos_ not in ["PROPN", "NOUN", "PRON"]:
        r += 1

    if r >= len(tokens):
        return

    r = tokens[r].i

    # Token.nbor() raises IndexError outside the document, so look at the
    # neighbours only when they exist (a verb opening or closing the document
    # simply has no auxiliary before it / no "by" after it).
    prev_token = verb.nbor(-1) if verb.i > 0 else None
    next_token = verb.nbor(1) if verb.i + 1 < len(doc) else None
    swap = (prev_token is not None and prev_token.pos_ == "AUX") or (
        next_token is not None and next_token.lower_ == "by"
    )

    verb_node = Node()
    verb_node.tokens = [verb]

    # a_node: everything after the verb up to and including position r.
    a_node = Node()
    a_node.tokens = [doc[_] for _ in range(verb.i + 1, r + 1)]
    if not a_node.tokens:
        return

    # b_node: everything from position l up to, but excluding, the verb.
    b_node = Node()
    b_node.tokens = [doc[_] for _ in range(l, verb.i)]
    if not b_node.tokens:
        return

    if not swap:
        sub_node = b_node
        obj_node = a_node
    else:
        sub_node = a_node
        obj_node = b_node

    # TRANSFER: verbs found on the subject side are moved to the object side.
    sub_transfer_tokens = [token for token in sub_node.tokens if token.pos_ == "VERB"]
    sub_node.tokens = [token for token in sub_node.tokens if token not in sub_transfer_tokens]
    obj_node.tokens.extend(sub_transfer_tokens)

    # TRANSFER: object-side tokens listed in main.cause.tokens or
    # main.change.tokens are moved onto the verb node.
    obj_transfer_tokens = [
        token
        for token in obj_node.tokens
        if token in main.cause.tokens or token in main.change.tokens
    ]
    obj_node.tokens = [token for token in obj_node.tokens if token not in obj_transfer_tokens]
    verb_node.tokens.extend(obj_transfer_tokens)

    sub_node.neighbors.append((verb_node, obj_node))
    print(sub_node)
    
def _extend_node_tokens(doc, tokens, sent_i):
    # Expand every token with extend_token, drop duplicates, restore document order.
    extended = flatten([extend_token(doc, token, sent_i) for token in tokens])
    return sorted(set(extended), key=lambda t: t.i)


def graph(doc):
    """Print subject-->verb-->object edges for every sentence of `doc`.

    For each sentence, the subject-verb-object triples reported by textacy
    are turned into `Node` edges and printed. A triple is skipped when

    * its first subject token or last object token is no longer among the
      sentence's available tokens (an earlier triple already claimed them),
    * one of its verbs has the lemma "show" or "showed", or
    * there is no token between subject start and verb, or between verb and
      object end (subject not before the verb / object not after it).

    Both sides of a kept triple are expanded with `extend_token` and handed
    to `graph_bar`, which may print nested edges. Afterwards the entities
    registered for the sentence in `main.parts.reg` are passed to
    `graph_ent`.

    Args:
        doc: Parsed document providing `sents` and indexing by token position.

    Returns:
        None. All results are printed.
    """
    triples = list(textacy.extract.subject_verb_object_triples(doc))

    sents = list(doc.sents)
    # Triples grouped by the start position of the sentence holding their verb.
    sents_triples = {sent.start: [] for sent in sents}

    for triple in triples:
        sents_triples[triple.verb[0].sent.start].append(triple)

    for sent_i, sent in enumerate(sents):
        # Tokens of this sentence that no earlier triple has claimed yet.
        tokens = [token for token in sent]

        # Subject-Verb-Object Triples
        for triple in sents_triples[sent.start]:
            if triple.subject[0] not in tokens:
                continue

            if triple.object[-1] not in tokens:
                continue

            if any(verb.lemma_.lower() in ["show", "showed"] for verb in triple.verb):
                continue

            # Object side: everything after the last verb token up to the end
            # of the object. Subject side: from the subject start up to the verb.
            obj_tokens = [doc[i] for i in range(triple.verb[-1].i+1, triple.object[-1].i+1)]
            sub_tokens = [doc[i] for i in range(triple.subject[0].i, triple.verb[0].i)]
            # Either range is empty when the triple is not in subject-verb-object
            # order; there is nothing to draw then (this used to raise IndexError).
            if not obj_tokens or not sub_tokens:
                continue

            obj_node = Node()
            obj_node.tokens = _extend_node_tokens(doc, obj_tokens, sent_i)

            graph_bar(doc, obj_node.tokens)

            verb_node = Node()
            verb_node.tokens = [*triple.verb]

            sub_node = Node()
            sub_node.tokens = _extend_node_tokens(doc, sub_tokens, sent_i)
            sub_node.neighbors.append((verb_node, obj_node))

            graph_bar(doc, sub_node.tokens)

            # Update Available Tokens: remove the whole stretch between the
            # first and last used token so later triples cannot reuse it.
            used_tokens_i = sorted(
                tokens.index(token)
                for token in [*sub_node.tokens, *obj_node.tokens]
                if token in tokens
            )
            # Expansion can leave only tokens from outside this sentence; in
            # that case there is nothing to remove.
            if used_tokens_i:
                tokens = tokens[:used_tokens_i[0]] + tokens[used_tokens_i[-1]+1:]

            print(sub_node)

        # Parts
        entities = list(main.parts.reg[sent_i].values())
        graph_ent(doc, entities)

# Print the edges for the shared document; graph() returns nothing.
# NOTE(review): main.sp_doc is presumably filled in by main.update_text(...) in the setup cell.
graph(main.sp_doc)

In [None]:
# Display the `ents` attribute of the shared document (presumably its entity spans) for manual inspection.
main.sp_doc.ents

In [None]:
# Visual check of the subject-verb-object triples: print every triple, then
# re-print the abstract with subject, object and verb tokens colour-coded.
i = 0

title = df.loc[i, "Title"]
txt = df.loc[i, "Abstract"]
# Parsed directly with main.sp_nlp here rather than reusing main.sp_doc.
doc = main.sp_nlp(txt)

print(f"Title: {title}")
print(f"Text: {txt}\n")

# Tokens that take part in any triple, collected per role.
subs = []
objs = []
verbs = []

triples = textacy.extract.subject_verb_object_triples(doc)
for triple in triples:
    print(triple)
    subs.extend(triple.subject)
    objs.extend(triple.object)
    verbs.extend(triple.verb)

class Colors:
    """ANSI escape sequences for coloured terminal/notebook output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

for token in doc:
    # Later checks win: a token that is both subject and verb is shown as verb.
    color = Colors.ENDC
    if token in subs:
        color = Colors.OKBLUE
    if token in objs:
        color = Colors.WARNING
    if token in verbs:
        color = Colors.FAIL

    # token.whitespace_ is the whitespace that followed the token in the source
    # text, so the abstract is reproduced with its original spacing. Guessing
    # the spacing from sentence position and POS glued sentence-final words to
    # their predecessor and put the space on the wrong side of "(" and "-".
    print(f"{color}{token.text}{Colors.ENDC}{token.whitespace_}", end="")

In [None]:
def bar(i):
    """Return the integers from 0 up to, but not including, `i` as a list."""
    return list(range(i))

bar(5)

In [None]:
# Nested sample input: one list per i in 1..4, i.e. [[0], [0, 1], [0, 1, 2], [0, 1, 2, 3]].
[bar(i) for i in range(1, 5)]

In [None]:
# Apply flatten to the nested sample above.
# NOTE(review): flatten is not defined in this notebook; it presumably comes from
# Main.ipynb and collapses one level of nesting -- confirm there.
flatten([bar(i) for i in range(1, 5)])