In [31]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag

In [32]:
%run ./utils.ipynb
%run ./nlp.ipynb
%run ./relation_extraction.ipynb

In [33]:
# Parse a single sample tree file; the call yields the document text and a
# mapping from relation type to the relations of that type.
text, relations = extract_relations("parsed/race/train/middle/2549.txt.tree")

In [34]:
# Third entry (index 2) of the sample document's "Explanation" relations.
rel = relations["Explanation"][2]

In [35]:
def do_subjects_differ(text, rel, n_prev_sent=5):
    """Tell whether the two segments of ``rel`` have different resolved subjects.

    Args:
        text: Full document text that the segment offsets index into.
        rel: Relation whose ``left``/``right`` segments expose ``start`` and
            ``end`` character offsets.
        n_prev_sent: Forwarded to ``move_st``; presumably the number of
            preceding sentences to include as context -- TODO confirm.

    Returns:
        ``True`` when the span contains no sentence-final punctuation (both
        segments are then treated as lying in one sentence), or when fewer
        subjects were resolved than are needed to index the right segment's
        sentence. Otherwise the result of comparing the subjects resolved
        for the two sentences with ``!=``.
    """
    # move_st returns the start offset used for the context slice and the
    # index of the left segment's sentence within that slice.
    new_st, left_segment_sent_no = move_st(text, rel.left.start, n_prev_sent)

    span_tokens = set(word_tokenize(text[rel.left.start:rel.right.end]))
    crosses_sentence_boundary = bool({".", "!", "?"} & span_tokens)
    if not crosses_sentence_boundary:
        # NOTE(review): same-sentence segments are reported as "differing",
        # exactly as before -- confirm this is the intended default.
        return True

    right_segment_sent_no = left_segment_sent_no + 1
    resolved_subjects = get_resolved_subjects(text[new_st:rel.right.end])

    # `<=` rather than `<`: index `right_segment_sent_no` is only valid when
    # the list is strictly longer than it; the former `<` check let the
    # boundary case through and raised IndexError below.
    if len(resolved_subjects) <= right_segment_sent_no:
        return True

    return (
        resolved_subjects[left_segment_sent_no]
        != resolved_subjects[right_segment_sent_no]
    )

        
def test__do_subjects_differ():
    """Check do_subjects_differ on two three-sentence toy documents.

    In both documents the left segment is the second sentence and the right
    segment is the third one; only the subject of the third sentence varies.
    """
    def relation_over(document):
        # Second sentence spans [22, 42); the third runs to the end of the text.
        return Relation(
            "",
            Segment("N", 22, 42),
            Segment("S", 42, len(document)),
            None,
            None,
        )

    differing = """Nika lives in Berlin. She goes to a Kita. Her favourite color is yellow."""
    assert do_subjects_differ(differing, relation_over(differing))

    matching = """Nika lives in Berlin. She goes to a Kita. She likes yellow things."""
    assert not do_subjects_differ(matching, relation_over(matching))

    
# Run the check right away so a failing assertion stops this cell.
test__do_subjects_differ()

In [36]:
def search_segment(text, relation, direction):
    """Return the text of the outermost nested segment on one side.

    Starting at ``relation``, keep descending into the left child when
    ``direction`` is ``"left"`` (the right child for any other value) until
    a relation without a child on that side is reached, then slice ``text``
    with that relation's segment on the same side.
    """
    descend_left = direction == "left"
    node = relation
    while True:
        if descend_left:
            child, segment = node.left_child, node.left
        else:
            child, segment = node.right_child, node.right
        if not child:
            return text[segment.start:segment.end]
        node = child
    
    
def get_depth(relation):
    """Return the height of the relation tree rooted at ``relation``.

    A falsy ``relation`` (e.g. ``None``) has depth 0; otherwise the depth is
    one more than the deeper of the two child subtrees.
    """
    if not relation:
        return 0
    left_depth = get_depth(relation.left_child)
    right_depth = get_depth(relation.right_child)
    return 1 + max(left_depth, right_depth)

In [9]:
def _segment_text_for_rule(text, segment, child, direction, max_depth):
    """Return the cleaned text representing one side of the statement.

    When ``child`` exists and its tree is deeper than ``max_depth``, the
    outermost nested segment on the ``direction`` side is used; otherwise
    the whole ``segment`` is taken as is.
    """
    if child and get_depth(child) > max_depth:
        raw_text = search_segment(text, child, direction)
    else:
        raw_text = text[segment.start:segment.end]
    return clean(raw_text)


def rule_explanation_01(text, relation, max_depth=3):
    """Generate a statement from an Explanation whose satellite is an Elaboration.

    Args:
        text: Document text that segment offsets index into.
        relation: A relation of type ``"Explanation"``.
        max_depth: A nucleus whose nested relation tree is deeper than this
            is replaced by its nested segment closest to the other side of
            the relation. Defaults to 3.

    Returns:
        ``(statement, nucleus_proximity)`` where ``nucleus_proximity`` is
        ``"near"`` when the nucleus of the Elaboration lies on the side of
        the satellite that faces the Explanation nucleus and ``"far"``
        otherwise. ``(None, None)`` when the satellite is not an Elaboration
        or when either extracted text yields no tokens.

    Raises:
        AssertionError: if ``relation`` is ``None`` or not an Explanation.
    """
    assert relation is not None and relation.type == "Explanation"
    satellite_relation, _ = relation.get_satellite()
    if not (satellite_relation and satellite_relation.type == "Elaboration"):
        return None, None

    # Locate the Explanation nucleus. The "direction" values are *search*
    # directions: for a nucleus on the left, the nested segment closest to
    # the satellite is its right-most one, and vice versa.
    nucleus_on_left = relation.left.type == "N"
    if nucleus_on_left:
        nucleus_direction, satellite_direction = "right", "left"
        nucleus_relation, nucleus_segment = relation.left_child, relation.left
    else:
        nucleus_direction, satellite_direction = "left", "right"
        nucleus_relation, nucleus_segment = relation.right_child, relation.right

    expl_text = _segment_text_for_rule(
        text, nucleus_segment, nucleus_relation, nucleus_direction, max_depth
    )

    # Locate the nucleus inside the Elaboration satellite.
    satellite_nucleus_on_left = satellite_relation.left.type == "N"
    if satellite_nucleus_on_left:
        satellite_nucleus_relation = satellite_relation.left_child
        satellite_nucleus_segment = satellite_relation.left
    else:
        satellite_nucleus_relation = satellite_relation.right_child
        satellite_nucleus_segment = satellite_relation.right

    # The satellite sits on the opposite side of the Explanation nucleus, so
    # its own nucleus is adjacent to the Explanation nucleus exactly when
    # both nuclei are on the same side of their respective relations.
    if satellite_nucleus_on_left == nucleus_on_left:
        nucleus_proximity = "near"
    else:
        nucleus_proximity = "far"

    elab_text = _segment_text_for_rule(
        text,
        satellite_nucleus_segment,
        satellite_nucleus_relation,
        satellite_direction,
        max_depth,
    )

    expl_tokens = word_tokenize(expl_text)
    elab_tokens = word_tokenize(elab_text)
    if not expl_tokens or not elab_tokens:
        # Nothing to build a statement from; previously this raised
        # IndexError on the `[0]` lookups below.
        return None, None

    # word_tokenize emits "``" for an opening double quote.
    if elab_tokens[0] == "``":  # the "quotes" case
        if expl_tokens[0] == "``":
            statement = (
                "According to the text, the answer "
                "to the question {explanation} "
                "is as follows: {elaboration}."
            ).format(explanation=expl_text, elaboration=elab_text)
        else:
            statement = (
                "{explanation}. This explains why "
                "he/she said/asked {elaboration}"
            ).format(
                explanation=remove_trailing_punctuation(
                    uppercase_first_letter(expl_text)
                ),
                elaboration=elab_text,
            )
    else:  # regular case
        # `.lower()` must be *called*: comparing the bound method itself to
        # a string is always False, which silently disabled this check.
        starts_with_there_is_there_are = (
            len(expl_tokens) >= 2
            and expl_tokens[0].lower() == "there"
            and expl_tokens[1].lower() in ["is", "are"]
        )
        expl_is_background = (
            nucleus_relation and nucleus_relation.type == "Background"
        )
        if starts_with_there_is_there_are or expl_is_background:
            connector = "For example,"
        else:
            connector = "That's why"

        statement = (
            "{explanation}. "
            "{connector} {elaboration}."
        ).format(
            explanation=remove_trailing_punctuation(
                uppercase_first_letter(expl_text)
            ),
            elaboration=remove_trailing_punctuation(
                lowercase_first_letter(elab_text)
            ),
            connector=connector,
        )

    return statement, nucleus_proximity

In [10]:
# Show what the rule produces for every Explanation relation of the sample
# document. The rule defined in this notebook is `rule_explanation_01`; the
# earlier call used `rule_explanation_elaboration`, a name that none of the
# cells shown here define.
if "Explanation" in relations:
    for relation in relations["Explanation"]:
        print(rule_explanation_01(text, relation))
        print("")

(None, None)

(None, None)

("He goes to work on foot or by bike. That's why it is far from his home.  He usually works in the day and has lunch outside in the sun.", 'near')

("They also work on weekends. That's why her name is Sara.", 'near')



In [11]:
DIRECTORY = "parsed/race/train/middle"

# Apply the rule to every parsed file and bucket the generated statements by
# how close the Elaboration nucleus is to the Explanation nucleus.
statements_near, statements_far = [], []
# os.listdir returns entries in arbitrary order; sorting keeps the generated
# statement files identical from run to run.
for file_name in sorted(os.listdir(DIRECTORY)):
    path = os.path.join(DIRECTORY, file_name)
    text, relations = extract_relations(path)

    if "Explanation" not in relations:
        continue

    for relation in relations["Explanation"]:
        # `rule_explanation_01` is the rule defined in this notebook; the
        # earlier call used `rule_explanation_elaboration`, a name that none
        # of the cells shown here define.
        statement, nucleus_proximity = rule_explanation_01(text, relation)
        if statement is None:
            continue

        statement_str = f"[{path}]\n{statement}"
        if nucleus_proximity == "near":
            statements_near.append(statement_str)
        else:
            statements_far.append(statement_str)

In [12]:
# Persist the "near" statements, one entry per line group. The encoding is
# pinned to UTF-8 so the file does not depend on the platform's default
# locale encoding.
with open(
    "statements/explanation_elaboration/train/middle_near.txt",
    "wt",
    encoding="utf-8",
) as f:
    f.write("\n".join(statements_near))

In [13]:
# Persist the "far" statements, one entry per line group. The encoding is
# pinned to UTF-8 so the file does not depend on the platform's default
# locale encoding.
with open(
    "statements/explanation_elaboration/train/middle_far.txt",
    "wt",
    encoding="utf-8",
) as f:
    f.write("\n".join(statements_far))