In [1]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag

In [90]:
# Execute the sibling utils notebook so its definitions land in this kernel.
# NOTE(review): presumably this is where clean(), uppercase_first_letter(),
# lowercase_first_letter(), remove_trailing_punctuation() and the `os` module
# used further down come from -- confirm against utils.ipynb.
# NOTE(review): this cell's execution count (90) is out of sequence with its
# neighbours; verify the notebook still passes Restart Kernel -> Run All.
%run ./utils.ipynb

In [3]:
# Execute the relation-extraction notebook in this kernel.
# NOTE(review): presumably defines extract_relations(), which the cells below
# call -- confirm against relation_extraction.ipynb.
%run ./relation_extraction.ipynb

In [4]:
# Parse one sample document; the demo cell further down runs the rule on it.
text, relations = extract_relations("parsed/race/train/middle/2549.txt.tree")

In [69]:
def search_segment(text, relation, direction):
    """Return the text of the outermost segment on one side of a relation tree.

    Starting at ``relation``, repeatedly step into ``left_child`` (when
    ``direction == "left"``) or ``right_child`` (any other value) until the
    node has no child on that side, then slice ``text`` with that node's
    ``left`` / ``right`` segment bounds (``start`` inclusive, ``end``
    exclusive).
    """
    descend_left = direction == "left"
    node = relation
    while True:
        if descend_left:
            next_node, span = node.left_child, node.left
        else:
            next_node, span = node.right_child, node.right
        if not next_node:
            return text[span.start:span.end]
        node = next_node
    
    
def get_depth(relation):
    """Return the height of the relation tree rooted at ``relation``.

    A falsy ``relation`` (e.g. ``None``) has depth 0; otherwise the depth is
    one more than the deeper of the ``left_child`` / ``right_child`` subtrees.
    """
    if not relation:
        return 0
    left_depth = get_depth(relation.left_child)
    right_depth = get_depth(relation.right_child)
    deeper = left_depth if left_depth >= right_depth else right_depth
    return deeper + 1

In [137]:
def _segment_text(text, sub_relation, segment, direction, max_depth=3):
    """Return the cleaned text for one side of a relation.

    When ``sub_relation`` exists and its tree is deeper than ``max_depth``,
    only the outermost segment reached by following ``direction`` through the
    tree is used (see ``search_segment``); otherwise the whole ``segment``
    span of ``text`` is used. The result is passed through ``clean``.
    """
    if sub_relation and get_depth(sub_relation) > max_depth:
        raw = search_segment(text, sub_relation, direction)
    else:
        raw = text[segment.start:segment.end]
    return clean(raw)


def rule_explanation_elaboration(text, relation):
    """Build a statement from an Explanation whose satellite is an Elaboration.

    Parameters
    ----------
    text : str
        The document text that the relation's segment offsets index into.
    relation
        A relation whose ``type`` is ``"Explanation"``.

    Returns
    -------
    str or None
        The generated statement, or ``None`` when the satellite is missing or
        is not an ``"Elaboration"``, or when either extracted text yields no
        tokens (nothing to build a sentence from).

    Raises
    ------
    AssertionError
        If ``relation`` is ``None`` or is not an Explanation relation.
    """
    assert relation is not None and relation.type == "Explanation"
    satellite_relation, _ = relation.get_satellite()
    if not satellite_relation or satellite_relation.type != "Elaboration":
        return None

    # The nucleus subtree is searched in the direction opposite to the side it
    # sits on, and the satellite's nucleus in the remaining direction
    # (presumably to pick the segments adjacent to each other -- confirm).
    if relation.left.type == "N":
        nucleus_direction = "right"
        satellite_direction = "left"
        nucleus_relation = relation.left_child
        nucleus_segment = relation.left
    else:
        nucleus_direction = "left"
        satellite_direction = "right"
        nucleus_relation = relation.right_child
        nucleus_segment = relation.right

    expl_text = _segment_text(
        text, nucleus_relation, nucleus_segment, nucleus_direction
    )

    if satellite_relation.left.type == "N":
        satellite_nucleus_relation = satellite_relation.left_child
        satellite_nucleus_segment = satellite_relation.left
    else:
        satellite_nucleus_relation = satellite_relation.right_child
        satellite_nucleus_segment = satellite_relation.right

    elab_text = _segment_text(
        text,
        satellite_nucleus_relation,
        satellite_nucleus_segment,
        satellite_direction,
    )

    expl_tokens = word_tokenize(expl_text)
    elab_tokens = word_tokenize(elab_text)

    # Empty segments used to crash with IndexError on the [0] lookups below;
    # there is no sentence to build from them, so report "no statement".
    if not expl_tokens or not elab_tokens:
        return None

    # The tokenizer emits "``" for an opening double quote.
    if elab_tokens[0] == "``":
        if expl_tokens[0] == "``":
            statement = "According to the text, the answer "\
                        "to the question {explanation} "\
                        "is as follows: {elaboration}.".format(
                            explanation=expl_text,
                            elaboration=elab_text
                        )
        else:
            statement = "{explanation}. This explains why "\
                        "he/she said/asked {elaboration}".format(
                            explanation=remove_trailing_punctuation(
                                uppercase_first_letter(expl_text)
                            ),
                            elaboration=elab_text
                        )
    else:
        # str.lower must be *called*: comparing the bound method itself to a
        # string is always False, which silently disabled this check. The
        # length guard protects the [1] lookup for one-token explanations.
        starts_with_there_is_there_are = (
            len(expl_tokens) > 1
            and expl_tokens[0].lower() == "there"
            and expl_tokens[1].lower() in ("is", "are")
        )
        expl_is_background = \
            nucleus_relation and nucleus_relation.type == "Background"
        if starts_with_there_is_there_are or expl_is_background:
            connector = "For example,"
        else:
            connector = "That's why"

        statement = "{explanation}. "\
                    "{connector} {elaboration}.".format(
            explanation=remove_trailing_punctuation(
                uppercase_first_letter(expl_text)
            ),
            elaboration=remove_trailing_punctuation(
                lowercase_first_letter(elab_text)
            ),
            connector=connector
        )
    return statement

In [138]:
# Demo: print the statement produced for each Explanation relation of the
# sample document, each followed by a blank line.
if "Explanation" in relations:
    for relation in relations["Explanation"]:
        print(rule_explanation_elaboration(text, relation), end="\n\n")

In the afternoon, she is n't so busy. That's why after school she does her homework and plays with other children.

She has many classes in the morning. That's why at about twelve fifteen, she comes back for lunch.



In [139]:
DIRECTORY = "parsed/race/train/middle"

# Apply the rule to every parsed document in DIRECTORY and collect the
# statements, each tagged with the file it came from.
statements = []
# os.listdir returns entries in arbitrary order; sort them so the collected
# statements (and the file written from them) are reproducible across runs.
for file_name in sorted(os.listdir(DIRECTORY)):
    path = os.path.join(DIRECTORY, file_name)
    text, relations = extract_relations(path)

    if "Explanation" in relations:
        for relation in relations["Explanation"]:
            statement = rule_explanation_elaboration(
                text,
                relation
            )
            # The rule returns None when it does not apply to this relation.
            if statement is not None:
                statements.append(f"[{path}]\n{statement}")

In [140]:
# Persist the collected statements, one entry per line group.
# The encoding is pinned so the output does not depend on the machine's
# locale (the default text encoding can fail on non-ASCII characters).
with open(
    "statements/explanation_elaboration/train/middle.txt",
    "wt",
    encoding="utf-8",
) as f:
    f.write("\n".join(statements))