In [1]:
import json

In [2]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

In [3]:
import import_ipynb
from aux import utils
from aux import nlp
from aux import relation_extraction
from aux import defs
import preparation

importing Jupyter notebook from /Users/YK/mt/project/aux/utils.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/nlp.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/relation_extraction.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/defs.ipynb
importing Jupyter notebook from preparation.ipynb


In [4]:
def is_nn(relation):
    return (
        relation is not None
            and relation.left.type == "N"
            and relation.right.type == "N"
    )

In [5]:
class RuleExplanation07(defs.Rule):
    name = "explanation_07"
    relation_type = "Explanation"
    reasons = {
        "COMMON_PATTERN_-_S=NN": 
            defs.Reason(
                1,
                "Common pattern ( -Satellite=NN)."
            ),
        "COMMON_PATTERN_S=NN_-": 
            defs.Reason(
                1,
                "Common pattern (Satellite=NN- )."
            )
    }
    
    def generate_statement(self, text, relation, verbose=False):
        assert(relation is not None and relation.type == "Explanation")
        info = preparation.Preprocessor.prepare_extended_info(text, relation, verbose)
        if info is None:
            utils.print_if_verbose("Extended info preparation wasn't successful.", verbose)
            return None
        
        utils.print_if_verbose(
                "Nucleus relation type: "
                f"'{utils.get_relation_type(info.nucleus_info.relation)}'.",
                verbose
            )
        utils.print_if_verbose(
            "Satellite relation type: "
            f"'{utils.get_relation_type(info.satellite_info.relation)}'.",
            verbose
        )
            
        reason = None
        
        if (
            info.nucleus_info.relation is None
                and is_nn(info.satellite_info.relation)
        ):
            reason = self.reasons["COMMON_PATTERN_-_S=NN"]
        elif (
            info.satellite_info.relation is None
                and is_nn(info.nucleus_info.relation)
        ):
            reason = self.reasons["COMMON_PATTERN_S=NN_-"]
        
        if reason is not None:
            utils.print_if_verbose(reason.explanation, verbose)
            
            prepared_nucleus_text = utils.remove_trailing_punctuation(
                utils.uppercase_first_letter(info.nucleus_preparation_result.prepared_text)
            )
            processed_s_text = nlp.remove_leading_words(
                info.satellite_preparation_result.prepared_text, verbose
            )
#             processed_s_text = nlp.remove_leading_words(
#                 info.satellite_preparation_result.prepared_text, verbose
#             )
#             prepared_s_text = utils.lowercase_first_letter(
#                 processed_s_text if processed_s_text is not None 
#                     else info.satellite_preparation_result.prepared_text
#             )
            prepared_s_text = utils.lowercase_first_letter(
                info.satellite_preparation_result.prepared_text
            )
            statement_text = f"{prepared_nucleus_text} and {prepared_s_text}"
            return defs.Statement(
                statement_text=statement_text,
                nucleus=prepared_nucleus_text,
                satellite_nucleus=prepared_s_text,
                left_boundary=relation.left.start,
                right_boundary=relation.right.end,
                nucleus_proximity=info.nucleus_proximity.value,
                rule=self.name,
                reason=reason
            )
        else:
            return None

In [6]:
if __name__ == "__main__" and "__file__" not in globals():
    rule = RuleExplanation07()

    with open("../parsed/race/train/middle/2827.txt.tree", "rt") as f:
        tree_text = f.read()

    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    expl = relations["Explanation"][1]
    print(text[expl.left.start:expl.right.end])

    statement =rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

The Boones traveled across Pennsylvania .  At last the family came to the green Yadkin Valley in North Carolina .  There were a few houses there already , but the farmland was much broader than that in Pennsylvania .  Father Boone said , " This is good farmland .  We will stop here . "  
Nucleus is on the left.
Nucleus's depth <= 3.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
The depth of the satellite's nucleus <= 3.
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
At last the family came to the green Yadkin Valley in North Carolina.  There were a few houses there already, but the farmland was much broader than that in Pennsylvania.  Father Boone said, " This is good farmland.  We will stop here. "
Nucleus relation type: '-'.
Satellite relation type: 'Joint'.
Common pattern ( -Satellite=NN).
Removing tokens before the first NP:
-- synt

In [7]:
if __name__ == "__main__" and "__file__" not in globals():
    rule = RuleExplanation07()

    with open("../parsed/race/train/middle/4568.txt.tree", "rt") as f:
        tree_text = f.read()

    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    expl = relations["Explanation"][1]
    print(text[expl.left.start:expl.right.end])

    statement =rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

After a short while , we can take our money from the slot at the bottom of the machine .  At last , if we do n't need other services , we can press the key for ending .  Then our cards will come out from the slot we put it in .  And we can finish our dealing and take the cards and money away .  
Nucleus is on the left.
Nucleus's depth <= 3.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
The depth of the satellite's nucleus <= 3.
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
At last, if we do n't need other services, we can press the key for ending.  Then our cards will come out from the slot we put it in.  And we can finish our dealing and take the cards and money away.
Nucleus relation type: '-'.
Satellite relation type: 'Joint'.
Common pattern ( -Satellite=NN).
Removing tokens before the first NP:
-- syntactic parsing result
 (ROOT
  