In [1]:
import json

In [2]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

In [3]:
import import_ipynb
from aux import utils
from aux import nlp
from aux import relation_extraction
from aux import defs
import preparation
import rule_base

importing Jupyter notebook from /Users/YK/mt/project/aux/utils.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/nlp.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/relation_extraction.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/defs.ipynb
importing Jupyter notebook from preparation.ipynb
importing Jupyter notebook from rule_base.ipynb


In [4]:
class RuleExplanation04(rule_base.Rule):
    """Rule "explanation_04": builds a statement from an "Explanation" relation.

    The rule fires only when the sub-relations found inside the nucleus and
    the satellite of the given relation match one of the patterns listed in
    ``reasons``. The resulting statement joins the finalised nucleus text and
    satellite text with the connective picked by ``choose_connective``.

    ``_finalise_statement_parts`` and ``_generate_statement`` are not defined
    here; presumably they are inherited from ``rule_base.Rule``.
    """

    name = "explanation_04"
    relation_type = "Explanation"

    # Pattern key -> defs.Reason(<number>, <explanation text>).
    # In the keys, "-" stands for "the nucleus has no sub-relation".
    # Every key must be unique: in a dict literal a repeated key silently
    # replaces the earlier value.
    reasons = {
#         "COMMON_PATTERN_-_ELABORATION->ELABORATION": # INCLUDED IN 9
#             defs.Reason(
#                 1,
#                 "Common pattern ( -Elaboration->Elaboration)."
#             ),
        "COMMON_PATTERN_-_EXPLANATION":
            defs.Reason(
                2,
                "Common pattern ( -Explanation)."
            ),
        "COMMON_PATTERN_-_JOINT":
            defs.Reason(
                3,
                "Common pattern ( -Joint)."
            ),
        "COMMON_PATTERN_ELABORATION_JOINT":
            defs.Reason(
                4,
                "Common pattern (Elaboration-JOINT)."
            ),
        "COMMON_PATTERN_ATTRIBUTION_JOINT":
            defs.Reason(
                5,
                "Common pattern (Attribution-Joint)."
            ),
        "COMMON_PATTERN_ELABORATION_ELABORATION":
            defs.Reason(
                6,
                "Common pattern (Elaboration-Elaboration)."
            ),
        "COMMON_PATTERN_ATTRIBUTION_EXPLANATION":
            defs.Reason(
                7,
                "Common pattern (Attribution-Explanation)."
            ),
        "COMMON_PATTERN_-_S=NN":
            defs.Reason(
                8,
                "Common pattern ( -Satellite=NN)."
            ),
        "COMMON_PATTERN_-_ELABORATION":
            defs.Reason(
                9,
                "Common pattern ( -Elaboration)."
            ),
        "COMMON_PATTERN_WHATEVER_CONTRAST":
            defs.Reason(
                10,
                "Common pattern (Whatever-Contrast)."
            ),
#         "COMMON_PATTERN_-_ELABORATION": # DUPLICATE KEY, SHADOWED 9 -> INCLUDED IN 9
#             defs.Reason(
#                 11,
#                 "Common pattern ( -Elaboration)."
#             ),
#         "COMMON_PATTERN_-_ELABORATION->>BACKGROUND": # INCLUDED IN 9
#             defs.Reason(
#                 12,
#                 "Common pattern ( -Elaboration->>Background)."
#             ),
#         "COMMON_PATTERN_-_JOINT->>BACKGROUND": # INCLUDED IN 3
#             defs.Reason(
#                 13,
#                 "Common pattern ( -Joint->>Background)."
#             )
    }

    @staticmethod
    def choose_connective(prepared_nucleus_text, prepared_sn_text, max_short_tokens=14):
        """Pick the connective placed between the nucleus and satellite texts.

        Args:
            prepared_nucleus_text: Text of the nucleus part.
            prepared_sn_text: Text of the satellite part.
            max_short_tokens: Largest combined ``word_tokenize`` token count
                of both texts for which the short connective is still used.

        Returns:
            ``", and "`` when the combined token count does not exceed
            ``max_short_tokens``, otherwise ``". In fact, "``.
        """
        token_count = (
            len(word_tokenize(prepared_nucleus_text))
            + len(word_tokenize(prepared_sn_text))
        )
        if token_count <= max_short_tokens:
            return ", and "
        return ". In fact, "

    def _match_common_pattern(self, nucleus_relation, satellite_relation):
        """Return the ``reasons`` entry matching the two sub-relations, or None.

        Args:
            nucleus_relation: Sub-relation of the nucleus; may be None.
            satellite_relation: Sub-relation of the satellite; must not be None.
        """
        satellite_type = satellite_relation.type

        if nucleus_relation is None:
            # Patterns of the form " -<satellite relation>".
            if satellite_type == "Elaboration":
                return self.reasons["COMMON_PATTERN_-_ELABORATION"]
            if satellite_type == "Explanation":
                return self.reasons["COMMON_PATTERN_-_EXPLANATION"]
            if satellite_type == "Joint":
                return self.reasons["COMMON_PATTERN_-_JOINT"]
            if utils.is_nn(satellite_relation):
                return self.reasons["COMMON_PATTERN_-_S=NN"]
            return None

        nucleus_type = nucleus_relation.type

        if satellite_type == "Joint":
            if nucleus_type == "Elaboration":
                return self.reasons["COMMON_PATTERN_ELABORATION_JOINT"]
            if nucleus_type == "Attribution":
                return self.reasons["COMMON_PATTERN_ATTRIBUTION_JOINT"]
            return None

        if nucleus_type == "Elaboration" and satellite_type == "Elaboration":
            return self.reasons["COMMON_PATTERN_ELABORATION_ELABORATION"]
        if nucleus_type == "Attribution" and satellite_type == "Explanation":
            return self.reasons["COMMON_PATTERN_ATTRIBUTION_EXPLANATION"]
        if satellite_type == "Contrast":
            # Any nucleus relation is accepted together with a Contrast satellite.
            return self.reasons["COMMON_PATTERN_WHATEVER_CONTRAST"]
        return None

    def generate_statement(self, text, relation, verbose=False, **kwargs):
        """Generate a statement for an "Explanation" relation, if a pattern matches.

        Args:
            text: Text the relation refers to.
            relation: Relation to process; its ``type`` must be "Explanation".
            verbose: When true, progress messages are emitted through
                ``utils.print_if_verbose``.
            **kwargs: Accepted but not used by this rule.

        Returns:
            The value returned by ``self._generate_statement``, or None when
            the extended info could not be prepared, the satellite has no
            sub-relation, or no pattern from ``reasons`` matches.

        Raises:
            AssertionError: If ``relation`` is None or is not an "Explanation".
        """
        assert relation is not None and relation.type == "Explanation"
        info = preparation.Preprocessor.prepare_extended_info(text, relation, verbose)
        if info is None:
            utils.print_if_verbose("Extended info preparation wasn't successful.", verbose)
            return None

        # Every pattern below needs a satellite sub-relation.
        if info.satellite_info.relation is None:
            return None

        utils.print_if_verbose(
            "Nucleus relation type: "
            f"'{utils.get_relation_type(info.nucleus_info.relation)}'.",
            verbose
        )
        utils.print_if_verbose(
            "Satellite relation type: "
            f"'{utils.get_relation_type(info.satellite_info.relation)}'.",
            verbose
        )

        reason = self._match_common_pattern(
            info.nucleus_info.relation, info.satellite_info.relation
        )
        if reason is None:
            return None

        utils.print_if_verbose(reason.explanation, verbose)

        final_nucleus_text, final_sn_text = self._finalise_statement_parts(
            info.nucleus_preparation_result.prepared_text,
            info.satellite_preparation_result.prepared_text,
            verbose
        )
        connective = RuleExplanation04.choose_connective(
            final_nucleus_text, final_sn_text
        )
        return self._generate_statement(
            final_nucleus_text,
            connective,
            final_sn_text,
            relation,
            info.nucleus_proximity,
            self.name,
            reason,
            verbose
        )

In [5]:
# Demo cell: apply the rule to the first "Explanation" relation found in the
# parsed tree file 1310.txt.tree. The guard skips this when the module is
# imported (its name is then not "__main__") or when a `__file__` global
# exists (e.g. when run as a plain script).
if "__file__" not in globals() and __name__ == "__main__":
    rule = RuleExplanation04()

    with open("../parsed/race/train/middle/1310.txt.tree", "rt") as f:
        tree_text = f.read()

    # Drop the "<s>" and "<P>" markers before extracting the relations.
    cleaned_tree_text = tree_text.replace("<s>", "").replace("<P>", "")
    text, relations = relation_extraction.read_relations(cleaned_tree_text)

    expl = relations["Explanation"][0]
    # Show the span of text covered by the relation.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        statement_json = json.dumps(statement._asdict(), indent=2)
        print(statement_json)

Typhoons are very dangerous .  In 2004 , Typhoon Yunna killed 164 people in Zhejiang , and 24 people were missing .  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Parsing result:
(ROOT
  (S
    (PP (IN In)
      (NP (CD 2004)))
    (, ,)
    (S
      (NP (NNP Typhoon) (NNP Yunna))
      (VP (VBD killed)
        (NP (CD 164) (NNS people))
        (PP (IN in)
          (NP (NNP Zhejiang)))))
    (, ,)
    (CC and)
    (S
      (NP (CD 24) (NNS people))
      (VP (VBD were)
        (ADJP (VBG missing))))
    (. .)))

Constituencies:
    type  start  end  depth
0     IN      0    1      3
1     CD      1    2      4
2     NP      1    2      3
3     PP      0    2      2
4      ,      2    3      2
5    NNP      3    4      4
6    NNP      4    5      4
7     NP      3    5      3
8    VBD      5    6      4
9     CD      6    7      5
10   NNS      7    8      5
11

In [6]:
# Demo cell: apply the rule to the second "Explanation" relation found in the
# parsed tree file 2827.txt.tree. The guard skips this when the module is
# imported (its name is then not "__main__") or when a `__file__` global
# exists (e.g. when run as a plain script).
if "__file__" not in globals() and __name__ == "__main__":
    rule = RuleExplanation04()

    with open("../parsed/race/train/middle/2827.txt.tree", "rt") as f:
        tree_text = f.read()

    # Drop the "<s>" and "<P>" markers before extracting the relations.
    cleaned_tree_text = tree_text.replace("<s>", "").replace("<P>", "")
    text, relations = relation_extraction.read_relations(cleaned_tree_text)

    expl = relations["Explanation"][1]
    # Show the span of text covered by the relation.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        statement_json = json.dumps(statement._asdict(), indent=2)
        print(statement_json)

The Boones traveled across Pennsylvania .  At last the family came to the green Yadkin Valley in North Carolina .  There were a few houses there already , but the farmland was much broader than that in Pennsylvania .  Father Boone said , " This is good farmland .  We will stop here . "  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
At last the family came to the green Yadkin Valley in North Carolina.  There were a few houses there already, but the farmland was much broader than that in Pennsylvania.  Father Boone said, " This is good farmland.  We will stop here. "
Nucleus relation type: '-'.
Satellite relation type: 'Joint'.
Common pattern ( -Joint).
Taking the last sentence and resolving pronouns:
The Boones traveled across Pennsylvania. 
-

In [7]:
# Demo cell: apply the rule to the second "Explanation" relation found in the
# parsed tree file 2708.txt.tree. The guard skips this when the module is
# imported (its name is then not "__main__") or when a `__file__` global
# exists (e.g. when run as a plain script).
if "__file__" not in globals() and __name__ == "__main__":
    rule = RuleExplanation04()

    with open("../parsed/race/train/middle/2708.txt.tree", "rt") as f:
        tree_text = f.read()

    # Drop the "<s>" and "<P>" markers before extracting the relations.
    cleaned_tree_text = tree_text.replace("<s>", "").replace("<P>", "")
    text, relations = relation_extraction.read_relations(cleaned_tree_text)

    expl = relations["Explanation"][1]
    # Show the span of text covered by the relation.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        statement_json = json.dumps(statement._asdict(), indent=2)
        print(statement_json)

You can give your parents a happy day with a card or a joke .  It 's also lovely when a kid cleans up his or her room without being asked .  And if you try not fight with your brothers or sisters , your parents will be so happy .  Do your best at whatever you do .  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
It's also lovely when a kid cleans up his or her room without being asked.  And if you try not fight with your brothers or sisters, your parents will be so happy.  Do your best at whatever you do.
Nucleus relation type: '-'.
Satellite relation type: 'Joint'.
Common pattern ( -Joint).
Taking the last sentence and resolving pronouns:
You can give your parents a happy day with a card or a joke. 
---> 
You can give your parents a happy day 

In [8]:
# Demo cell: apply the rule to the first "Explanation" relation found in the
# parsed tree file 8170.txt.tree. The guard skips this when the module is
# imported (its name is then not "__main__") or when a `__file__` global
# exists (e.g. when run as a plain script).
if "__file__" not in globals() and __name__ == "__main__":
    rule = RuleExplanation04()

    with open("../parsed/race/train/middle/8170.txt.tree", "rt") as f:
        tree_text = f.read()

    # Drop the "<s>" and "<P>" markers before extracting the relations.
    cleaned_tree_text = tree_text.replace("<s>", "").replace("<P>", "")
    text, relations = relation_extraction.read_relations(cleaned_tree_text)

    expl = relations["Explanation"][0]
    # Show the span of text covered by the relation.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        statement_json = json.dumps(statement._asdict(), indent=2)
        print(statement_json)

The first man decided to leave alone .  As the ship was about to leave , the first man heard a voice , " Why are you leaving your friend on island ? "  " My gs are mine alone , since I was the one that prayed for them , " the first man answered .  " His prayers were all unanswered . "  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
As the ship was about to leave, the first man heard a voice, " Why are you leaving your friend on island? "  " My gs are mine alone, since I was the one that prayed for them, " the first man answered.  " His prayers were all unanswered. "
Nucleus relation type: '-'.
Satellite relation type: 'Elaboration'.
Common pattern ( -Elaboration).
Taking the last sentence and resolving pronouns:
The first man decided to leave 