In [1]:
import json

In [2]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

In [3]:
import import_ipynb
from aux import utils
from aux import nlp
from aux import relation_extraction
from aux import defs

importing Jupyter notebook from /Users/YK/mt/project/aux/utils.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/nlp.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/relation_extraction.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/defs.ipynb


In [18]:
class RuleExplanation03(defs.Rule):
    """Rule that rewrites an "Explanation" relation as a "Moreover" statement.

    The generated statement has the form
    "<nucleus>. Moreover <satellite's nucleus>". A statement is produced only
    when one of the entries in ``reasons`` applies; the reasons are tried in
    the order in which they are checked in ``generate_statement``.
    """

    name = "explanation_03"
    relation_type = "Explanation"
    # Every justification this rule can attach to a generated statement.
    reasons = {
        "SN_CONTAINS_IN_FACT_ETC":
            defs.Reason(
                1,
                "Satellite's nucleus contains "
                "'in fact' / 'as a matter of fact' / 'actually' / 'indeed' / 'also'."),
        "SN_STARTS_WITH_AND":
            defs.Reason(
                2,
                "Satellite starts with 'and'."
            ),
        "NUCLEUS_AND_SN_HAVE_SAME_SUBJECT":
            defs.Reason(
                3,
                "Nucleus and satellite's nucleus have the same subject."
            ),
        "COMMON_PATTERN_JOINT_JOINT":
            defs.Reason(
                4,
                "Common pattern (Joint-Joint)."
            ),
        "COMMON_PATTERN_-_ELABORATION":
            defs.Reason(
                5,
                "Common pattern ( -Elaboration)."
            ),
        "COMMON_PATTERN_ELABORATION_JOINT":
            defs.Reason(
                6,
                "Common pattern (Elaboration-Joint)."
            )
    }

    def generate_statement(self, text, relation, verbose=False):
        """Try to build a "Moreover" statement from an Explanation relation.

        Args:
            text: The text the relation refers to; it is handed to
                ``utils.prepare_extended_info`` and ``nlp.find_subject``.
            relation: The relation to process. Must not be None and must
                have ``type == "Explanation"``.
            verbose: When true, progress messages are emitted through
                ``utils.print_if_verbose``.

        Returns:
            A ``defs.Statement`` when one of the rule's reasons applies,
            otherwise ``None`` (also when the extended info cannot be
            prepared).

        Raises:
            AssertionError: If ``relation`` is None or is not an
                "Explanation" relation.
        """
        assert(relation is not None and relation.type == "Explanation")
        info = utils.prepare_extended_info(text, relation, verbose)
        if info is None:
            utils.print_if_verbose("Extended info preparation wasn't successful.", verbose)
            return None

        reason = None

        # -- lexical cues in the satellite's nucleus come first
        if utils.contains_any_of(
            utils.remove_extra_space(info.sn_text.lower()),
            [
                "in fact",
                "as a matter of fact",
                "actually",
                "indeed",
                "also"
            ]
        ):
            reason = self.reasons["SN_CONTAINS_IN_FACT_ETC"]
        elif utils.get_first_token(info.sn_text) == "and":
            reason = self.reasons["SN_STARTS_WITH_AND"]
        else:
            # -- otherwise compare the subjects of the two nuclei
            nucleus_subject = nlp.find_subject(
                text, info.nucleus_info.segment.start, info.nucleus_info.segment.end
            )
            if nucleus_subject is not None:
                utils.print_if_verbose(
                    f"Nucleus' subject is '{nucleus_subject}'.", verbose
                )
                sn_subject = nlp.find_subject(
                    text, info.sn_segment.start, info.sn_segment.end
                )
                if sn_subject is not None:
                    utils.print_if_verbose(
                        f"The subject of satellite's nucleus is '{sn_subject}'.",
                        verbose
                    )
                    if nucleus_subject == sn_subject:
                        reason = self.reasons["NUCLEUS_AND_SN_HAVE_SAME_SUBJECT"]
                else:
                    # The helper lives in `utils`; the bare name is not in
                    # scope and would raise NameError on this path.
                    utils.print_if_verbose(
                        "Failed to find a subject in the satellite's nucleus.", verbose
                    )
            else:
                utils.print_if_verbose(
                    "Failed to find a subject in the nucleus.", verbose
                )

        if reason is None:
            # -- checking for a common pattern
            nucleus_relation = info.nucleus_info.relation
            satellite_relation = info.satellite_info.relation
            utils.print_if_verbose(
                "Nucleus relation type: "
                f"'{utils.get_relation_type(nucleus_relation)}'.",
                verbose
            )
            utils.print_if_verbose(
                "Satellite relation type: "
                f"'{utils.get_relation_type(satellite_relation)}'.",
                verbose
            )
            # Every pattern needs a relation on the satellite side.
            if satellite_relation is not None:
                if nucleus_relation is None:
                    if satellite_relation.type == "Elaboration":
                        reason = self.reasons["COMMON_PATTERN_-_ELABORATION"]
                elif (
                    nucleus_relation.type == "Elaboration"
                        and satellite_relation.type == "Joint"
                ):
                    reason = self.reasons["COMMON_PATTERN_ELABORATION_JOINT"]
                elif (
                    nucleus_relation.type == "Joint"
                        and satellite_relation.type == "Joint"
                ):
                    reason = self.reasons["COMMON_PATTERN_JOINT_JOINT"]

        if reason is None:
            return None

        utils.print_if_verbose(reason.explanation, verbose)

        prepared_nucleus_text = utils.remove_trailing_punctuation(
            utils.uppercase_first_letter(info.nucleus_text)
        )
        # Fall back to the unprocessed satellite-nucleus text when the
        # leading-word removal yields nothing.
        processed_sn_text = nlp.remove_leading_words(info.sn_text, verbose)
        prepared_sn_text = utils.lowercase_first_letter(
            processed_sn_text if processed_sn_text is not None
                else info.sn_text
        )
        statement_text = f"{prepared_nucleus_text}. Moreover {prepared_sn_text}"
        return defs.Statement(
            statement_text=statement_text,
            nucleus=prepared_nucleus_text,
            satellite_nucleus=prepared_sn_text,
            left_boundary=relation.left.start,
            right_boundary=relation.right.end,
            nucleus_proximity=info.nucleus_proximity.value,
            rule=self.name,
            reason=reason
        )

In [19]:
if "__file__" not in globals() and __name__ == "__main__":
    # Demo: apply the rule to one Explanation relation of a parsed tree file
    # and print the resulting statement (if any) as JSON.
    rule = RuleExplanation03()

    with open("../parsed/race/train/middle/7547.txt.tree", "rt") as f:
        tree_text = f.read()

    # Drop the "<s>" and "<P>" markers before the relations are read.
    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    # Show the span of text covered by the chosen relation.
    expl = relations["Explanation"][1]
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

He likes the head teacher and decides to go .  Auggie 's first year at school has good times and bad times .  Auggie makes some good friends but other children are unfriendly to him .  At the end of the year , Auggie goes on a school trip and a frightening event there changes things completely .  
Nucleus is on the left.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite doesn't have nested relations or its depth is too small.
Satellite doesn't contain a wh-word or 'how'.
Nucleus' subject is 'His'.
The subject of satellite's nucleus is 'Auggie 's first year at school'.
Nucleus relation type: 'Joint'.
Satellite relation type: 'Joint'.
Common pattern (Joint-Joint).
Removing tokens before the first NP: 
Auggie's first year at school has good times and bad times. 
---> 
Auggie 's first year at school has good times and bad times.

RESULT:
{
  "statement_text": "He likes the head teacher and decides to go. Moreover auggie 's first year at school has good