In [4]:
import json

In [5]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

In [6]:
import import_ipynb
from aux import utils
from aux import nlp
from aux import relation_extraction
from aux import defs
import preparation

In [9]:
class RuleExplanation05(defs.Rule):
    """Rule "explanation_05": builds a statement from an "Explanation" relation.

    A statement is produced only when the relations found inside the nucleus
    and the satellite match one of the patterns listed in ``reasons``;
    otherwise ``generate_statement`` returns ``None``.
    """

    # Identifier stored in the ``rule`` field of every produced statement.
    name = "explanation_05"
    # Relation type this rule is written for.
    relation_type = "Explanation"
    # Patterns that allow a statement to be generated, keyed by pattern id.
    # Every lookup in generate_statement must use one of these keys verbatim.
    reasons = {
        "COMMON_PATTERN_-_EXPLANATION->CONDITION": 
            defs.Reason(
                1,
                "Common pattern ( -Explanation->Condition)."
            ),
        "COMMON_PATTERN_CONDITION-EXPLANATION": 
            defs.Reason(
                2,
                "Common pattern (Condition-Explanation)."
            ),
        "COMMON_PATTERN_WHATEVER-CONTRAST": 
            defs.Reason(
                3,
                "Common pattern ( -Contrast)."
            )
    }
    
    def generate_statement(self, text, relation, verbose=False):
        """Try to generate a statement for an "Explanation" relation.

        Args:
            text: Text the relation refers to; passed through to
                ``preparation.Preprocessor.prepare_extended_info``.
            relation: Relation to process; must not be ``None`` and its
                ``type`` must be "Explanation".
            verbose: When true, progress messages are emitted through
                ``utils.print_if_verbose``.

        Returns:
            A ``defs.Statement`` when one of the patterns in ``reasons``
            matches, otherwise ``None`` (also when the extended info could not
            be prepared).

        Raises:
            AssertionError: If ``relation`` is ``None`` or not an
                "Explanation", or if the satellite relation type is ``None``.
        """
        assert(relation is not None and relation.type == "Explanation")
        info = preparation.Preprocessor.prepare_extended_info(text, relation, verbose)
        if info is None:
            utils.print_if_verbose("Extended info preparation wasn't successful.", verbose)
            return None
        
        utils.print_if_verbose(
            "Nucleus relation type: "
            f"'{utils.get_relation_type(info.nucleus_info.relation)}'.",
            verbose
        )
        utils.print_if_verbose(
            "Satellite relation type: "
            f"'{utils.get_relation_type(info.satellite_info.relation)}'.",
            verbose
        )
            
        reason = None
        
        assert info.satellite_info.relation.type is not None
        if info.nucleus_info.relation is None:
            # No relation inside the nucleus: the satellite must be an
            # Explanation whose own nucleus relation is a Condition.
            if (
                info.satellite_info.relation.type == "Explanation"
                    and info.sn_relation is not None
                    and info.sn_relation.type == "Condition"
            ):
                reason = self.reasons["COMMON_PATTERN_-_EXPLANATION->CONDITION"]
        else:
            if (
                info.nucleus_info.relation.type == "Condition"
                    and info.satellite_info.relation.type == "Explanation"
            ):
                # Key spelled exactly as declared in ``reasons``; a mismatch
                # raises KeyError.
                reason = self.reasons["COMMON_PATTERN_CONDITION-EXPLANATION"]
            elif info.satellite_info.relation.type == "Contrast":
                # Key spelled exactly as declared in ``reasons``; a mismatch
                # raises KeyError.
                reason = self.reasons["COMMON_PATTERN_WHATEVER-CONTRAST"]

        if reason is None:
            # None of the known patterns matched.
            return None

        utils.print_if_verbose(reason.explanation, verbose)
        
        prepared_nucleus_text = utils.remove_trailing_punctuation(
            utils.uppercase_first_letter(info.nucleus_preparation_result.prepared_text)
        )
        processed_sn_text = nlp.remove_leading_words(
            info.satellite_preparation_result.prepared_text, verbose
        )
        # Fall back to the raw satellite-nucleus text when
        # remove_leading_words returns None.
        prepared_sn_text = utils.lowercase_first_letter(
            processed_sn_text if processed_sn_text is not None 
                else info.sn_text
        )
        # The two parts are joined with the connective "but".
        statement_text = f"{prepared_nucleus_text} but {prepared_sn_text}"
        return defs.Statement(
            statement_text=statement_text,
            nucleus=prepared_nucleus_text,
            satellite_nucleus=prepared_sn_text,
            left_boundary=relation.left.start,
            right_boundary=relation.right.end,
            nucleus_proximity=info.nucleus_proximity.value,
            rule=self.name,
            reason=reason
        )

In [10]:
if __name__ == "__main__" and "__file__" not in globals():
    # Demo run: executed only when this module is "__main__" and no
    # ``__file__`` global is defined.
    rule = RuleExplanation05()

    # Load one parsed tree file.
    with open("../parsed/race/train/middle/1310.txt.tree", "rt") as f:
        tree_text = f.read()

    # Strip the "<s>" and "<P>" markers (in that order) before extracting
    # the relations.
    cleaned_tree_text = tree_text
    for marker in ("<s>", "<P>"):
        cleaned_tree_text = cleaned_tree_text.replace(marker, "")
    text, relations = relation_extraction.read_relations(cleaned_tree_text)

    # Take the first "Explanation" relation and show the text span it covers.
    expl = relations["Explanation"][0]
    span_start = expl.left.start
    span_end = expl.right.end
    print(text[span_start:span_end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    if statement is not None:
        # ``_asdict()`` is called on the statement to get a JSON-serialisable
        # view of it.
        statement_fields = statement._asdict()
        print(json.dumps(statement_fields, indent=2))

Typhoons are very dangerous .  In 2004 , Typhoon Yunna killed 164 people in Zhejiang , and 24 people were missing .  
Nucleus is on the left.
Nucleus's depth <= 3.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
The depth of the satellite's nucleus <= 3.
Parsing result:
(ROOT
  (S
    (PP (IN In)
      (NP (CD 2004)))
    (, ,)
    (S
      (NP (NNP Typhoon) (NNP Yunna))
      (VP (VBD killed)
        (NP (CD 164) (NNS people))
        (PP (IN in)
          (NP (NNP Zhejiang)))))
    (, ,)
    (CC and)
    (S
      (NP (CD 24) (NNS people))
      (VP (VBD were)
        (ADJP (VBG missing))))
    (. .)))

Constituencies:
    type  start  end  depth
0     IN      0    1      3
1     CD      1    2      4
2     NP      1    2      3
3     PP      0    2      2
4      ,      2    3      2
5    NNP      3    4      4
6    NNP      4    5      4
7     NP      3    5      3
8    VBD      5    6      4
9     CD      6  