In [1]:
import json

In [2]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

In [3]:
import import_ipynb
from aux import utils
from aux import nlp
from aux import relation_extraction
from aux import defs
import preparation
import rule_base

importing Jupyter notebook from /Users/YK/mt/project/aux/utils.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/nlp.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/relation_extraction.ipynb
importing Jupyter notebook from /Users/YK/mt/project/aux/defs.ipynb
importing Jupyter notebook from preparation.ipynb
importing Jupyter notebook from rule_base.ipynb


In [4]:
def is_attribution(relation):
    """Tell whether ``relation`` is an "Attribution" relation.

    ``None`` is accepted and yields ``False``; any other value is judged by
    comparing its ``type`` attribute with the string "Attribution".
    """
    if relation is None:
        return False
    return relation.type == "Attribution"


def has_nested_attribution(relation):
    """Check whether a nucleus side of ``relation`` holds an "Attribution" child.

    A side counts when its marker (``relation.left`` / ``relation.right``)
    equals "N" and the matching child (``left_child`` / ``right_child``) is an
    "Attribution" relation according to ``is_attribution``.
    """
    # NOTE(review): other code in this notebook reads ``relation.left.start``,
    # so confirm that comparing ``relation.left`` with the string "N" is what
    # the relation type actually supports.
    left_is_nested = relation.left == "N" and is_attribution(relation.left_child)
    if left_is_nested:
        return True
    # The right side is only inspected when the left side did not qualify.
    return relation.right == "N" and is_attribution(relation.right_child)

In [5]:
class SpeechVerbFinder:
    """Finds speech verbs from a word-list file among a sequence of tokens."""

    def __init__(self, path="aux/speech_verbs.txt"):
        """Load the speech-verb list.

        Args:
            path: Text file with one verb per line. Defaults to the list
                shipped next to the notebooks. Each line is stripped and
                lower-cased; blank lines are ignored.
        """
        with open(path, "rt") as f:
            # Skip blank lines so the empty string can never count as a verb.
            self.speech_verbs = {
                verb for verb in (line.strip().lower() for line in f) if verb
            }

    def find_speech_verb(self, tokens):
        """Return the first token, in token order, whose normalised form is a speech verb.

        Each token is passed through ``nlp.normalise_verb`` and the result is
        looked up in ``self.speech_verbs``.  Scanning in token order keeps the
        answer deterministic when several speech verbs occur (picking an
        arbitrary element of a set intersection is not reproducible across runs).

        Args:
            tokens: Iterable of token strings.

        Returns:
            The normalised verb of the first matching token, or ``None`` when
            no token matches.
        """
        for token in tokens:
            verb = nlp.normalise_verb(token)
            if verb in self.speech_verbs:
                return verb
        return None

In [6]:
class RuleExplanation02(rule_base.Rule):
    """Rule for "Explanation" relations built around the connective ". That is why ".

    ``generate_statement`` tries the following, in order:

    1. SPEECH - the prepared nucleus text starts with a quotation mark and the
       satellite's own satellite contains a speech verb whose subject can be
       resolved; the connective becomes ". That is why <subject> <verb> ".
    2. SATELLITE_STARTS_WITH_SO - the first token of the satellite segment is "so".
    3. COMMON_PATTERN_SIMPLE_NUCLEUS / COMMON_PATTERN_COMPLEX_NUCLEUS - the types
       of the relations nested in the nucleus and the satellite match one of
       the hard-coded combinations below.
    """

    name = "explanation_02"
    relation_type = "Explanation"
    reasons = {
        "SPEECH": 
            defs.Reason(
                1, 
                "Nucleus is surrounded by quotation marks" 
                " and satellite's satellite contains a speech verb."),
        "SATELLITE_STARTS_WITH_SO": 
            defs.Reason(
                2, 
                "Satellite starts with 'so'."
            ),
        "COMMON_PATTERN_SIMPLE_NUCLEUS": 
            defs.Reason(
                3, 
                "Common pattern; nucleus without nested relations."
            ),
        "COMMON_PATTERN_COMPLEX_NUCLEUS": 
            defs.Reason(
                4,
                "Common pattern; nucleus has nested relations."
            )
    }

    # Satellite relation types accepted when the nucleus has no nested relation.
    _simple_nucleus_satellite_types = frozenset({"Joint", "Elaboration", "Attribution"})

    # Nucleus relation type -> satellite relation types accepted together with it.
    _complex_nucleus_patterns = {
        "Elaboration": frozenset({"Attribution"}),
        "Background": frozenset({"Explanation"}),
        "Joint": frozenset({"Elaboration", "Attribution", "Explanation"}),
    }

    def __init__(self):
        self.speech_verb_finder = SpeechVerbFinder()

    def generate_statement(self, text, relation, verbose=False, **kwargs):
        """Try to turn an "Explanation" relation into a "... That is why ..." statement.

        Args:
            text: The full text that the relation's segments index into.
            relation: The relation to process; must be non-None with
                ``type == "Explanation"`` (enforced by an assertion).
            verbose: When true, progress messages are emitted through
                ``utils.print_if_verbose``.
            **kwargs: Accepted and ignored.

        Returns:
            The result of ``self._generate_statement`` when one of the rule's
            conditions holds, otherwise ``None``.  ``None`` is also returned
            when the extended info cannot be prepared or when the satellite
            has no nested relation.
        """
        assert(relation is not None and relation.type == "Explanation")
        info = preparation.Preprocessor.prepare_extended_info(text, relation, verbose)
        if info is None:
            utils.print_if_verbose("Extended info preparation wasn't successful.", verbose)
            return None

        # Every condition of this rule is evaluated only for satellites that
        # have a nested relation.
        if info.satellite_info.relation is None:
            return None

        speech = self._find_speech_attribution(text, info, verbose)
        if speech is not None:
            speech_verb, speaker = speech
            reason = self.reasons["SPEECH"]
            utils.print_if_verbose(reason.explanation, verbose)
            verb = nlp.conjugate(speech_verb)
            final_nucleus_text, final_sn_text = self._finalise_statement_parts(
                info.nucleus_preparation_result.prepared_text,
                info.satellite_preparation_result.prepared_text,
                verbose
            )
            # Whatever _generate_statement returns is final for the speech
            # case; the pattern-based conditions are not tried afterwards.
            return self._generate_statement(
                final_nucleus_text,
                f". That is why {speaker} {verb} ",
                (
                    utils.fix_quotes(final_sn_text)
                    if final_sn_text is not None else None
                ),
                relation,
                info.nucleus_proximity.value,
                self.name,
                reason,
                verbose
            )

        reason = self._select_pattern_reason(text, info, verbose)
        if reason is None:
            return None

        utils.print_if_verbose(reason.explanation, verbose)
        final_nucleus_text, final_sn_text = self._finalise_statement_parts(
            info.nucleus_preparation_result.prepared_text,
            info.satellite_preparation_result.prepared_text,
            verbose
        )
        return self._generate_statement(
            final_nucleus_text,
            ". That is why ",
            final_sn_text,
            relation,
            info.nucleus_proximity.value,
            self.name,
            reason,
            verbose
        )

    def _find_speech_attribution(self, text, info, verbose):
        """Look for a speech verb and its subject in the satellite's satellite.

        Expects ``info.satellite_info.relation`` to be non-None.

        Returns:
            ``(speech_verb, resolved_subject)`` when the prepared nucleus text
            starts with a quotation mark, the satellite's satellite contains a
            speech verb and a subject could be resolved; otherwise ``None``
            (after reporting the reason in verbose mode).
        """
        _, ss_segment = info.satellite_info.relation.get_satellite()
        if ss_segment is None:
            utils.print_if_verbose("Satellite doesn't have a satellite.", verbose)
            return None

        # Only the opening quotation mark of the nucleus is checked.
        if not info.nucleus_preparation_result.prepared_text.startswith(('"', "``")):
            utils.print_if_verbose("Nucleus doesn't start with quotes.", verbose)
            return None

        ss_tokens = utils.lowercase_and_tokenize(
            text[ss_segment.start:ss_segment.end]
        )
        speech_verb = self.speech_verb_finder.find_speech_verb(ss_tokens)
        if speech_verb is None:
            utils.print_if_verbose(
                "Didn't find any speech verbs in the satellite's satellite.",
                verbose
            )
            return None

        utils.print_if_verbose(
            f"Satellite's speech verb: {speech_verb}", verbose
        )
        # NOTE(review): presumably moves the start back by up to 5 sentences to
        # give subject resolution more context - confirm against nlp.move_st.
        new_st, _ = nlp.move_st(text, ss_segment.start, 5)
        resolved_subjects = nlp.get_resolved_subjects(
            text[new_st:ss_segment.end]
        )
        if len(resolved_subjects) == 0:
            utils.print_if_verbose(
                "Failed to find the subject of the satellite's satellite.",
                verbose
            )
            return None

        # The last resolved subject is the one used.
        resolved_subject = resolved_subjects[-1]
        utils.print_if_verbose(
            f"The resolved subject: {resolved_subject}", verbose
        )
        if resolved_subject is None:
            utils.print_if_verbose(
                "Failed to find the subject of the satellite's satellite.",
                verbose
            )
            return None

        return speech_verb, resolved_subject

    def _select_pattern_reason(self, text, info, verbose):
        """Pick the reason for the non-speech conditions, or ``None`` if none holds.

        Expects ``info.satellite_info.relation`` to be non-None (the caller
        returns early otherwise).
        """
        satellite_segment = info.satellite_info.segment
        satellite_text = text[satellite_segment.start:satellite_segment.end]
        if utils.get_first_token(satellite_text) == "so":
            utils.print_if_verbose("Satellite starts with 'so'.", verbose)
            return self.reasons["SATELLITE_STARTS_WITH_SO"]

        nucleus_relation = info.nucleus_info.relation
        satellite_relation = info.satellite_info.relation
        utils.print_if_verbose(
            "Nucleus relation type: "
            f"'{utils.get_relation_type(nucleus_relation)}'.",
            verbose
        )
        utils.print_if_verbose(
            "Satellite relation type: "
            f"'{utils.get_relation_type(satellite_relation)}'.",
            verbose
        )

        satellite_type = satellite_relation.type
        if nucleus_relation is None:
            if (
                satellite_type in self._simple_nucleus_satellite_types
                or (
                    satellite_type == "Explanation"
                    and has_nested_attribution(satellite_relation)
                )
            ):
                return self.reasons["COMMON_PATTERN_SIMPLE_NUCLEUS"]
        elif satellite_type in self._complex_nucleus_patterns.get(
            nucleus_relation.type, ()
        ):
            return self.reasons["COMMON_PATTERN_COMPLEX_NUCLEUS"]

        utils.print_if_verbose(
            "None of conditions was met.", verbose
        )
        return None

In [7]:
# Demo: run the rule on the first "Explanation" relation of parsed file 3972.
# NOTE(review): the guard presumably limits this cell to interactive notebook
# runs, where `__file__` is not defined - confirm.
if __name__ == "__main__" and "__file__" not in globals():
    rule = RuleExplanation02()

    with open("../parsed/race/train/middle/3972.txt.tree", "rt") as f:
        tree_text = f.read()

    # The "<s>" and "<P>" markers are removed before the relations are read.
    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    expl = relations["Explanation"][0]
    # Show the text span from the start of the left segment to the end of the right one.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    # Nothing is printed after the header when the rule produced no statement.
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

( 4 ) Walk with confidence .  It does n't matter how cool your uniform is ( or is not ) .  If you are not confident , nobody will notice how great you look !  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus is flat.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
It does n't matter how cool your uniform is ( or is not ).  If you are not confident, nobody will notice how great you look!
Nucleus doesn't start with quotes.
Nucleus relation type: '-'.
Satellite relation type: 'Elaboration'.
Common pattern; nucleus without nested relations.
Taking the last sentence and resolving pronouns:
( 4 ) Walk with confidence. 
---> 
(4) Walk with confidence.
Taking the first sentence and removing leading words:
-- syntactic parsing result
 (ROOT
  (S
    (NP (PRP It))
    (VP (VBZ does) (RB n't)
      (VP (VB matte

In [8]:
# Demo: run the rule on the third "Explanation" relation of parsed file 6760.
# NOTE(review): the guard presumably limits this cell to interactive notebook
# runs, where `__file__` is not defined - confirm.
if __name__ == "__main__" and "__file__" not in globals():
    rule = RuleExplanation02()

    with open("../parsed/race/train/middle/6760.txt.tree", "rt") as f:
        tree_text = f.read()

    # The "<s>" and "<P>" markers are removed before the relations are read.
    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    expl = relations["Explanation"][2]
    # Show the text span from the start of the left segment to the end of the right one.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    # Nothing is printed after the header when the rule produced no statement.
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

" I 'm so worried about our farm , " she wrote .  " It ' stime to plant potatoes .  I ca n't dig all the fields by myself .  Jim read the letter and became sad .  " What can I do ? " he thought .  Then he had a good idea .  He wrote to his mother , " Do n't dig the fields , there 's a pot of money in the earth .  Do n't plant potatoes until I come home .  " Some days later , Jim got another letter from his mother .  It said , " Two days ago , about ten men came to our farm and dug all our fields .  
Nucleus is on the left.
Nucleus's depth <= 100.
Parsing result:
(ROOT
  (S ('' '')
    (S
      (NP (PRP I))
      (VP (VBP 'm)
        (ADJP (RB so) (JJ worried)
          (PP (IN about)
            (NP (PRP$ our) (NN farm))))))
    (, ,) ('' '')
    (NP (PRP she))
    (VP (VBD wrote))
    (. .) ('' '')))

Constituencies:
    type  start  end  depth
0     ''      0    1      2
1    PRP      1    2      4
2     NP      1    2      3
3    VBP      2    3      4
4     RB      3    4      5
5 

In [9]:
# Demo: run the rule on the first "Explanation" relation of parsed file 7237.
# NOTE(review): the guard presumably limits this cell to interactive notebook
# runs, where `__file__` is not defined - confirm.
if __name__ == "__main__" and "__file__" not in globals():
    rule = RuleExplanation02()

    with open("../parsed/race/train/middle/7237.txt.tree", "rt") as f:
        tree_text = f.read()

    # The "<s>" and "<P>" markers are removed before the relations are read.
    text, relations = relation_extraction.read_relations(
        tree_text.replace("<s>", "").replace("<P>", "")
    )

    expl = relations["Explanation"][0]
    # Show the text span from the start of the left segment to the end of the right one.
    print(text[expl.left.start:expl.right.end])

    statement = rule.generate_statement(text, expl, verbose=True)
    print("\nRESULT:")
    # Nothing is printed after the header when the rule produced no statement.
    if statement is not None:
        print(json.dumps(statement._asdict(), indent=2))

" And he waited quietly for his turn , instead of pushing the others aside ; showing that he was modest ( ) .  " When I talked with him , I noticed that his clothes were carefully brushed , his hair in nice order , and his teeth as white as milk .  When he wrote his name , I noticed that his fingernails were clean ; instead of having some untidy personal habits .  " Do n't you call these things letters of recommendation ?  I do ; and the things that I can discover a boy by using my eyes for ten minutes , is worth more than all the fine letters that he can bring . "  
Nucleus is on the left.
Nucleus's depth <= 100.
Nucleus's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Satellite's nucleus is on the left.
Nuclei proximity is NucleusProximity.NEAR
Satellite's (left) nucleus contains '.', '!', '?', or ';'.
Will use the whole segment.
Text extracted from the satellite:
" When I talked with him, I noticed that his clothes were carefully brushed, his hair in nice