# Dependency Parsing to Identify the Ingroup and Outgroup
---

In this notebook we are experimenting with dependency parsing to identify the ingroup and outgroup of a text.

For this tasks we use spaCy's dependency matcher to identify the ingroup and outgroup using hypernymy.

Hyponymic relationships are expressed in narrative clauses, therefore, we use the dependency matcher to detect hyponymic narrative clauses from which the ingroup and outgroup can be identified.

We refractor a set of regex defined Hearst Patterns into dependency patterns.

## Initialise the pipeline

In [1]:
%%time

import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_md")
from spacy.tokens import Token, Span, Doc
print(nlp.pipe_names)

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']
CPU times: user 2.66 s, sys: 1.22 s, total: 3.88 s
Wall time: 2.46 s


In [2]:
%%time

import pandas as pd

group_schema = {
    "Social" : {"Neutral": {"Person": ["man"],
                            "Community": ["organization", "nomad"]},
                "Ingroup": {"Family": ["brothers"],
                            "Ally": ["friend"]},
                "Outgroup": {"Outcast": ""},
                "Entity": {"Area": "",
                          "Structure": [""],
                          "Capability": ["thing"],
                          "Event": [""]},
                "Elevation": {"Positive": ["truthful", "exceptional", "good"]},
                "Othering": {"Negative": ["unfair", "hoax"]}},
    "Religious": {"Neutral": {"ReligiousGroup" : ["ulema"],
                              "ReligiousPerson": [""]},
                  "Ingroup": {"Believer": [""]},
                  "Outgroup": {"Aposate": [""]},
                  "Entity": {"Area": [""],
                             "Structure": [""],
                             "Capability": [""],
                             "Event": [""]},
                  "Elevation": {},
                  "Othering": {}},
    "Commercial": {"Neutral": {"Econ": ["passenger", "customer"]},
                   "Ingroup": {},
                   "Outgroup": {"Competitor": [""]},
                   "Entity": {"Area": "",
                              "Structure": [""],
                              "Capability": [""],
                              "Event": [""]},
                   "Elevation": {},
                   "Othering": {}},
    "Health" : {"Neutral": {"MedicalPerson": [""]},
                "Ingroup": {},
                "Outgroup" : {"Vermin": ["parasite"]},
                "Entity": {"Area": "",
                           "Structure": [""],
                           "Medicine": ["vaccine"],
                           "Event": [""]},
                "Elevation": {},
                "Othering": {}},
    "Security": {"Neutral": {"SecurityGroup": [""]},
                 "Ingroup": {},
                 "Outgroup": {"Criminal": ["terrorist"]},
                 "Entity": {"Area": "",
                            "Structure": [""],
                            "Capability": [""],
                            "Event": [""]},
                 "Elevation": {},
                 "Othering": {}},
    "Political": {"Neutral": {"Institution": ["authorities", "alliance"]},
                  "Ingroup": {},
                  "Outgroup": {},
                  "Entity": {"Area": "",
                             "Structure": [""],
                             "Capability": [""],
                             "Event": [""]},
                  "Elevation": {},
                  "Othering": {}},
    "Military": {"Neutral": {},
                 "Ingroup": {},
                 "Outgroup": {"Adversary": ["enemy"]},
                 "Entity": {"Area": "",
                            "Structure": [""],
                            "Capability": [""],
                            "Event": [""]},
                 "Elevation": {},
                 "Othering": {}}
}

labels = []
typology = dict()
typology_chart = dict()

## create a list of keys
schema = {ideology: {subcat: ', '.join(list(terms.keys())) for (subcat, terms) in value.items()} 
             for (ideology, value) in group_schema.items()}

keys = [list(cat.keys()) for cat in list(schema.values())][0]

## Create frames for table
frames = []
schema = {ideology: {subcat: list(terms.keys()) for (subcat, terms) in value.items()} 
             for (ideology, value) in group_schema.items()}


for frame in [list(cat.keys()) for cat in list(schema.values())][0]:
    frames.append(pd.DataFrame.from_dict({k : v[frame] for k, v in list(schema.items())}, orient = 'index').fillna("").T)

# display table
display(pd.concat(frames, keys = keys))

Unnamed: 0,Unnamed: 1,Social,Religious,Commercial,Health,Security,Political,Military
Neutral,0,Person,ReligiousGroup,Econ,MedicalPerson,SecurityGroup,Institution,
Neutral,1,Community,ReligiousPerson,,,,,
Ingroup,0,Family,Believer,,,,,
Ingroup,1,Ally,,,,,,
Outgroup,0,Outcast,Aposate,Competitor,Vermin,Criminal,,Adversary
Entity,0,Area,Area,Area,Area,Area,Area,Area
Entity,1,Structure,Structure,Structure,Structure,Structure,Structure,Structure
Entity,2,Capability,Capability,Capability,Medicine,Capability,Capability,Capability
Entity,3,Event,Event,Event,Event,Event,Event,Event
Elevation,0,Positive,,,,,,


CPU times: user 190 ms, sys: 51.5 ms, total: 241 ms
Wall time: 576 ms


## Create the utility functions

In [3]:
%%time

def doc_dep_graph(doc, strip = False):
    
    ''' 
    Put the graph with entity labels present (see 'tag' and 'label')
    '''
    
    words = [] # the words of a dependency graph
    arcs = [] # the arcs of a dependency graph
    
    for tok in doc:
        
        labels = dict()
        labels["text"] = tok.text
        if tok._.concept:
            labels["tag"] = tok._.concept
        else:
            labels["tag"] = tok.ent_type_
        words.append(labels)
        
#     words = [{"text" : tok.text, "tag": tok.pos_} for tok in doc]
#     print("hasModifier: ", [(tok, tok._.hasModifier) for tok in doc])
#     print("isModifying: ", [(tok, tok._.isModifying) for tok in doc])
    
    # iterate through each token in a doc
    for tok in doc:
        
        # ignore punctuation
        if tok.dep_ in ['punct']:
            continue
    
        if strip and not len(tok._.hasModifier):
            continue

        # if the token head has a method
        # the label becomes the method label
        
        if len(tok._.hasModifier) > 0:
            for mod in tok._.hasModifier:
#                 print(tok, '=>', mod[0], '=>', mod[0][1], '=>', tok.i)
                if mod[0] != "isHyponym":
                    label = mod[0]
                    head = mod[1].i

                    if tok.i < head:
                        arcs.append({
                            "start": tok.i,
                            "end": head,
                            "label": label,
                            "dir": "right"
                        })

                    # if the token index is greater than its head, the token head is the head
                    elif tok.i > head:
                        arcs.append({
                            "start": head,
                            "end": tok.i,
                            "label": label,
                            "dir": "left"
                        })
        else:
            if len(tok._.isModifying):
                label = ""
            else:
                label = tok.dep_
            if tok.i < tok.head.i:
                arcs.append({
                    "start": tok.i,
                    "end": tok.head.i,
                    "label": label,
                    "dir": "left"
                })

            # if the token index is greater than its head, the token head is the head
            elif tok.i > tok.head.i:
                arcs.append({
                    "start": tok.head.i,
                    "end": tok.i,
                    "label": label,
                    "dir": "right"
                })
    return {"words": words, "arcs": arcs}

CPU times: user 1e+03 ns, sys: 1e+03 ns, total: 2 µs
Wall time: 2.86 µs


## Setup The Named Concept Recognition

In [4]:
%%time
import spacy
from spacy.language import Language

# import pandas as pd

def custom_ents(doc):
    
    """
    returns entities with their modifier as an entity phrase
    """
    ents = []
    
    for ent in doc.ents:
        if ent.root.dep_ in ["amod", "compound"]:
            new_ent = Span(doc, ent.start, ent.root.head.i + 1, label = ent.root.ent_type_)
            ents.append(new_ent)
        else:
            ents.append(ent)
    return ents

print("1: ", nlp.pipe_names)

@Language.factory("NCR", assigns = [], default_config = {})
def create_Named_Concept_Recognition(nlp, name):
    return Named_Concept_Recognition(nlp, name)

class Named_Concept_Recognition:
    
    """
    Pipeline Component for labelling concepts by schema types
    and as an named entity for nouns with a named entity modifier
    """
    
    def __init__(self, nlp, name):
        
        # snippet of the group schema
        self.group_labels = {
            "Social" : {
                "neutral": {"Person": ["man"],
                            "Community": ["organization", "nomad"],
                           "entity": ["thing"]},
                "ingroup": {"Family": ["brothers"],
                            "Ally": ["friend"]},
                "elevation": {"Positive": ["truthful", "exceptional", "good"]},
                "othering": {"Negative": ["unfair", "hoax"]}},
            "Religious": {"neutral": {"ReligiousGroup" : ["ulema"]}},
            "Commercial": {"neutral": {"Customer": ["passenger"]}},
            "Health" : {"neutral": {"entity": ["vaccine"]},
                        "outgroup" : {"Vermin": ["parasite"]}},
            "Security": {"outgroup": {"Criminal": ["terrorist"]}},
            "Political": {"neutral": {"Group": ["authorities", "alliance"]}},
            "Military": {"outgroup": {"Adversary": ["enemy"]}}
        }

        # set the custom labels
        Token.set_extension("concept", default = "", force = True)
        Token.set_extension("attribute", default = "", force = True)
        Token.set_extension("context", default = "", force = True)
        Token.set_extension("modifyingTerm", default = "", force = True)
        Token.set_extension("hasModifier", default = [], force = True)
        Token.set_extension("isModifying", default = [], force = True)
        Token.set_extension("get_noun_span", getter = self.get_noun_span, force = True)
    
    def __call__(self, doc):
        
        for token in doc:
            token._.context, token._.attribute, token._.concept = self.get_concept(token)
            token._.modifyingTerm = token
            
        return doc
    
    def get_concept(self, token):
        
        """
        token extension for getting labels from group schema
        """
        for context, attributes in self.group_labels.items():
            for attribute, concepts in attributes.items():
                for concept, terms in concepts.items():
                    if token.lemma_.lower() in terms:
                        return context, attribute, concept
        return None, None, None
    
    def get_noun_span(self, token): # Named Concept Recognition
        
        nominal_deps = ["appos", "acl", "relcl", "det", "predet", "nummod", "amod", "poss", "nmod"]
        adverbial_deps = ["advmod", "advcl", "neg", "npmod"]
        compound_deps = ["compound", "prt", "case", "mark"]
        
        for chunk in token.subtree:
            if chunk.i == token.i:
                return token
            if chunk.dep_ in nominal_deps + adverbial_deps + compound_deps:
                return doc[chunk.i : token.i+1]
                
def explain(doc):

    """
    doc extension for retrieving grouping category
    """
    modification = {"elevation": "elevated",
                   "othering": "othered",
                   "outgroup": "othered",
                   "ingroup": "elevated",
                   "neutral": "neutral"}

    for ent in doc.noun_chunks:
        
        # get the ent span to include compound statements
        ent_span = ent.root
        for left in ent_span.lefts:
            if left.dep_ == ["compound", "amod"] or left.ent_type_ or left._.concept:
                ent_span = doc[left.i : ent_span.i + 1]
                break
        
        # get whether the ent has a modifying term
        modifier = ent.root._.modifyingTerm
        
        if ent.root._.concept:
            print(f"'{ent_span}' has a '{modifier._.attribute}' classification where '{modifier}' is an '{modifier._.concept}' phrase from the '{modifier._.context}' context.")
        
        if ent.root._.hasModifier:
        
            for method in ent.root._.hasModifier:
                
                # get the modifier noun span
                noun_span = ''
                noun_span = method[1]._.get_noun_span
                modifier = method[1]._.modifyingTerm

                # if the dependency label is negated
                if method[0] in ["hasNegation"]:
                    print(f"'{ent_span}' is disassociated from '{noun_span}', which is a '{modifier._.attribute}' term from the '{modifier._.context}' context.")
                    continue
                    
                if method[0] in ["hasModifier"] and ent.root.ent_type_:
                    print(f"'{ent_span}' is {modification[method[1]._.attribute]} by the '{spacy.explain(method[1].dep_)} ({method[1].dep_})' term '{method[1]}' from the '{method[1]._.context}' context.'")
                    
                elif method[0] not in ["isHyponym"] and not ent.root._.concept:
                    modifier = method[1]._.modifyingTerm
                    print(f"'{ent_span}' is referred to as '{noun_span}', which is a '{modifier._.attribute}' term from the '{modifier._.context}' context.")
                
                # warning for particular using of terms
                if modifier._.concept == "Vermin":
                    print(f"WARNING: the term '{modifier}' referring to '{ent_span}' is from the '{modifier._.concept}' category and is often used in genocidal language.")
                    
print("2: ", nlp.pipe_names)
component = "NCR"
if component in nlp.pipe_names:
    nlp.remove_pipe(component)

nlp.add_pipe("NCR")
Doc.set_extension("explain", method = explain, force = True)
Doc.set_extension("custom_ents", getter = custom_ents, force = True)
print("3", nlp.pipe_names)

1:  ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']
2:  ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']
3 ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner', 'NCR']
CPU times: user 968 µs, sys: 48 µs, total: 1.02 ms
Wall time: 1.01 ms


In [5]:
print(nlp.factory_names)

['attribute_ruler', 'tok2vec', 'merge_noun_chunks', 'merge_entities', 'merge_subtokens', 'token_splitter', 'doc_cleaner', 'parser', 'beam_parser', 'lemmatizer', 'trainable_lemmatizer', 'entity_linker', 'ner', 'beam_ner', 'entity_ruler', 'tagger', 'morphologizer', 'senter', 'sentencizer', 'textcat', 'spancat', 'future_entity_ruler', 'span_ruler', 'textcat_multilabel', 'en.lemmatizer']


## Set Up The Clause Labelling Dependency Parser

In [6]:
%%time
from spacy.matcher import DependencyMatcher

class Clause_Labelling:
    
    def __init__(self, vocab, patterns, pattern_names=None):
         
    # Add patterns with pattern_names to the dependency matcher 
        
        if pattern_names is None:
            pattern_names = ["pattern" + str(pi) for pi in range(len(patterns))]
        else:
            pattern_names = [x for x in pattern_names]
        self.matcher = DependencyMatcher(vocab)
        for pi, pattern in enumerate(patterns):
            print("pattern names: ", pattern_names[pi], pattern)
            self.matcher.add(pattern_names[pi], pattern)
        
    def __call__(self, doc): # Clause Labelling
    # Match the patterns to a doc, returns dep graph with edges that match 
    
        matches = []
        # iterate through the matches
        for match in self.matcher(doc):

            # capture the match label
            match_id = nlp.vocab.strings[match[0]]
#             print(f"{match_id}: {[doc[i] for i in match[1][0]]}")
            
            methods = {
                "hasMod": "hasModifier",
                "hasModifier": "isModifierOf",
                
                "isHyponym": "hasHypernym",
                "hasHypernym": "isHyponym",
                "isHyp" : "isHyponym",
                "verbprepPredicate" : "isHyponym",
                "verbPredicate" : "isHyponym",
                
                "hasNameOf": "isCalled",
                "hasNamely": "isCalled",
                "isCalled": "isKnownAs"
            }

            for subtree in match[1]:
                negated = ""
                for i, idx in zip(range(len(subtree)), subtree):
                    pattern = patterns[match_id][i]["SPEC"]["NODE_NAME"]
                    if pattern == "SUBJECT":
                        clause_subject = doc[idx]
                    if pattern == "OBJECT":
                        clause_object = doc[idx]
                    if pattern == "NEGATED":
                        negated = doc[idx]

                if clause_object._.concept:
                    clause_subject._.hasModifier.append((methods[match_id], clause_object))
                    clause_object._.isModifying.append((methods[clause_subject._.hasModifier[-1][0]], clause_subject))
                    
                if match_id in ["hasMod"] and clause_object._.concept:
                    clause_subject._.modifyingTerm = clause_object
                    
                elif match_id not in ["hasMod"]:
                    clause_subject._.isModifying.append((methods[match_id], clause_object))
                    clause_object._.hasModifier.append((methods[clause_subject._.isModifying[-1][0]], clause_subject))
#                     clause_object._.modifyingTerm = clause_subject
                
                if negated:
                    negated._.isModifying.append(("isNegated", clause_object))
                    clause_object._.hasModifier.append(("hasNegation", negated))
#                     clause_object._.modifyingTerm = clause_subject
        return doc

# if not Language.has_factory("Clause_Labelling"):
#     @Language.factory("Clause_Labelling",
#                       assigns=[],
#                       default_config={'patterns': None, 'pattern_names': None})
#     def makeClauseLabelling(nlp, name, patterns, pattern_names):
#         """
#         Utility function for creating spaCy pipeline component.
#         """

#         return Clause_Labelling(nlp.vocab, patterns=patterns)
    
# if not Language.has_factory("Clause_Labelling"):

@Language.factory("Clause_Labelling", default_config={'patterns': None, 'pattern_names': None})
def makeClauseLabelling(nlp, name, patterns, pattern_names):
    """
    Utility function for creating spaCy pipeline component.
    """

    return Clause_Labelling(nlp.vocab, patterns=patterns, pattern_names=pattern_names)



CPU times: user 164 µs, sys: 0 ns, total: 164 µs
Wall time: 167 µs


In [7]:
Language.has_factory("Clause_Labelling")

True

## Developing Patterns for the Parser

Developing general patterns for clauses using the labelling schema reveals a primary syntactic model upon whichlayers of meaning are applied according to the predicate relationship. In reference to UD framework categories, one of six subject dependency labels generally refer to a clause’s subject, and one of four object labels refer to the object,while one of two prepositional or one of three complement labels can refer to either. For Labov and Waletzky, predicatesare a clause’s head, which indicates its syntactic function and sequence in a narrative. For detecting a predicate’s syntactic function, the UD framework is also used for verb labelling, while VerbNet provides a lexical resource for interpreting its the semantic meaning. As will be shown, how the subject and object are linked by the predicate gives different meaning to each clause.

In [8]:
%%time

naming_predicate_list = {"LEMMA": {"IN": ["know", "name", "namely", "baptize", "call", "christen",
                         "dub", "entitle", "nickname", "rename"]}}
verb_tag_list = {"TAG": {"IN": ["VBN", "VBD", "VBP", "VBG", "VBZ"]}}
_object = {"POS": {"IN": ["NOUN", "PROPN"]}}
_subject = {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}}
subject_deps = {"DEP": {"IN": ["nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"]}}
object_deps = {"DEP": {"IN": ["dobj", "dative", "attr", "oprd"]}}
complements_deps = {"DEP": {"IN": ["ccomp", "xcomp", "acomp"]}}
nominal_deps = {"DEP": {"IN": ["appos", "acl", "relcl", "det", "predet", "nummod", "amod", "poss", "nmod"]}}
adverbial_deps = {"DEP": {"IN": ["advmod", "advcl", "neg", "npmod"]}}
preposition_deps = {"DEP": {"IN": ["pobj", "pcomp"]}}
coordination_deps = {"DEP": {"IN": ["conj", "cc", "preconj", "prep"]}}
auxiliary_deps = {"DEP": {"IN": ["aux", "auxpass"]}}
compound_deps = {"DEP": {"IN": ["compound", "prt", "case", "mark"]}}
ROOT = {"DEP": {"IN": ["ROOT"]}}
_object = {**_object, **object_deps}
_subject = {**_subject, **subject_deps}

def join_objs(d1, lst):
    DEPS = []
    for entry in lst:
        DEPS += entry["DEP"]["IN"]

    return dict([("POS", d1["POS"]), ("DEP", {"IN": d1["DEP"]["IN"] + DEPS})])

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 6.2 µs


## Method 1 - Detects Nouns or Proper Nouns which are modified by a Named Concept

The modification method detects modifier relationships in the dependency parse for both objectives one and two of the methodology. 

To explain this method, we take four wrongly classified named entities from the first Sentiment Analysis experiment
- Muslim friends (Bush (-0.83)), 
- the United States Authorities (Bush (-0.64))
- Mujahidin Brothers (bin Laden (-0.58))
- US Enemy (bin Laden (+0.42)).

The code below shows how using the group labelling schema, each concept is labelled according to its group concept using the custom pipeline component.

In [9]:
texts = [
    "our many Muslim friends. the United States Authorities.",
    "your Mujahidin Brothers. the US Enemy.",
    "the unfair United Nations. the truthful Ulema."
        ]

patterns = {}

patterns.update({"hasMod": [
    
    {"PATTERN": {"POS": {"IN": ["NOUN", "PROPN"]}},
     "SPEC": {"NODE_NAME": "SUBJECT"}},
    
    {"PATTERN": {"DEP": {"IN": ["amod", "compound"]}},
     "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "OBJECT"}}
    
]})
        
options = {"fine_grained" : True,
          "add_lemma" : True,
          "distance" : 110,
          "word_spacing" : 40,
          "collapse_punct": True}

# nlp.add_pipe('ClauseLabelling')
nlp.add_pipe(Clause_Labelling(nlp.vocab, patterns.values(), patterns.keys()))
# nlp.add_pipe("Clause_Labelling", config={"patterns": patterns})

for text in texts:
    doc = nlp(text)
#     print([(tok, tok._.isModifying) for tok in doc])
    displacy.render(doc_dep_graph(doc, strip = False), style = "dep", manual = True, options = options)
    doc._.explain()

pattern names:  hasMod [{'PATTERN': {'POS': {'IN': ['NOUN', 'PROPN']}}, 'SPEC': {'NODE_NAME': 'SUBJECT'}}, {'PATTERN': {'DEP': {'IN': ['amod', 'compound']}}, 'SPEC': {'NBOR_NAME': 'SUBJECT', 'NBOR_RELOP': '>', 'NODE_NAME': 'OBJECT'}}]


ValueError: [E1008] Invalid pattern: each pattern should be a list of dicts. Check that you are providing a list of patterns as `List[List[dict]]`.

As can be seen from above}, these labels enable the generation of an rationale to explain group classification from a compound or adjectival modifier (amod) relationship. 

As also shown above, this method can also be used to explain how an named entity is either elevated or othered by a modifier term.

## Method 2: Naming Method

While the modification method draws upon a simple modifier relationship, the naming method is the first of the three to draw upon language clauses. 

The pattern for this method is based on the clause head linking a a subject and object by a naming term. 

The predicate terms from the data set for this method are 'known' and 'named', and using VerbNet, 20 similarily functional terms are added to the pattern.

In [None]:
patterns = {}

patterns.update({"hasMod": [
    
    {"PATTERN": {"POS": {"IN": ["NOUN", "PROPN"]}},
     "SPEC": {"NODE_NAME": "SUBJECT"}},
    
    {"PATTERN": {"DEP": {"IN": ["amod", "compound"]}},
     "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "OBJECT"}}
    
]})

patterns.update({"hasNameOf": [  # object naming object

    # OBJECT known as / named SUBJECT

    {"PATTERN": join_objs(_subject, [preposition_deps]),
     "SPEC": {"NODE_NAME": "SUBJECT"}},

    {"PATTERN": naming_predicate_list,
        "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "PREDICATE"}},

    {"PATTERN": join_objs(_object, [_subject, preposition_deps]),
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">>", "NODE_NAME": "OBJECT"}}
]})

patterns.update({"hasNamely": [  # object naming object

    # VERB the OBJECT, namely the SUBJECT

    {"PATTERN": _object,
     "SPEC": {"NODE_NAME": "SUBJECT"}},
    
    {"PATTERN": verb_tag_list,
     "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": "<", "NODE_NAME": "VERB_PREDICATE"}},
    
    {"PATTERN": join_objs(_object, [ROOT]),
     "SPEC": {"NBOR_NAME": "VERB_PREDICATE", "NBOR_RELOP": "<", "NODE_NAME": "OBJECT"}},
    
    {"PATTERN": naming_predicate_list,
        "SPEC": {"NBOR_NAME": "OBJECT", "NBOR_RELOP": ">", "NODE_NAME": "PREDICATE"}},
]})

texts = [
    "passengers like an exceptional man named Todd Beamer",
    "a collection of loosely affiliated terrorist organizations known as al Qaeda.",
#     "leaving the main enemy in the region, namely the Jewish-American alliance"
]

components = ["Clause_Labelling"]
for component in components:
    if component in nlp.pipe_names:
        nlp.remove_pipe(component)
        
nlp.add_pipe(Clause_Labelling(nlp, patterns.values(), patterns.keys()))

for text in texts:
    
    doc = nlp(text)
    options = {
#         "fine_grained" : True,
        "add_lemma" : True,
        "distance" : 900/len(doc),
        "word_spacing" : 40,
        "collapse_punct": True}

    displacy.render(doc_dep_graph(doc, strip = True), style = "dep", manual = True, options = options)
    doc._.explain()
#     displacy.render(doc, options = options)

In [None]:
group_labels = {
    "Social" : {"neutral": {"Person": ["man"],
                            "Community": ["organization", "nomad"],
                            "Entity": ["thing"]},
                "ingroup": {"Family": ["brothers"],
                            "Ally": ["friend"]},
                "elevation": {"Right": ["truthful"],
                              "Good": ["exceptional", "good"]},
                "othering": {"Wrong": ["unfair", "hoax"]}},
    "Religious": {"neutral": {"ReligiousGroup" : ["ulema"]}},
    "Commercial": {"neutral": {"Customer": ["passenger"]}},
    "Health" : {"neutral": {"Entity": ["vaccine"]}, 
                "outgroup" : {"Vermin": ["parasite"]}},
    "Security": {"outgroup": {"Criminal": ["terrorist"]}},
    "Political": {"neutral": {"Group": ["authorities", "alliance"]}},
    "Military": {"outgroup": {"Adversary": ["enemy"]}}
}

def get_concept(token):

    """
    token extension for getting labels from group schema
    """
    for context, attributes in group_labels.items():
        for attribute, concepts in attributes.items():
            for concept, terms in concepts.items():
                if token.lower() in terms:
                    return context, attribute, concept
    return "fuckface"

print(get_concept("terrorist"))

In [None]:
patterns = {}

patterns.update({"hasMod": [
    
    {"PATTERN": {"POS": {"IN": ["NOUN", "PROPN"]}},
     "SPEC": {"NODE_NAME": "SUBJECT"}},
    
    {"PATTERN": {"DEP": {"IN": ["amod", "compound"]}},
     "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "OBJECT"}}
    
]})

patterns.update({"hasNameOf": [  # object naming object

    # OBJECT known as / named SUBJECT

    {"PATTERN": join_objs(_subject, [preposition_deps]),
     "SPEC": {"NODE_NAME": "SUBJECT"}},

    {"PATTERN": naming_predicate_list,
        "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "PREDICATE"}},

    {"PATTERN": join_objs(_object, [_subject, preposition_deps]),
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">>", "NODE_NAME": "OBJECT"}}
]})

patterns.update({"hasNamely": [  # object naming object

    # VERB the OBJECT, namely the SUBJECT

    {"PATTERN": _object,
     "SPEC": {"NODE_NAME": "SUBJECT"}},
    
    {"PATTERN": verb_tag_list,
     "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": "<", "NODE_NAME": "VERB_PREDICATE"}},
    
    {"PATTERN": join_objs(_object, [ROOT]),
     "SPEC": {"NBOR_NAME": "VERB_PREDICATE", "NBOR_RELOP": "<", "NODE_NAME": "OBJECT"}},
    
    {"PATTERN": naming_predicate_list,
        "SPEC": {"NBOR_NAME": "OBJECT", "NBOR_RELOP": ">", "NODE_NAME": "PREDICATE"}},
]})

hyponymy_list = {"LEMMA": {"IN": ["like", "include", "except", "whether", "as"]}}
patterns.update({"isHyp": [  # Hypernym prep Hyponym
    
    {"PATTERN": join_objs(_subject, [ROOT]),
     "SPEC": {"NODE_NAME": "SUBJECT"}},

    {"PATTERN": hyponymy_list,
        "SPEC": {"NBOR_NAME": "SUBJECT", "NBOR_RELOP": ">", "NODE_NAME": "PREDICATE"}},

    {"PATTERN": join_objs(_object, [preposition_deps]),
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">>", "NODE_NAME": "OBJECT"}}
]})

patterns.update({"verbPredicate": [  # Hypernym verb Hyponym

    # who attacked our country
    {"PATTERN": _subject,
     "SPEC": {"NODE_NAME": "OBJECT"}},

    {"PATTERN": verb_tag_list,
     "SPEC": {"NBOR_NAME": "OBJECT", "NBOR_RELOP": "<", "NODE_NAME": "PREDICATE"}},

    {"PATTERN": {"DEP": {"IN": ["neg"]}},
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">", "NODE_NAME": "NEGATION"}},
    
    {"PATTERN": {"DEP": {"IN": ["cc"]}},
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">", "NODE_NAME": "CORDINATING_CONJUNCTION"}},

    {"PATTERN": join_objs(_object, [complements_deps]),
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">", "NODE_NAME": "NEGATED"}},
    
    {"PATTERN": {"DEP": {"IN": ["conj"]}},
     "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">", "NODE_NAME": "SUBJECT"}}
]})

patterns.update({"verbprepPredicate": [  # Hypernym verb (prep) Hyponym

    # war begins with al Qaeda

    {"PATTERN": _subject,
     "SPEC": {"NODE_NAME": "OBJECT"}},

    {"PATTERN": verb_tag_list,
        "SPEC": {"NBOR_NAME": "OBJECT", "NBOR_RELOP": "<", "NODE_NAME": "PREDICATE"}},

    {"PATTERN": {
        "DEP": {"IN": ["prep"]}}, # TODO: this needs to be fixed to be an adjacent token
        "SPEC": {"NBOR_NAME": "PREDICATE", "NBOR_RELOP": ">", "NODE_NAME": "PREP"}},

    {"PATTERN": join_objs(_object, [complements_deps, preposition_deps]),
     "SPEC": {"NBOR_NAME": "PREP", "NBOR_RELOP": ">", "NODE_NAME": "SUBJECT"}}
]})


components = ["Clause_Labelling"]
for component in components:
    if component in nlp.pipe_names:
        nlp.remove_pipe(component)
        
nlp.add_pipe(Clause_Labelling(nlp, patterns.values(), patterns.keys()))
    
texts = [
    "passengers like an exceptional man named Todd Beamer",
    "The Aryan himself was probably at first a Nomad",
    "The Jew has never been a Nomad, but always a parasite",
    "Lockdowns are killing countries all over the world",
    "vaccines have never been a good thing, but always a hoax"
]

for text in texts:
    
    doc = nlp(text)
    options = {
#         "fine_grained" : True,
        "add_lemma" : True,
        "distance" : 1000/len(doc),
        "word_spacing" : 40,
        "collapse_punct": True}

    displacy.render(doc_dep_graph(doc, strip = True), style = "dep", manual = True, options = options)
    doc._.explain()
#     displacy.render(doc, options = options)
