<b>SVO-Extractor</b>

Throughout this notebook, we extract (subject, verb, object) triples from text using the spaCy and NLTK libraries.

<img src='./favicon.png' alt='image'/>

<hr>

In [None]:
! pip install nltk
! pip install spacy
#! python3 -m spacy download en_core_web_sm
#! python3 -m spacy download fr_core_news_sm

In [111]:
import spacy
from spacy.lang.en import English
from nltk.stem.wordnet import WordNetLemmatizer

In [112]:
# Shared spaCy pipeline, loaded once here and reused by svo_engin().
# Requires the 'en_core_web_sm' model to be installed beforehand.
NLP = spacy.load('en_core_web_sm')

In [113]:
# Dependency labels (token.dep_) that mark a token as a subject candidate.
SUBJECTS = {
    "nsubj",
    "nsubjpass",
    "csubj",
    "csubjpass",
    "agent",
    "expl",
}

# Dependency labels (token.dep_) that mark a token as an object candidate.
OBJECTS = {
    "dobj",
    "dative",
    "attr",
    "oprd",
}

<hr>

<b>FUNCTIONS</b>

In [114]:
def SVOFinder(tok_args):
    """Extract (subject, verb, object) triples from a sequence of parsed tokens.

    Args:
        tok_args: iterable of tokens exposing ``pos_``, ``dep_``, ``lemma_``,
            ``lower_`` and tree navigation attributes (svo_engin passes the
            result of ``NLP(text)``).

    Returns:
        A list of ``(subject, verb, object)`` string tuples. Subject and object
        are lower-cased token texts; the verb is its lemma, prefixed with
        ``'!'`` when either the verb or the object is negated.
    """
    svo = []

    # First collect every verb that is not tagged as an auxiliary.
    verbs = [tok for tok in tok_args if tok.pos_ == 'VERB' and tok.dep_ != 'aux']

    for verb in verbs:
        subjects, verbNegated = SujectFinder(verb)

        # Without a subject there is no triple to build for this verb.
        if not subjects:
            continue

        # ObjectFinder may substitute the verb (when the objects come from an
        # xcomp); report the verb it returns so verb and object stay paired.
        effectiveVerb, objects = ObjectFinder(verb)
        lemma = effectiveVerb.lemma_

        for sub in subjects:
            for obj in objects:
                negated = verbNegated or isNegated(obj)
                svo.append((sub.lower_, '!' + lemma if negated else lemma, obj.lower_))

    return svo

In [115]:
def SujectFinder(verb):
    """Return the subjects of ``verb`` together with its negation flag.

    Subjects are looked up among the verb's left children first; if none are
    found there, the search is delegated to ``findSubs``, which also supplies
    the negation flag in that case.

    Returns:
        ``(subjects, verbNegated)`` where ``subjects`` is a list of tokens.
    """
    negated = isNegated(verb)
    found = [
        child
        for child in verb.lefts
        if child.dep_ in SUBJECTS and child.pos_ != 'DET'
    ]

    if not found:
        # No direct subject: fall back to the search up the tree.
        inherited, negated = findSubs(verb)
        found.extend(inherited)
        return found, negated

    # Direct subjects exist: also pick up subjects joined to them by "and".
    found.extend(SubjectsFromConjunctionsFinder(found))
    return found, negated

In [116]:
def SubjectsFromConjunctionsFinder(subjects):
    """Collect additional subjects joined to ``subjects`` by "and".

    For every subject that has an "and" among its right children, the right
    children that carry a subject label or are nouns are treated as further
    subjects, and the search continues recursively from them so that chains
    of conjunctions are followed.

    Args:
        subjects: list of tokens exposing ``rights``, ``lower_``, ``dep_``
            and ``pos_``.

    Returns:
        A list with the extra subject tokens (empty when there are none).
    """
    moreSubjects = []
    for subject in subjects:
        # rights may be a generator, so materialise it before scanning twice.
        rights = list(subject.rights)
        rightWords = {tok.lower_ for tok in rights}
        if 'and' not in rightWords:
            continue

        conjuncts = [tok for tok in rights if tok.dep_ in SUBJECTS or tok.pos_ == "NOUN"]
        moreSubjects.extend(conjuncts)
        if conjuncts:
            # Recurse only on the newly found tokens so nothing is added twice.
            moreSubjects.extend(SubjectsFromConjunctionsFinder(conjuncts))

    return moreSubjects

In [117]:
def findSubs(tok):
    """Find subjects for a token that has none of its own by walking up the tree.

    Starting from ``tok.head``, climb until a verb, a noun, or the root (a
    token that is its own head) is reached:

    * verb: use that verb's left children labelled as subjects (plus any
      subjects joined to them by "and"); if it has none, keep climbing.
    * noun: the noun itself is the subject.
    * otherwise: no subject.

    The subject filter uses the ``SUBJECTS`` label set. It previously compared
    against the literal ``'SUB'``, which is not one of those labels, so the
    verb branch never produced a subject.

    Returns:
        ``(subjects, negated)`` where ``subjects`` is a list of tokens and
        ``negated`` is the negation flag of the token that supplied them.
    """
    head = tok.head
    while head.pos_ != 'VERB' and head.pos_ != 'NOUN' and head.head != head:
        head = head.head

    if head.pos_ == 'VERB':
        # Same filter as SujectFinder: subject-labelled children, no determiners.
        subjects = [
            child
            for child in head.lefts
            if child.dep_ in SUBJECTS and child.pos_ != 'DET'
        ]
        if len(subjects) > 0:
            verbNegated = isNegated(head)
            subjects.extend(SubjectsFromConjunctionsFinder(subjects))
            return subjects, verbNegated
        elif head.head != head:
            # This verb has no subject either; continue from it towards the root.
            return findSubs(head)
    elif head.pos_ == 'NOUN':
        return [head], isNegated(tok)

    return [], False

In [118]:
def isNegated(tok):
    """Tell whether any direct left or right child of ``tok`` is a negation word.

    The check is made on the lower-cased text (``lower_``) of each child.
    """
    negations = {'no', 'not', 'n\'t', 'never','none'}
    children = [*tok.lefts, *tok.rights]
    return any(child.lower_ in negations for child in children)

In [119]:
def ObjectFinder(verb):
    """Gather the objects attached to ``verb``.

    Objects come from the verb's right children: those carrying an object
    label, those reached through a preposition, and those of an xcomp verb.
    When the xcomp lookup yields objects, the verb returned is the one
    reported by ``ObjectsFromXCompFinder`` instead of the input verb.

    Returns:
        ``(verb, objects)`` where ``objects`` is a list of tokens.
    """
    rightChildren = list(verb.rights)

    found = [child for child in rightChildren if child.dep_ in OBJECTS]
    found += objectsFromPrepositionsFinder(rightChildren)

    xcompVerb, xcompObjects = ObjectsFromXCompFinder(rightChildren)
    if xcompVerb is not None and xcompObjects:
        found += xcompObjects
        verb = xcompVerb

    if found:
        # Run the preposition lookup once more, this time over the objects found so far.
        found += objectsFromPrepositionsFinder(found)

    return verb, found

In [120]:
def objectsFromPrepositionsFinder(deps):
    """Return the objects that hang off prepositions found in ``deps``.

    A token in ``deps`` counts as a preposition when its part of speech is
    ``ADP`` and its dependency label is ``prep``. From each preposition's
    right children, keep those with an object label, plus the pronoun "me".
    """
    return [
        child
        for dep in deps
        if dep.pos_ == 'ADP' and dep.dep_ == 'prep'
        for child in dep.rights
        if child.dep_ in OBJECTS or (child.pos_ == 'PRON' and child.lower_ == 'me')
    ]

In [121]:
def ObjectsFromXCompFinder(deps):
    """Look for an xcomp verb among ``deps`` and return it with its objects.

    For the first token in ``deps`` that is a verb with the ``xcomp`` label and
    has at least one object (directly or through a preposition), return that
    verb and the list of its objects.

    Returns:
        ``(xcomp_verb, objects)`` when such a verb is found, otherwise
        ``(None, None)``.
    """
    for dep in deps:
        if dep.pos_ == 'VERB' and dep.dep_ == 'xcomp':
            rights = list(dep.rights)
            objects = [tok for tok in rights if tok.dep_ in OBJECTS]
            objects.extend(objectsFromPrepositionsFinder(rights))
            if len(objects) > 0:
                # Return the xcomp verb itself (the previous code returned an
                # undefined name here and raised NameError).
                return dep, objects
    return None, None

<hr>

<b>MAIN</b>

In [122]:
def svo_engin(text=""):
    """Parse ``text`` with the shared ``NLP`` pipeline and return its SVO triples.

    Returns whatever ``SVOFinder`` produces for the parsed text.
    """
    parsed = NLP(text)
    return SVOFinder(parsed)

In [139]:
# Demo: run the extractor on two sentences; the first contains a negation ("doesn't").
svo_engin("Adam doesn't love jack or him brother. It seems i love this girl.")

[('adam', '!love', 'jack'), ('i', 'love', 'girl')]

NB: HAVE TO OPTIMISE 