In [1]:
import spacy
from spacy import displacy
from diaparser.parsers import Parser

In [2]:
target_sent = 'A customer can place an order and check the order status.'

# SpaCy

## Base

In [3]:
nlp = spacy.load('en_core_web_sm')

In [4]:
doc = nlp(target_sent)
displacy.render(doc, style='dep', options={'compact': True, 'distance': 120})

## RoBERTa

In [18]:
nlp = spacy.load('en_core_web_trf')

In [19]:
doc = nlp(target_sent)
displacy.render(doc, style='dep', options={'compact': True, 'distance': 120})

# DiaParser

In [14]:
parser = Parser.load('en_ewt-electra')

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [15]:
dataset = parser.predict(target_sent, text='en')
sent = dataset.sentences[0]

displacy.render(sent.to_displacy(), style='dep', manual=True, options={'compact': True, 'distance': 120})

# VerbNet

In [30]:
import nltk

In [29]:
from nltk.corpus import verbnet
from xml.etree import ElementTree
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag

In [None]:
nltk.download('verbnet')
nlkt.download('wordnet')

In [24]:
lemmatizer = WordNetLemmatizer()
lemmatized_verb = lemmatizer.lemmatize("place")
stem = verbnet.classids(lemma=lemmatized_verb)
print(stem)

verbnet.vnclass(stem[0])

print(verbnet.pprint(stem[0]))

['put-9.1-2']
put-9.1-2
  Subclasses: (none)
  Members: place set put
  Thematic roles:
    * Theme[+abstract]
  Frames:
    PP-NP (upon/on-PP)
      Example: They put upon me a brilliant, red helm.
      Syntax: NP[Agent] VERB PREP[on upon] NP[Destination] NP[Theme]
      Semantics:
        * motion(during(E), Theme)
        * Prep(start(E), Theme, Destination)
        * Prep(end(E), Theme, Destination)
        * cause(Agent, E)


In [31]:
lemmatizer = WordNetLemmatizer()
lemmatized_verb = lemmatizer.lemmatize("drop")
stem = verbnet.classids(lemma=lemmatized_verb)
print(stem)

verbnet.vnclass(stem[1])

print(verbnet.pprint(stem[0]))

['calibratable_cos-45.6-1', 'meander-47.7', 'put_direction-9.4', 'roll-51.3.1']
calibratable_cos-45.6-1
  Subclasses: (none)
  Members: appreciate balloon climb decline decrease depreciate differ
    diminish drop fall fluctuate gain grow increase jump mushroom
    plummet plunge rocket rise skyrocket soar surge tumble vary dip
    lower swell
  Thematic roles:

  Frames:
    Intransitive (Attribute Subject)
      Example: Oil's price soared.
      Syntax: NP[Patient] LEX['s] NP[Attribute] VERB
      Semantics:
        * change_value(during(E), Direction, Attribute, Patient)


# Bucketization pipeline

In [8]:
import nltk.data

In [14]:
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
nlp = spacy.load('en_core_web_sm')

In [23]:
text = '''
An amateur football team typically has two scheduled training sessions each week, and each training consists of a selection of exercises.
The coach usually decides which exercises will be practiced, but there is a rising demand among coaches to involve their players with the setup of the training sessions.
This need will be addressed by the Vote4Fun module.
This also adds value to a training, because the players will be more motivated to do an exercise they chose themselves.
Here is where Vote4Fun comes in.
Vote4Fun will give the coach of a football team a medium to create a poll.
The coach will select a set of exercises, after which all players can vote on the exercise they like the most.
After the deadline the Vote4Fun poll will be closed, and the exercise with the most amount of votes can then be used in the training session.'''

sentences = tokenizer.tokenize(text)

In [26]:
buckets = {
    'process': [],
    'structural': [],
    'use case': []
}

for sentence in sentences:
    doc = nlp(sentence)
    deps = [token.dep_ for token in doc]
    
    # Process text rules
    if 'advmod' in deps:
        buckets['process'].append(sentence)
        
    # Use case text rules
    if 'nsubj' in deps and 'dobj' in deps:
        buckets['use case'].append(sentence)
    
    # Structural text rules
    if 'nsubjpass' in deps and 'pobj' in deps and 'prep' in deps:
        buckets['structural'].append(sentence)

print(buckets)

{'process': ['\nAn amateur football team typically has two scheduled training sessions each week, and each training consists of a selection of exercises.', 'The coach usually decides which exercises will be practiced, but there is a rising demand among coaches to involve their players with the setup of the training sessions.', 'This also adds value to a training, because the players will be more motivated to do an exercise they chose themselves.', 'Here is where Vote4Fun comes in.', 'After the deadline the Vote4Fun poll will be closed, and the exercise with the most amount of votes can then be used in the training session.'], 'structural': ['The coach usually decides which exercises will be practiced, but there is a rising demand among coaches to involve their players with the setup of the training sessions.', 'After the deadline the Vote4Fun poll will be closed, and the exercise with the most amount of votes can then be used in the training session.'], 'use case': ['\nAn amateur footb

In [28]:
target_sent = 'An amateur football team typically has two scheduled training sessions each week, and each training consists of a selection of exercises.'

nlp = spacy.load('en_core_web_sm')
doc = nlp(target_sent)
displacy.render(doc, style='dep')

In [37]:
lemmatizer = WordNetLemmatizer()
lemmatized_verb = lemmatizer.lemmatize('having', pos='v')
print(lemmatized_verb)
stem = verbnet.classids(lemma='having')
print(stem)



# print(verbnet.pprint(stem[0]))

have
[]


In [None]:
verbnet.themroles