In [1]:
import spacy
from spacy.training.example import Example
from pathlib import Path
import random 
import plac


In [2]:
# training data
TRAINING_DATA = [
("few people attended the meeting", {
    'heads': [1, 2, 2, 2, 2], 
    'deps': ['quantity', 'nsubj', 'ROOT', 'det', 'osubj']
    }),
    ("many students passed the exam", {
    'heads': [1, 2, 2, 2, 2], 
    'deps': ['quantity', 'nsubj', 'ROOT', 'det', 'osubj']
    }),
    ("some items were missing from the list", {
    'heads': [1, 2, 3, 3, 3, 3, 3], 
    'deps': ['quantity', 'nsubj', 'ROOT', 'ROOT', '-', 'det', 'osubj']
    }),
    ("all the participants received a certificate", {
    'heads': [2, 2, 2, 3, 3, 3], 
    'deps': ['quantity', 'det', 'nsubj', 'ROOT', '-', 'osubj']
    }),
    ("half of the pizza is gone", {
    'heads': [3, 3, 3, 4, 4, 4], 
    'deps': ['quantity', '-', 'det', 'nsubj', 'ROOT', 'ROOT']
    }),
    ("the whole team worked together", {
    'heads': [1, 2, 3, 3, 3], 
    'deps': ['det','quantity', 'nsubj', 'ROOT', '-']
    }),
    ("numerous books filled the shelves", {
    'heads': [1, 2, 2, 2, 2], 
    'deps': ['quantity', 'nsubj', 'ROOT', 'det', 'osubj']
    }),
    ("some students completed all the assignments", {
    'heads': [1, 2, 2, 5, 5, 5], 
    'deps': ['quantity', 'nsubj', 'ROOT', 'quantity', 'det', 'osubj']
    }),
    ("There were enough chairs for everyone", {
    'heads': [0, 2, 3, 3, 5, 5], 
    'deps': ["expl", "ROOT", "quantity", "nsubj", "prep", "pobj"]}),

]



nlp_pars = spacy.load("en_core_web_sm")
# Load a blank English model
nlp_pars = spacy.blank("en")
parser = nlp_pars.add_pipe("parser")

for text, annotations in TRAINING_DATA:
    for dep in annotations.get('deps', []):
        parser.add_label(dep)

other_pipes = [pipe for pipe in nlp_pars.pipe_names if pipe != 'parser']
with nlp_pars.disable_pipes(*other_pipes): # only train parser
    optimizer = nlp_pars.begin_training()
    for itn in range(20):
        random.shuffle(TRAINING_DATA)
        losses = {}
        for text, annotations in TRAINING_DATA:
            doc = nlp_pars.make_doc(text)
            example = Example.from_dict(doc, annotations)
            nlp_pars.update([example], sgd=optimizer)
        #print(losses)

# test the trained model
test_text = "all the players scored a goal"
doc = nlp_pars(test_text)
print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != '-'])

output_dir = Path("./modelsss")
if output_dir is not None:
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp_pars.to_disk(output_dir)
    print("Saved model to", output_dir)
    # test the saved model
    print("Loading from", output_dir)
    nlp22 = spacy.load(output_dir)
    doc = nlp22(test_text)
    print('Dependencies', [(t.text, t.dep_, t.head.text) for t in doc])





[('all', 'quantity', 'players'), ('the', 'det', 'players'), ('players', 'nsubj', 'scored'), ('scored', 'ROOT', 'scored'), ('goal', 'osubj', 'scored')]
Saved model to modelsss
Loading from modelsss
Dependencies [('all', 'quantity', 'players'), ('the', 'det', 'players'), ('players', 'nsubj', 'scored'), ('scored', 'ROOT', 'scored'), ('a', '-', 'scored'), ('goal', 'osubj', 'scored')]
