In [None]:
import spacy
from spacy.training.example import Example
from spacy.pipeline.dep_parser import DependencyParser
import random
import tqdm

TRAIN_DATA = [
    ("There is enough evidence to support the claim.", {
        "heads": [1, 1, 3, 1, 5, 3, 7, 5, 1],
        "deps": ["expl", "ROOT", "Quantity", "attr", "aux", "acl", "det", "dobj", "punct"]
    }),
    ("Numerous studies have shown similar results.", {
        "heads": [1, 3, 3, 3, 5, 3, 3],
        "deps": ["Quantity", "nsubj", "aux", "ROOT", "amod", "dobj", "punct"]
    }),
    ("The team needs some additional resources.", {
        "heads": [1, 2, 2, 5, 5, 2, 2],
        "deps": ["det", "nsubj", "ROOT", "Quantity", "amod", "dobj", "punct"]
    }),
    ("All the tickets were sold out.", {
        "heads": [2, 2, 4, 4, 4, 4, 4],
        "deps": ["Quantity", "det", "nsubjpass", "auxpass", "ROOT", "prt", "punct"]
    }),
    ("There were numerous opportunities to participate.", {
        "heads": [1, 1, 3, 1, 4, 4, 1],
        "deps": ["expl", "ROOT", "Quantity", "det", "pobj", "infmod", "punct"]
    }),
    ("There were few people at the party.", {
        "heads": [1, 1, 3, 1, 3, 6, 4, 1],
        "deps": ["expl", "ROOT", "Quantity", "nsubj", "prep", "det", "pobj", "punct"]
    }),
    ("Many students attended the lecture.", {
        "heads": [1, 2, 2, 4, 2, 2],
        "deps": ["Quantity", "nsubj", "ROOT", "det", "dobj", "punct"]
    }),
    ("The team needs enough time to prepare.", {
        "heads": [1, 2, 2, 4, 2, 6, 4, 2],
        "deps": ["det", "nsubj", "ROOT", "Quantity", "dobj", "aux", "relcl", "punct"]
    }),
    ("Some cats prefer to sleep all day.", {
        "heads": [1, 2, 2, 4, 2, 6, 4, 2],
        "deps": ["Quantity", "nsubj", "ROOT", "aux", "xcomp", "det", "npadvmod", "punct"]
    }),
    ("All participants received a certificate.", {
        "heads": [1, 2, 2, 4, 2, 2],
        "deps": ["Quantity", "nsubj", "ROOT", "det", "dobj", "punct"]
    }),
    ("Half of the pizza is mine.", {
        "heads": [4, 0, 3, 1, 4, 4, 4],
        "deps": ["Quantity", "prep", "det", "pobj", "ROOT", "attr", "punct"]
    }),
    ("The whole process took hours.", {
        "heads": [2, 2, 3, 3, 3, 3],
        "deps": ["det", "Quantity", "nsubj", "ROOT", "dobj", "punct"]
    }),
    ("Many volunteers helped organize the event.", {
        "heads": [1, 2, 2, 4, 4, 2, 6],
        "deps": ["Quantity", "nsubj", "ROOT", "xcomp", "det", "dobj", "punct"]
    }),
    ("Some animals prefer to roam freely in the wild.", {
        "heads": [1, 2, 2, 4, 4, 4, 4, 1, 8, 8],
        "deps": ["Quantity", "nsubj", "ROOT", "aux", "xcomp", "advmod", "prep", "det", "pobj", "punct"]
    }),
    ("Many books were left on the shelf.", {
        "heads": [1, 1, 3, 1, 3, 3, 3, 6],
        "deps": ["Quantity", "nsubj", "ROOT", "det", "dobj", "prep", "pobj", "punct"]
    }),
    ("The students found numerous books in the library.", {
        "heads": [1, 2, 2, 4, 4, 4, 4, 4, 5],
        "deps": ["det", "nsubj", "ROOT", "Quantity", "dobj", "prep", "det", "pobj", "punct"]
    }),
    ("The teacher gave all the students an interesting task.", {
        "heads": [1, 2, 2, 4, 4, 5, 5, 7, 7, 7],
        "deps": ["det", "nsubj", "ROOT", "Quantity", "det", "dobj", "det", "amod", "punct", "punct"]
    }),
    ("She bought some colorful flowers for the event.", {
        "heads": [1, 2, 2, 4, 4, 4, 6, 6, 6],
        "deps": ["nsubj", "ROOT", "Quantity", "amod", "dobj", "prep", "det", "pobj", "punct"]
    }),
    ("The whole team celebrated their victory.", {
        "heads": [1, 2, 2, 4, 4, 4, 6],
        "deps": ["det", "nsubj", "Quantity", "det", "dobj", "amod", "punct"]
    }),
    ("She ate the whole pizza by herself.", {
        "heads": [1, 2, 2, 4, 4, 6, 6, 4],
        "deps": ["nsubj", "ROOT", "det", "Quantity", "dobj", "prep", "pobj", "punct"]
    })
]

nlp = spacy.blank("en")
nlp.add_pipe("parser")
parser = nlp.get_pipe("parser")
for _, annotations in TRAIN_DATA:
    for dep in annotations.get('deps', []):
        parser.add_label(dep)

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'parser']
with nlp.disable_pipes(*other_pipes):
    optimizer = nlp.begin_training()
    for itn in tqdm.tqdm(range(50)):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for text, annotations in TRAIN_DATA:
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            try:
                doc = nlp.update([example], drop=0.5, losses=losses)
            except ValueError as e:
                print(f"Error for sentence: {text}")
                print(f"Heads: {annotations['heads']}")
                print(f'Deps: {annotations["deps"]}')
                print(f"Predicted transitions: {[token.head.i for token in doc]}")
                print(f"Error message: {e}")



100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:30<00:00,  1.63it/s]


In [7]:
test_text = "Half of the cookies were eaten. many people attended the event all the numerous received a certificate. Few people attended the meeting. Some of nba players got the golden.She ate the whole pizza by herself because wasn’t enough left to share."
doc = nlp(test_text)

for token in doc:
    print(f"{token.text} -> {token.dep_} ({token.head.text})")

Half -> Quantity (were)
of -> prep (Half)
the -> det (cookies)
cookies -> pobj (of)
were -> ROOT (were)
eaten -> attr (were)
. -> punct (were)
many -> Quantity (people)
people -> nsubj (attended)
attended -> ROOT (event)
the -> dobj (attended)
event -> det (.)
all -> Quantity (the)
the -> ROOT (the)
numerous -> Quantity (received)
received -> ROOT (received)
a -> det (certificate)
certificate -> dobj (received)
. -> aux (people)
Few -> Quantity (people)
people -> nsubj (attended)
attended -> ROOT (the)
the -> det (meeting)
meeting -> dobj (attended)
. -> ROOT (the)
Some -> Quantity (of)
of -> ROOT (the)
nba -> det (players)
players -> dobj (of)
got -> prep (players)
the -> det (golden)
golden -> pobj (of)
. -> aux (She)
She -> nsubj (ate)
ate -> ROOT (the)
the -> advmod (received)
whole -> Quantity (pizza)
pizza -> ROOT (pizza)
by -> prep (herself)
herself -> ROOT (herself)
because -> prep (herself)
was -> pobj (because)
n’t -> prep (because)
enough -> Quantity (left)
left -> attr (n’t