In [None]:
# Download the pretrained English pipeline that create_training_examples() loads.
!python -m spacy download en_core_web_md


In [None]:
import spacy
import random
from spacy.tokens import Doc
from spacy.training import Example
from spacy.pipeline import DependencyParser
from typing import List, Tuple

# Path the trained parser is written to by save_parser_config() and read back
# from by load_parser_config().
PARSER_CONFIG = 'parser.cfg'
# Training samples as (text, annotations) pairs. Each annotations dict is
# handed unchanged to Example.from_dict() and carries two parallel lists with
# one entry per token: 'heads' and 'deps' (the dependency labels that main()
# registers on the parser).
TRAINING_DATA = [
    # Disabled sample, kept for reference:
    # ('ALL DAY I LIKE ACTIVE NOT SIT WATCH TV', {
    #     'heads':  [1, 7, 3, 3, 3, 6, 3, 6, 7],
    #     'deps': ['det',
    # 'npadvmod',
    # 'nsubj',
    # 'ROOT',
    # 'acomp',
    # 'neg',
    # 'conj',
    # 'conj',
    # 'dobj']
    # }),
    ('ALL day I LIKE near home', {
        'heads': [0, 3, 3, 0, 5, 3],
        'deps': ['ROOT', 'nsubj', 'neg', 'conj', 'conj', 'dobj']
    })
]


def create_training_examples(training_data: List[Tuple]) -> List[Example]:
    """Build spaCy training examples from ``(text, annotations)`` pairs.

    Each text is only tokenized (``nlp.make_doc``) instead of being run
    through the whole pretrained pipeline. This avoids needless work and
    keeps the pretrained model's own predictions off the predicted side of
    every ``Example``; it is the construction used in the spaCy v3 training
    documentation.

    Args:
        training_data: Pairs of raw text and an annotations dict in the
            format accepted by ``Example.from_dict``.

    Returns:
        One ``Example`` per input pair, in input order.
    """
    # Only the tokenizer of this pipeline is used below.
    nlp = spacy.load('en_core_web_md')
    examples = []
    for text, annotations in training_data:
        print(f"{text} - {annotations}")
        examples.append(Example.from_dict(nlp.make_doc(text), annotations))
    return examples


def save_parser_config(parser: DependencyParser):
    """Write *parser* to the PARSER_CONFIG path, reporting progress on stdout."""
    destination = PARSER_CONFIG
    print("Save parser config to '{}' ... ".format(destination), end='')
    parser.to_disk(destination)
    print("DONE")


def load_parser_config(parser: DependencyParser):
    """Restore *parser* from the PARSER_CONFIG path, reporting progress on stdout."""
    origin = PARSER_CONFIG
    print("Load parser config from '{}' ... ".format(origin), end='')
    parser.from_disk(origin)
    print("DONE")


def main():
    """Train a parser on TRAINING_DATA, save it, reload it and parse a sentence.

    Steps, in order: build a blank English pipeline with a parser, register
    the dependency labels found in TRAINING_DATA, train for 25 passes, save
    the parser to disk, load it into a second blank pipeline and print the
    dependency arcs predicted for a sample sentence.
    """
    trained_nlp = spacy.blank('en')
    trained_parser = trained_nlp.add_pipe('parser', first=True)

    # Register every dependency label that occurs in the training data.
    for _, annotations in TRAINING_DATA:
        for dep_label in annotations['deps']:
            if dep_label in trained_parser.labels:
                continue
            trained_parser.add_label(dep_label)
    print(f"Added labels: {trained_parser.labels}")

    examples = create_training_examples(TRAINING_DATA)

    # spaCy 3 requires initialize() to receive a callable returning the
    # examples - https://spacy.io/usage/v3#migrating-training-python
    optimizer = trained_nlp.initialize(lambda: examples)
    print("Training ... ", end='')
    for epoch in range(25):
        print(f"{epoch} ", end='')
        random.shuffle(examples)
        trained_nlp.update(examples, sgd=optimizer)
    print("... DONE")

    save_parser_config(trained_parser)

    # The saved parser is loaded into a fresh blank pipeline.
    # NOTE: loading it into the parser of an existing pretrained pipeline
    # (spacy.load('en_core_web_md') + nlp.get_pipe('parser')) was observed to
    # fail with
    #   ValueError: could not broadcast input array from shape (106,64) into shape (27,64)
    nlp = spacy.blank('en')
    parser = nlp.add_pipe('parser')
    load_parser_config(parser)

    print(f"Current pipeline is {nlp.meta['pipeline']}")

    doc = nlp('find a high paid job with no degree')
    arcs = [
        (token.text, token.dep_, token.head.text)
        for token in doc
        if token.dep_ != '-'
    ]
    print(f"Arcs: {arcs}")

In [None]:
# Run the whole train / save / reload / parse demo defined in main().
main()