# Chapter 4 - Training a neural network model

## Training a neural network model

In [7]:
import spacy
from spacy.lang.en import English
from spacy.tokens import Token
from spacy.tokens import Doc, Span
from spacy.matcher import Matcher

### Creating training data (1)


In [None]:
# Two tokens whose lowercase forms match 'iphone' and 'x'
pattern1 = [{'LOWER': 'iphone'}, {'LOWER': 'x'}]

# Token whose lowercase form matches 'iphone' and an optional digit
pattern2 = [{'LOWER': 'iphone'}, {'IS_DIGIT': True, 'OP': '?'}]

# Add patterns to the matcher
matcher.add('GADGET', None, pattern1, pattern2)

In [3]:
TEXTS = ['McDonalds is my favorite restaurant.',
 'Here I thought @McDonalds only had precooked burgers but it seems they only have not cooked ones?? I have no time to get sick..',
 'People really still eat McDonalds :(',
 'The McDonalds in Spain has chicken wings. My heart is so happy ',
 '@McDonalds Please bring back the most delicious fast food sandwich of all times!!....The Arch Deluxe :P',
 'please hurry and open. I WANT A #McRib SANDWICH SO BAD! :D',
 'This morning i made a terrible decision by gettin mcdonalds and now my stomach is payin for it']

In [5]:
nlp = spacy.load("en_core_web_md")

In [10]:
matcher = Matcher(nlp.vocab)

# Create a Doc object for each text in TEXTS
for doc in nlp.pipe(TEXTS):
    # Find the matches in the doc
    matches = matcher(doc)
    
    # Get a list of (start, end, label) tuples of matches in the text
    entities = [(start, end, 'GADGET') for match_id, start, end in matches]
    print(doc.text, entities)   

McDonalds is my favorite restaurant. []
Here I thought @McDonalds only had precooked burgers but it seems they only have not cooked ones?? I have no time to get sick.. []
People really still eat McDonalds :( []
The McDonalds in Spain has chicken wings. My heart is so happy  []
@McDonalds Please bring back the most delicious fast food sandwich of all times!!....The Arch Deluxe :P []
please hurry and open. I WANT A #McRib SANDWICH SO BAD! :D []
This morning i made a terrible decision by gettin mcdonalds and now my stomach is payin for it []


  


In [11]:
TRAINING_DATA = []

# Create a Doc object for each text in TEXTS
for doc in nlp.pipe(TEXTS):
    # Match on the doc and create a list of matched spans
    spans = [doc[start:end] for match_id, start, end in matcher(doc)]
    # Get (start character, end character, label) tuples of matches
    entities = [(span.start_char, span.end_char, 'GADGET') for span in spans]
    
    # Format the matches as a (doc.text, entities) tuple
    training_example = (doc.text, {'entities': entities})
    # Append the example to the training data
    TRAINING_DATA.append(training_example)
    
print(*TRAINING_DATA, sep='\n')    

('McDonalds is my favorite restaurant.', {'entities': []})
('Here I thought @McDonalds only had precooked burgers but it seems they only have not cooked ones?? I have no time to get sick..', {'entities': []})
('People really still eat McDonalds :(', {'entities': []})
('The McDonalds in Spain has chicken wings. My heart is so happy ', {'entities': []})
('@McDonalds Please bring back the most delicious fast food sandwich of all times!!....The Arch Deluxe :P', {'entities': []})
('please hurry and open. I WANT A #McRib SANDWICH SO BAD! :D', {'entities': []})
('This morning i made a terrible decision by gettin mcdonalds and now my stomach is payin for it', {'entities': []})


  


## The training loop

### Setting up the pipeline

In [17]:
# Create a blank 'en' model
nlp = spacy.blank('en')

# Create a new entity recognizer and add it to the pipeline
ner = nlp.create_pipe('ner')
nlp.add_pipe("ner",'GADGET')

# Add the label 'GADGET' to the entity recognizer
#nlp.add_label('GADGET')

<spacy.pipeline.ner.EntityRecognizer at 0x203be15dad8>

### Building a training loop


In [27]:
import random
from spacy.training.example import Example

In [None]:
import random
from spacy.training.example import Example

for batch in spacy.util.minibatch(TRAINING_DATA, size=2):
    for text, annotations in batch:
        # create Example
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        # Update the model
        nlp.update([example], losses=losses, drop=0.3)

In [None]:
# Process each text in TEST_DATA
for doc in nlp.pipe(TEST_DATA):
    # Print the document text and entitites
    print(doc.text)
    print(doc.ents, '\n\n')

## Training best practices