In [1]:
import spacy

In [5]:
import spacy
from spacy.matcher import Matcher
from spacy.lang.en import English

# Raw headlines that the rule-based matcher will annotate automatically.
TEXTS = [
    'How to preorder the iPhone X',
    'iPhone X is coming',
    'Should I pay $1,000 for the iPhone X?',
    'The iPhone 8 reviews are here',
    "iPhone 11 vs iPhone 8: What's the difference?",
    'I need a new phone! Any tips?',
]

# A tokenizer-only English pipeline is enough: the patterns below rely purely
# on lexical token attributes (LOWER, IS_DIGIT).
nlp = English()
matcher = Matcher(nlp.vocab)

# "iphone" followed by "x" (case-insensitive), or "iphone" followed by a number.
pattern1 = [{"LOWER": "iphone"}, {"LOWER": "x"}]
pattern2 = [{"LOWER": "iphone"}, {"IS_DIGIT": True}]
matcher.add("GADGET", [pattern1, pattern2])

# Turn every match into a (start_char, end_char, label) triple and pair the
# triples with the source text: (text, {"entities": [...]}).
TRAINING_DATA = []
for doc in nlp.pipe(TEXTS):
    entities = []
    for _match_id, start, end in matcher(doc):
        span = doc[start:end]
        entities.append((span.start_char, span.end_char, "GADGET"))
    TRAINING_DATA.append((doc.text, {"entities": entities}))

# One training example per line.
print("\n".join(map(str, TRAINING_DATA)))


('How to preorder the iPhone X', {'entities': [(20, 28, 'GADGET')]})
('iPhone X is coming', {'entities': [(0, 8, 'GADGET')]})
('Should I pay $1,000 for the iPhone X?', {'entities': [(28, 36, 'GADGET')]})
('The iPhone 8 reviews are here', {'entities': [(4, 12, 'GADGET')]})
("iPhone 11 vs iPhone 8: What's the difference?", {'entities': [(0, 9, 'GADGET'), (13, 21, 'GADGET')]})
('I need a new phone! Any tips?', {'entities': []})


In [9]:
import spacy
import random
from spacy.training import Example
from spacy.util import minibatch

# Hand-labelled NER training data. Each item is [text, annotations], where
# annotations["entities"] lists (start_char, end_char, label) character spans.
examples = [
    ['How to preorder the iPhone X', {'entities': [(20, 28, 'GADGET')]}],
    ['iPhone X is coming', {'entities': [(0, 8, 'GADGET')]}],
    ['Should I pay $1,000 for the iPhone X?', {'entities': [(28, 36, 'GADGET')]}],
    ['The iPhone 8 reviews are here', {'entities': [(4, 12, 'GADGET')]}],
    ['Your iPhone goes up to 11 today', {'entities': [(5, 11, 'GADGET')]}],
    ['I need a new phone! Any tips?', {'entities': []}]
]

# Training hyper-parameters, named so they can be tuned in one place.
SEED = 0
N_ITERATIONS = 50
BATCH_SIZE = 2
DROPOUT = 0.5

# Seed the random number generators before any shuffling or training so a
# fresh-kernel re-run is repeatable. random.seed covers random.shuffle below;
# spacy.util.fix_random_seed seeds the generators spaCy itself draws from.
random.seed(SEED)
spacy.util.fix_random_seed(SEED)

nlp = spacy.blank("en")

# Bind `ner` on both branches: previously it was only assigned when the
# component had to be created, so a pipeline that already contained "ner"
# would hit a NameError on the add_label call.
if "ner" in nlp.pipe_names:
    ner = nlp.get_pipe("ner")
else:
    ner = nlp.add_pipe("ner", last=True)

ner.add_label("GADGET")

# initialize() returns the optimizer; keep it and pass it to every update so
# the training state is explicit rather than hidden inside `nlp`.
optimizer = nlp.initialize()

for iteration in range(N_ITERATIONS):
    # Re-shuffle (in place) each epoch so batches differ between iterations.
    random.shuffle(examples)
    losses = {}

    for batch in minibatch(examples, size=BATCH_SIZE):
        # Convert the raw [text, annotations] pairs into spaCy Example objects.
        examples_batch = [
            Example.from_dict(nlp.make_doc(text), annotations)
            for text, annotations in batch
        ]

        nlp.update(examples_batch, sgd=optimizer, losses=losses, drop=DROPOUT)

    print(f"Iteration {iteration + 1}, Losses: {losses}")

# Persist the trained pipeline to the "ner_model" directory.
nlp.to_disk("ner_model")


Iteration 1, Losses: {'ner': 33.369244396686554}
Iteration 2, Losses: {'ner': 28.604082584381104}
Iteration 3, Losses: {'ner': 23.18955487012863}
Iteration 4, Losses: {'ner': 17.599724858999252}
Iteration 5, Losses: {'ner': 12.705267533659935}
Iteration 6, Losses: {'ner': 10.056395938619971}
Iteration 7, Losses: {'ner': 8.688676697405754}
Iteration 8, Losses: {'ner': 8.305009263451211}
Iteration 9, Losses: {'ner': 8.039836566778831}
Iteration 10, Losses: {'ner': 11.590834258095128}
Iteration 11, Losses: {'ner': 8.763339522655542}
Iteration 12, Losses: {'ner': 7.671925151346841}
Iteration 13, Losses: {'ner': 7.51165466654038}
Iteration 14, Losses: {'ner': 9.626840226197146}
Iteration 15, Losses: {'ner': 5.99850064320708}
Iteration 16, Losses: {'ner': 6.060723724150421}
Iteration 17, Losses: {'ner': 5.578195828218668}
Iteration 18, Losses: {'ner': 3.0966112087226065}
Iteration 19, Losses: {'ner': 3.790778921229503}
Iteration 20, Losses: {'ner': 2.0184103843882384}
Iteration 21, Losses: {

In [10]:
import spacy

# Rebind `nlp` to the pipeline stored in the "ner_model" directory (the path
# the training cell writes with nlp.to_disk), so the cells below run against
# the persisted model rather than the in-memory one.
nlp = spacy.load("ner_model")


In [11]:
# Unseen sentences used to eyeball what the trained pipeline predicts.
texts = [
    "I just bought a new iPhone 13!",
    "Is the Samsung Galaxy S21 better than the iPhone 12?",
    "The iPhone X and iPhone 11 are both great phones.",
    "What do you think about the new iPhone 15 Pro?",
    "My friend got the iPhone 8, and he loves it."
]

for text in texts:
    doc = nlp(text)

    # Collect the report for this text, then emit it with a single print.
    lines = [f"Text: {text}"]
    for ent in doc.ents:
        lines.append(f" - Entity: {ent.text}, Label: {ent.label_}")

    # The trailing "\n" item plus print's own newline leaves two blank lines
    # between consecutive texts.
    lines.append("\n")
    print(*lines, sep="\n")


Text: I just bought a new iPhone 13!
 - Entity: iPhone 13, Label: GADGET


Text: Is the Samsung Galaxy S21 better than the iPhone 12?
 - Entity: iPhone 12, Label: GADGET


Text: The iPhone X and iPhone 11 are both great phones.
 - Entity: iPhone X, Label: GADGET
 - Entity: iPhone 11, Label: GADGET


Text: What do you think about the new iPhone 15 Pro?
 - Entity: iPhone 15, Label: GADGET


Text: My friend got the iPhone 8, and he loves it.
 - Entity: iPhone 8, Label: GADGET
 - Entity: it., Label: GADGET


