# **NER PRE-TRAINED Vs CUSTOM MODEL**

In [1]:
!pip install spacy



In [1]:
import spacy

In [3]:
nlp = spacy.load('en_core_web_sm')

In [9]:
texts = [
    'John goes for a walk in Berlin',
    'Mike is going to the store',
    'Elon Musk is the CEO of Twitter',
    'Arthur Kakande is the CEO of Instagram'
]

In [7]:
ner_labels = nlp.pipe_labels['ner']
print(ner_labels)

['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']


In [8]:
#NEW LABELS
categories = ['PERSON', 'ORG', 'LOC']

In [10]:
docs = [nlp(text) for text in texts]

In [13]:
for doc in docs:
  entities = []
  for ent in doc.ents:
    #if ent.label_ in categories:
    entities.append((ent.text, ent.label_))
  print(entities)

[('John', 'PERSON'), ('Berlin', 'GPE')]
[('Mike', 'PERSON')]
[('Elon Musk', 'PERSON')]
[('Arthur Kakande', 'PERSON'), ('Instagram', 'ORG')]


In [14]:
texts = [
    'What is the price of 4 bananas',
    'How much are 16 chairs',
    'Give me the cost of five laptops'
]

In [15]:
docs = [nlp(text) for text in texts]
for doc in docs:
  entities = []
  for ent in doc.ents:
    #if ent.label_ in categories:
    entities.append((ent.text, ent.label_))
  print(entities)

[('4', 'CARDINAL')]
[('16', 'CARDINAL')]
[('five', 'CARDINAL')]


# **Training a Custom NER Model**

In [23]:
!pip install spacy-lookups-data

Collecting spacy-lookups-data
  Downloading spacy_lookups_data-1.0.5-py2.py3-none-any.whl.metadata (4.8 kB)
Downloading spacy_lookups_data-1.0.5-py2.py3-none-any.whl (98.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.5/98.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: spacy-lookups-data
Successfully installed spacy-lookups-data-1.0.5


In [24]:
from spacy.util import minibatch
from spacy.training.example import Example
import random

In [27]:
# loading a blank model
nlp = spacy.blank('en')

In [29]:
train_data = [
    ("What is the price of 10 bananas?", {"entities": [(21, 23, "QUANTITY"), (24, 31, "PRODUCT")]}),
    ("How much does 5 apples cost?", {"entities": [(15, 16, "QUANTITY"), (17, 23, "PRODUCT")]}),
    ("I need 2 liters of milk.", {"entities": [(7, 8, "QUANTITY"), (9, 15, "PRODUCT"), (19, 23, "PRODUCT")]}),
    ("Price of 3 kg potatoes?", {"entities": [(9, 10, "QUANTITY"), (11, 13, "UNIT"), (14, 22, "PRODUCT")]}),
    ("Can I get 12 eggs?", {"entities": [(11, 13, "QUANTITY"), (14, 18, "PRODUCT")]}),
    ("Cost of 1 loaf of bread?", {"entities": [(9, 10, "QUANTITY"), (11, 15, "UNIT"), (19, 24, "PRODUCT")]}),
    ("What’s the price for 500g sugar?", {"entities": [(20, 24, "QUANTITY"), (25, 27, "UNIT"), (28, 33, "PRODUCT")]}),
    ("I want to buy 4 oranges.", {"entities": [(15, 16, "QUANTITY"), (17, 24, "PRODUCT")]}),
    ("How much is 1 kg tomatoes?", {"entities": [(12, 13, "QUANTITY"), (14, 16, "UNIT"), (17, 24, "PRODUCT")]}),
    ("Price check for 6 avocados.", {"entities": [(17, 18, "QUANTITY"), (19, 27, "PRODUCT")]}),
    ("Cost of 2 packs of chips?", {"entities": [(9, 10, "QUANTITY"), (11, 16, "UNIT"), (20, 25, "PRODUCT")]}),
    ("I need 10 bottles of water.", {"entities": [(7, 9, "QUANTITY"), (10, 16, "UNIT"), (20, 25, "PRODUCT")]}),
    ("What’s the price of 8 lemons?", {"entities": [(19, 20, "QUANTITY"), (21, 27, "PRODUCT")]}),
    ("How much for 3 boxes of cereal?", {"entities": [(13, 14, "QUANTITY"), (15, 20, "UNIT"), (24, 30, "PRODUCT")]}),
    ("Price of 1 dozen bananas?", {"entities": [(9, 10, "QUANTITY"), (11, 16, "UNIT"), (17, 24, "PRODUCT")]}),
    ("Cost for 500 ml juice?", {"entities": [(9, 12, "QUANTITY"), (13, 15, "UNIT"), (16, 21, "PRODUCT")]}),
    ("I’d like to order 7 pizzas.", {"entities": [(19, 20, "QUANTITY"), (21, 27, "PRODUCT")]}),
    ("What is the cost of 2 kg flour?", {"entities": [(19, 20, "QUANTITY"), (21, 23, "UNIT"), (24, 29, "PRODUCT")]}),
    ("Price for 5 cans of soda?", {"entities": [(10, 11, "QUANTITY"), (12, 16, "UNIT"), (20, 24, "PRODUCT")]}),
    ("How much does 1 gallon of oil cost?", {"entities": [(15, 16, "QUANTITY"), (17, 23, "UNIT"), (27, 30, "PRODUCT")]}),
    ("I need 20 bars of chocolate.", {"entities": [(7, 9, "QUANTITY"), (10, 14, "UNIT"), (18, 27, "PRODUCT")]})
]


In [31]:
if 'ner' not in nlp.pipe_names:
  ner = nlp.add_pipe('ner')
else:
  ner = nlp.get_pipe('ner')

for _, annotations in train_data:
  for ent in annotations.get('entities'):
    if ent[2] not in ner.labels:
      ner.add_label(ent[2])


other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):
  optimizer = nlp.begin_training()

  epochs = 40
  for epoch in range(epochs):
    random.shuffle(train_data)
    losses = {}
    batches = minibatch(train_data, size=2)
    for batch in batches:
      examples = []
      for text, annotation in batch:
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotation)
        examples.append(example)
      nlp.update(examples, drop=0.5, losses=losses)
    print(f'Epoch {epoch + 1}, Loss: {losses}')



Epoch 1, Loss: {'ner': np.float32(104.83377)}
Epoch 2, Loss: {'ner': np.float32(59.655003)}
Epoch 3, Loss: {'ner': np.float32(47.618637)}
Epoch 4, Loss: {'ner': np.float32(44.587128)}
Epoch 5, Loss: {'ner': np.float32(40.77451)}
Epoch 6, Loss: {'ner': np.float32(39.598267)}
Epoch 7, Loss: {'ner': np.float32(35.661034)}
Epoch 8, Loss: {'ner': np.float32(31.648365)}
Epoch 9, Loss: {'ner': np.float32(28.289684)}
Epoch 10, Loss: {'ner': np.float32(30.53318)}
Epoch 11, Loss: {'ner': np.float32(22.54139)}
Epoch 12, Loss: {'ner': np.float32(18.58731)}
Epoch 13, Loss: {'ner': np.float32(15.5071335)}
Epoch 14, Loss: {'ner': np.float32(12.270791)}
Epoch 15, Loss: {'ner': np.float32(10.723938)}
Epoch 16, Loss: {'ner': np.float32(6.834237)}
Epoch 17, Loss: {'ner': np.float32(10.752377)}
Epoch 18, Loss: {'ner': np.float32(6.874629)}
Epoch 19, Loss: {'ner': np.float32(5.7006207)}
Epoch 20, Loss: {'ner': np.float32(7.131642)}
Epoch 21, Loss: {'ner': np.float32(5.617788)}
Epoch 22, Loss: {'ner': np.fl

In [32]:
nlp.to_disk('custom_ner_model')

In [33]:
trained_nlp = spacy.load('custom_ner_model')

In [37]:
test_texts = [
    'What is the price of 4 bananas',
    'How much are 16 chairs',
    'Give me the cost of five laptops',
    'What is the price of 10 bananas?',
    'How much does 5 apples cost?'
]

In [38]:
# Test the trained model
for text in test_texts:
    doc = trained_nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    print(f"Text: {text}")
    print(f"Entities: {entities}")

Text: What is the price of 4 bananas
Entities: [('bananas', 'PRODUCT')]
Text: How much are 16 chairs
Entities: [('16', 'QUANTITY'), ('chairs', 'PRODUCT')]
Text: Give me the cost of five laptops
Entities: []
Text: What is the price of 10 bananas?
Entities: [('10', 'QUANTITY'), ('bananas', 'PRODUCT')]
Text: How much does 5 apples cost?
Entities: []
