# Custom Named Entity Recognition 

- Add custom entities in form of table

# 1)- Import Key Modules

In [1]:
# for Python 2: use print only as a function
from __future__ import print_function

In [2]:
import spacy
from spacy.pipeline import EntityRuler
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy import displacy

# 2)- Load Pre-trained Model

In [3]:
nlp = spacy.load('en_core_web_sm', disable = ['ner'])

# 3)- Defining custom entities

flowers and animals in our case

### a.list classes

In [4]:
flowers = ["rose", "tulip", "african daisy"]
animals = ["cat", "dog", "artic fox"]

### b.instantiate ruler

In [5]:
rulerAll = EntityRuler(nlp, overwrite_ents=True)

### c.add pattern

In [6]:
for f in flowers:
    rulerAll.add_patterns([{"label": "flower", "pattern": f}])
for a in animals:
    rulerAll.add_patterns([{"label": "animal", "pattern": a}])

In [7]:
print(nlp.pipe_names)

['tagger', 'parser']


**We have no ruler added so far**

### d. Give a custom name for ruler

In [8]:
# in this case, we shall use same ruler for all classes
rulerAll.name = 'rulerAll'

### e. Add ruler name to nlp pipeline

In [9]:
nlp.add_pipe(rulerAll)

In [10]:
print(nlp.pipe_names)

['tagger', 'parser', 'rulerAll']


# 4)- Test on Text

In [11]:
text="cat, fox, dog, wolf and artic fox are one class,and plant, african daisy, rose ,tulip, gardenias are other class"

In [12]:
# convert to spacy token
doc=nlp(text)

In [13]:
for ent in doc.ents:
    print(ent.text , '->', ent.label_,'start',ent.start,'end',ent.end)

cat -> animal start 0 end 1
dog -> animal start 4 end 5
artic fox -> animal start 8 end 10
african daisy -> flower start 17 end 19
rose -> flower start 20 end 21
tulip -> flower start 22 end 23


# 5)-Results as Table

In [14]:
import pandas as pd
df_ent = pd.DataFrame({'ENT_DETECT': [], 'ENT_LABEL': [],'START':[] , 'END' :[]})

In [15]:
df_ent

Unnamed: 0,ENT_DETECT,ENT_LABEL,START,END


In [16]:
df_ent['ENT_DETECT']=[x.text for x in doc.ents]
df_ent['ENT_LABEL']=[ent for ent in doc.ents if ent.label_ == "flower" or ent.label_ == "animal"]
df_ent['START']=[x.start_char for x in doc.ents]
df_ent['END']=[x.end_char for x in doc.ents]

In [17]:
df_ent

Unnamed: 0,ENT_DETECT,ENT_LABEL,START,END
0,cat,(cat),0,3
1,dog,(dog),10,13
2,artic fox,"(artic, fox)",24,33
3,african daisy,"(african, daisy)",59,72
4,rose,(rose),74,78
5,tulip,(tulip),80,85


# END OF NOTEBOOK