# Finding Confidence Score for Entities

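This notebook adds custom entity types (flowers and animals) with an `EntityRuler` and then reports a beam-search confidence score for each detected entity.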

# 1)- Importing key Modules

In [1]:
# for Python 2: use print only as a function
from __future__ import print_function

In [2]:
import spacy
from spacy.pipeline import EntityRuler
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy import displacy
import sys
import pandas as pd
from collections import defaultdict
# Load the 'en_core_web_sm' model; every later cell works on this shared `nlp`
# object. The recorded pipe_names output shows it provides tagger, parser and ner.
nlp = spacy.load('en_core_web_sm')

# 2)- Instantiate Scorer from spaCy

In [3]:
from spacy.scorer import Scorer

# NOTE(review): `scorer` is not referenced again in the cells visible here; the
# confidence values printed further down come from beam search, not this object.
scorer = Scorer()

# 3)- Defining list of entity types

In [4]:
# Surface forms for each custom entity type. Spellings (including "artic fox")
# are kept exactly as written because the sample text uses the same spellings.
flowers = [
    "rose",
    "tulip",
    "african daisy",
    "gardenias",
]
animals = [
    "cat",
    "dog",
    "artic fox",
    "fox",
    "wolf",
]

# 4)- Create rulerAll

In [5]:
# One ruler shared by every custom label. overwrite_ents=True is passed so that
# the ruler's matches presumably replace overlapping entities already set on the
# doc by earlier components — confirm against the installed spaCy version.
rulerAll = EntityRuler(nlp, overwrite_ents=True)

In [6]:
# Register every term in a single add_patterns() call instead of one call per
# term. Flower patterns come first, then animal patterns, which is the same
# registration order as before. Labels stay lowercase ("flower" / "animal").
rulerAll.add_patterns(
    [{"label": "flower", "pattern": term} for term in flowers]
    + [{"label": "animal", "pattern": term} for term in animals]
)

In [7]:
# Pipeline components before the ruler is attached.
print(nlp.pipe_names)

['tagger', 'parser', 'ner']


# 5)- Adding rulerAll to given pipeline

In [8]:
# In this case the same ruler is used for all classes, so give the component an
# explicit name; this is the name listed in nlp.pipe_names once it is added.
rulerAll.name = 'rulerAll'

In [9]:
# Attach the ruler only if a component with its name is not already in the
# pipeline, so re-running this cell on a live kernel does not fail on a
# duplicate component name. No position argument is given; the recorded
# pipe_names output shows the ruler ends up after 'ner'.
if rulerAll.name not in nlp.pipe_names:
    nlp.add_pipe(rulerAll)

In [10]:
# Confirm the ruler now appears in the pipeline.
print(nlp.pipe_names)

['tagger', 'parser', 'ner', 'rulerAll']


# 6)- Sample text

In [11]:
# Sample sentence containing every term from both lists plus one word ("plant")
# that is in neither. Spacing and spelling are kept verbatim.
text = (
    "cat, fox, dog, wolf and artic fox are one class,"
    "and plant, african daisy, rose ,tulip, gardenias are other class"
)

In [12]:
# Run the pipeline (which now includes rulerAll) on the sample text.
doc=nlp(text)

In [13]:
# Visualise the entities found on the doc inline in the notebook.
displacy.render(doc,style='ent',jupyter=True)

# 7)- Applying the beam search algorithm

In [14]:
# Minimum accumulated score a span needs before it is printed in the report cell.
threshold = 0.2
# Beam-parse the already-processed doc with the NER component.
# NOTE(review): `doc` already carries the ruler's entities at this point, and the
# recorded output shows 1.0 for every span — presumably the scores reflect those
# preset entities rather than model uncertainty; confirm before relying on them.
beams = nlp.entity.beam_parse([ doc ], beam_width = 16, beam_density = 0.0001)

In [15]:
# Inspect the result; the recorded output is a one-element list holding a Beam object.
beams

[<thinc.extra.search.Beam at 0x12458c7a0>]

In [16]:
# For each (start, end, label) span, add up the scores of all beam parses
# that contain it.
entity_scores = defaultdict(float)
for candidate_beam in beams:
    parses = nlp.entity.moves.get_beam_parses(candidate_beam)
    for parse_score, parse_ents in parses:
        for ent_start, ent_end, ent_label in parse_ents:
            span_key = (ent_start, ent_end, ent_label)
            entity_scores[span_key] += parse_score

print('Entities and scores (detected with beam search)')
# Report only the spans whose accumulated score is above `threshold`.
for (start, end, label), score in entity_scores.items():
    if not score > threshold:
        continue
    print("KEY:", label, "->", "VALUE:", doc[start:end], "->", "CONFIDENCE:", "->", round(score, 3))

Entities and scores (detected with beam search)
KEY: animal -> VALUE: cat -> CONFIDENCE: -> 1.0
KEY: animal -> VALUE: fox -> CONFIDENCE: -> 1.0
KEY: animal -> VALUE: dog -> CONFIDENCE: -> 1.0
KEY: animal -> VALUE: wolf -> CONFIDENCE: -> 1.0
KEY: animal -> VALUE: artic fox -> CONFIDENCE: -> 1.0
KEY: CARDINAL -> VALUE: one -> CONFIDENCE: -> 1.0
KEY: flower -> VALUE: african daisy -> CONFIDENCE: -> 1.0
KEY: flower -> VALUE: rose -> CONFIDENCE: -> 1.0
KEY: flower -> VALUE: tulip -> CONFIDENCE: -> 1.0
KEY: flower -> VALUE: gardenias -> CONFIDENCE: -> 1.0


### Using Options

In [17]:
# Restrict the visualisation to the flower entities.
# NOTE(review): the ruler label was registered as lowercase "flower" while this
# filter is upper-case — presumably displaCy compares labels case-insensitively;
# verify that the flower spans are actually highlighted.
options={'ents':['FLOWER']}

In [18]:
# Render the same doc again, this time passing the `options` entity filter.
displacy.render(doc,style='ent',jupyter=True, options=options)