In [7]:
import spacy
import json
from spacy import displacy


nlp = spacy.load('en_core_web_sm')
ruler = nlp.add_pipe("entity_ruler")
pt = [{'label': 'ahihihi','pattern' : 'friendship'},
           {'label': 'LOC','pattern' : {'LOWER' : 'continent'}}]

ruler.add_patterns(pt)

doc = nlp(
    """
The little payment for her pamphlet on the "Education of Daughters"
caused Mary Wollstonecraft to think more seriously of earning by her pen.
The pamphlet seems also to have advanced her credit as a teacher.  After
giving up her day school, she spent some weeks at Eton with the Rev. Mr.
Prior, one of the masters there, who recommended her as governess to the
daughters of Lord Kingsborough, an Irish viscount, eldest son of the Earl
of Kingston.  Her way of teaching was by winning love, and she obtained
the warm affection of the eldest of her pupils, who became afterwards
Countess Mount-Cashel.  In the summer of 1787, Lord Kingsborough's
family, including Mary Wollstonecraft, was at Bristol Hot-wells, before
going to the Continent.  While there, Mary Wollstonecraft wrote her
little tale published as "Mary, a Fiction," wherein there was much based
on the memory of her own friendship for Fanny Blood.
"""
)
# ner_text = open()

my_list = []
my_dict = {'text': None, 'entity type' : None, 'explanation' : None}
for ent in doc.ents:
    my_dict = {'text': ent.text, 'entity type' : ent.label_, 'explanation' : spacy.explain(ent.label_)}
    my_list.append(my_dict)
    print(f"Text: {ent.text}, The entity type: {ent.label_}, explanation: {spacy.explain(ent.label_)}")
    
with open("test.json", "w") as fp:
    json.dump(my_list, fp)

The name of the entity: the "Education of Daughters, The entity type code, based on the: LAW, An explanation of the entity type code: Named documents made into laws.
The name of the entity: Mary Wollstonecraft, The entity type code, based on the: PERSON, An explanation of the entity type code: People, including fictional
The name of the entity: some weeks, The entity type code, based on the: DATE, An explanation of the entity type code: Absolute or relative dates or periods
The name of the entity: Eton, The entity type code, based on the: ORG, An explanation of the entity type code: Companies, agencies, institutions, etc.
The name of the entity: one, The entity type code, based on the: CARDINAL, An explanation of the entity type code: Numerals that do not fall under another type
The name of the entity: Kingsborough, The entity type code, based on the: PERSON, An explanation of the entity type code: People, including fictional
The name of the entity: Irish, The entity type code, based o



In [5]:
dict

{'LAW': ['the "Education of Daughters'],
 'PERSON': ['Mary Wollstonecraft',
  'Kingsborough',
  'Earl',
  'Mount-Cashel',
  'Kingsborough',
  'Mary Wollstonecraft',
  'Mary Wollstonecraft'],
 'DATE': ['some weeks', 'the summer of 1787'],
 'ORG': ['Eton', 'Bristol Hot-wells'],
 'CARDINAL': ['one'],
 'NORP': ['Irish'],
 'GPE': ['Kingston', 'Countess'],
 'WORK_OF_ART': ['Mary, a Fiction'],
 'ahihihi': ['friendship'],
 'FAC': ['Fanny Blood']}

In [20]:
nlp.meta["sources"]

[{'name': 'OntoNotes 5',
  'url': 'https://catalog.ldc.upenn.edu/LDC2013T19',
  'license': 'commercial (licensed by Explosion)',
  'author': 'Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston'},
 {'name': 'ClearNLP Constituent-to-Dependency Conversion',
  'url': 'https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md',
  'license': 'Citation provided for reference, no code packaged with model',
  'author': 'Emory University'},
 {'name': 'WordNet 3.0',
  'url': 'https://wordnet.princeton.edu/',
  'author': 'Princeton University',
  'license': 'WordNet 3.0 License'},
 {'name': 'roberta-base',
  'author': 'Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and Luke Zettlemoyer and Veselin Stoyanov',
  'url': 'https://github.com/pytorch/

In [4]:
## Import Counter to count named entities
from collections import Counter


In [15]:
print(spacy.explain(ent.label_))

People, including fictional


In [2]:
# des
print(nlp.pipe_labels)

{'transformer': [], 'tagger': ['$', "''", ',', '-LRB-', '-RRB-', '.', ':', 'ADD', 'AFX', 'CC', 'CD', 'DT', 'EX', 'FW', 'HYPH', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NFP', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB', 'XX', '``'], 'parser': ['ROOT', 'acl', 'acomp', 'advcl', 'advmod', 'agent', 'amod', 'appos', 'attr', 'aux', 'auxpass', 'case', 'cc', 'ccomp', 'compound', 'conj', 'csubj', 'csubjpass', 'dative', 'dep', 'det', 'dobj', 'expl', 'intj', 'mark', 'meta', 'neg', 'nmod', 'npadvmod', 'nsubj', 'nsubjpass', 'nummod', 'oprd', 'parataxis', 'pcomp', 'pobj', 'poss', 'preconj', 'predet', 'prep', 'prt', 'punct', 'quantmod', 'relcl', 'xcomp'], 'attribute_ruler': [], 'lemmatizer': [], 'ner': ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']}


In [3]:
print(nlp.pipe_labels['ner'])

['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']


In [5]:
print("Count",Counter(dict).most_common(3))

Count [('GPE', ['U.K.']), ('ORG', ['Apple', 'OpenAI']), ('MONEY', ['$1 billion'])]


In [7]:
ruler = nlp.add_pipe("entity_ruler")

In [7]:
patterns = [{'label': 'ahihihi','pattern' : 'friendship'},
           {'label': 'LOC','pattern' : {'LOWER' : 'continent','DOC':'Kingston'}}]

In [22]:
dict

{'WORK_OF_ART': ['the "Education of Daughters"', 'Mary, a Fiction'],
 'PERSON': ['Mary Wollstonecraft',
  'Prior',
  'Kingsborough',
  'Mount-Cashel',
  'Kingsborough',
  'Mary Wollstonecraft',
  'Mary Wollstonecraft',
  'Fanny Blood'],
 'DATE': ['some weeks', 'the summer of 1787'],
 'FAC': ['Eton'],
 'CARDINAL': ['one'],
 'NORP': ['Irish'],
 'GPE': ['Kingston'],
 'ORG': ['Bristol Hot-wells'],
 'LOC': ['Continent'],
 'ahihihi': ['friendship']}

In [None]:
import spacy
import json
from spacy import displacy


nlp = spacy.load('en_core_web_sm')
ruler = nlp.add_pipe("entity_ruler")
pt = [{'label': 'ahihihi','pattern' : 'friendship'},
           {'label': 'LOC','pattern' : {'LOWER' : 'continent'}}]

ruler.add_patterns(pt)

doc = nlp(
    """
The little payment for her pamphlet on the "Education of Daughters"
caused Mary Wollstonecraft to think more seriously of earning by her pen.
The pamphlet seems also to have advanced her credit as a teacher.  After
giving up her day school, she spent some weeks at Eton with the Rev. Mr.
Prior, one of the masters there, who recommended her as governess to the
daughters of Lord Kingsborough, an Irish viscount, eldest son of the Earl
of Kingston.  Her way of teaching was by winning love, and she obtained
the warm affection of the eldest of her pupils, who became afterwards
Countess Mount-Cashel.  In the summer of 1787, Lord Kingsborough's
family, including Mary Wollstonecraft, was at Bristol Hot-wells, before
going to the Continent.  While there, Mary Wollstonecraft wrote her
little tale published as "Mary, a Fiction," wherein there was much based
on the memory of her own friendship for Fanny Blood.
"""
)
# ner_text = open()

my_list = []
my_dict = {'text': None, 'entity type' : None, 'explanation' : None}
for ent in doc.ents:
    my_dict = {'text': ent.text, 'entity type' : ent.label_, 'explanation' : spacy.explain(ent.label_)}
    my_list.append(my_dict)
    print(f"Text: {ent.text}, The entity type: {ent.label_}, explanation: {spacy.explain(ent.label_)}")