In [1]:
import spacy

In [2]:
# Load the pipeline package named "en_core_web_sm"; every Doc and pattern
# below is produced by calling this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Parse the two example passages. The literals are wrapped with implicit
# string concatenation only for readability; the text is unchanged.
doc1 = nlp(
    'Japan faces a unique challenge as its economy has been stagnant '
    'for decades, compared to the more buoyant economies of rivals '
    'the US and China'
)
doc2 = nlp(
    'Japan also relies heavily on exporting its goods and has little '
    'control over consumer demand in other countries, which has been '
    'severely impacted by coronavirus lockdowns. Many of its biggest '
    'brands, such as car firms Toyota and Honda, have seen sales slump '
    'across the world.'
)

In [4]:
# For every token in doc1, print the token, its `pos_` tag, its `tag_` tag,
# and spaCy's human-readable explanation of the `tag_` value.
for token in doc1:
    print(token, token.pos_, token.tag_, spacy.explain(token.tag_))

Japan PROPN NNP noun, proper singular
faces VERB VBZ verb, 3rd person singular present
a DET DT determiner
unique ADJ JJ adjective
challenge NOUN NN noun, singular or mass
as SCONJ IN conjunction, subordinating or preposition
its PRON PRP$ pronoun, possessive
economy NOUN NN noun, singular or mass
has AUX VBZ verb, 3rd person singular present
been AUX VBN verb, past participle
stagnant ADJ JJ adjective
for ADP IN conjunction, subordinating or preposition
decades NOUN NNS noun, plural
, PUNCT , punctuation mark, comma
compared VERB VBN verb, past participle
to ADP IN conjunction, subordinating or preposition
the DET DT determiner
more ADV RBR adverb, comparative
buoyant ADJ JJ adjective
economies NOUN NNS noun, plural
of ADP IN conjunction, subordinating or preposition
rivals NOUN NNS noun, plural
the DET DT determiner
US PROPN NNP noun, proper singular
and CCONJ CC conjunction, coordinating
China PROPN NNP noun, proper singular


In [5]:
# Count the tokens of doc1 per POS attribute value. The keys are resolved to
# their text form through the vocab before printing.
pos_count = doc1.count_by(spacy.attrs.POS)
for pos_id, n_tokens in pos_count.items():
    pos_name = doc1.vocab[pos_id].text
    print(f' {pos_name} : {n_tokens}')

 PROPN : 3
 VERB : 2
 DET : 3
 ADJ : 3
 NOUN : 5
 SCONJ : 1
 PRON : 1
 AUX : 2
 ADP : 3
 PUNCT : 1
 ADV : 1
 CCONJ : 1


In [6]:
def ner(text):
    """Print the named entities attached to a parsed document.

    Parameters
    ----------
    text : object with an ``ents`` attribute
        The notebook passes spaCy ``Doc`` objects. Each item of ``ents`` must
        expose ``text`` and ``label_``.

    Returns
    -------
    None
        One ``<entity text> - <label>`` line is printed per entity. If
        ``ents`` is empty, a single notice is printed instead.
    """
    # Guard clause: nothing to list, so say so and stop.
    if not text.ents:
        print("No named entities found")
        return

    for ent in text.ents:
        print(f"{ent.text} - {ent.label_}")

In [7]:
# Entities present on doc2 straight from the pipeline, before any custom
# entity is added.
ner(doc2)

Japan - GPE
Toyota - ORG
Honda - ORG


In [8]:
from spacy.matcher import PhraseMatcher
# Phrase matcher built on the same vocabulary as the loaded pipeline.
matcher = PhraseMatcher(nlp.vocab)

In [9]:
# Spellings of the virus name to search for. Both capitalisations are listed
# explicitly (presumably because the matcher compares exact token text; confirm).
phare_lists = [
    'covid-19', 'covid19',
    'Covid19', 'Covid-19',
    'coronavirus', 'Coronavirus',
]
# Each phrase is run through the pipeline so it can be registered as a Doc pattern.
pattern = list(map(nlp, phare_lists))

In [10]:
# Register every phrase pattern under the key "Virus". The patterns are passed
# as one list in the second argument: that is the signature spaCy v3 requires
# and v2.2.2+ also accepts, whereas the legacy `add(key, None, *patterns)`
# call form is rejected by v3.
matcher.add("Virus", pattern)

In [11]:
# Run the phrase matcher over doc2. The result is iterated later as
# (match_id, start, end) triples.
match = matcher(doc2)

In [12]:
from spacy.tokens import Span

In [13]:
# Build one 'virus'-labelled Span per phrase match.
# A comprehension keeps every match (reassigning `span` inside a loop would
# keep only the last one) and yields an empty list when nothing matched, so
# `span` is always defined for the cells that follow.
# `strings.add` interns the label and returns its hash, so the label text can
# be resolved later even if 'virus' was not already in the vocabulary.
span = [
    Span(doc2, start, end, label=doc2.vocab.strings.add(u'virus'))
    for _match_id, start, end in match
]

In [14]:
# Append the custom span(s) to the entities already on doc2.
# NOTE(review): running this cell a second time would add the same span
# again; spaCy is expected to reject duplicate/overlapping entities (confirm).
doc2.ents = [*doc2.ents, *span]

In [15]:
# Same call as before, now that doc2.ents has been extended with the custom span.
ner(doc2)

Japan - GPE
coronavirus - virus
Toyota - ORG
Honda - ORG


In [16]:
# Show the entity spans currently attached to doc2 as a plain list.
list(doc2.ents)

[Japan, coronavirus, Toyota, Honda]

In [17]:
# Inspect the custom span list that was appended to doc2.ents.
span

[coronavirus]