In [1]:
import spacy

In [3]:
# Create a blank English pipeline object.
nlp = spacy.blank("en")

# Show the names of the pipeline's components; the recorded output for this
# blank pipeline is an empty list.
nlp.pipe_names

[]

In [5]:
# Rebind `nlp` to the packaged "en_core_web_sm" pipeline; this replaces the
# blank pipeline created in the earlier cell.
nlp = spacy.load("en_core_web_sm")

In [6]:
# Display the component names of the loaded pipeline (the recorded output
# lists tok2vec, tagger, parser, attribute_ruler, lemmatizer and ner).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Run the loaded pipeline over a short example sentence.
doc = nlp("Mr. John eats sandwiches for 2$ during his job")

# Print one row per token: token text | part of speech | lemma.
#   pos_   -> part-of-speech tag, e.g. VERB, NUM
#   lemma_ -> base form of the word, e.g. eats -> eat, sandwiches -> sandwich
for token in doc:
    pos, lemma = token.pos_, token.lemma_
    print(token, " | ", pos, " | ", lemma)
    

Mr.  |  PROPN  |  Mr.
John  |  PROPN  |  John
eats  |  VERB  |  eat
sandwiches  |  NOUN  |  sandwich
for  |  ADP  |  for
2  |  NUM  |  2
$  |  NUM  |  $
during  |  ADP  |  during
his  |  PRON  |  his
job  |  NOUN  |  job


In [9]:
# Process a sentence containing a year, two place names and two money amounts.
doc = nlp(" Since 2000 non-financial corporate debt across America and Europe has grown from $12.7trn to $38.1trn")

# Print one row per recognised entity: entity text | label | description of
# the label as returned by spacy.explain.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, " | ", label, " | ", spacy.explain(label))

2000  |  DATE  |  Absolute or relative dates or periods
America  |  GPE  |  Countries, cities, states
Europe  |  LOC  |  Non-GPE locations, mountain ranges, bodies of water
12.7trn  |  MONEY  |  Monetary values, including unit
38.1trn  |  MONEY  |  Monetary values, including unit


In [11]:
# displaCy is spaCy's visualizer; style "ent" renders the entities of the
# `doc` produced by the preceding cell.
from spacy import displacy
displacy.render(doc, style = "ent")

In [14]:
# Switch `nlp` to the Spanish "es_core_news_sm" pipeline.
# Note: this rebinds the notebook-wide `nlp`, so any cell run after this one
# sees the Spanish pipeline until `nlp` is assigned again.
nlp = spacy.load("es_core_news_sm")

# Spanish rendering of the corporate-debt sentence used in the English example.
doc = nlp("Desde 2000, la deuda corporativa no financiera en América y Europa ha aumentado de $12,7 billones a $38,1 billones.")

# Print one row per recognised entity: entity text | label | label description.
# In the recorded output only the two place names are tagged.
for ent in doc.ents:
    row = (ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))
    print(*row)

América  |  LOC  |  Non-GPE locations, mountain ranges, bodies of water
Europa  |  LOC  |  Non-GPE locations, mountain ranges, bodies of water


In [15]:
# Load the full English pipeline only to borrow one component from it.
source_nlp = spacy.load("en_core_web_sm")

# Start again from a blank English pipeline and add just the "ner" component,
# taken from `source_nlp` via the `source` argument.
nlp = spacy.blank("en")
nlp.add_pipe("ner", source = source_nlp)
# Last expression of the cell, so this is what gets displayed; the recorded
# output is ['ner'].
nlp.pipe_names

['ner']

In [17]:
# Run the pipeline that contains only the sourced "ner" component over the
# corporate-debt sentence.
doc = nlp("Since 2000 non-financial corporate debt across America and Europe has grown from $12.7trn to $38.1trn")

# Print one row per recognised entity: entity text | label | label description.
for ent in doc.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(entity_text, " | ", entity_label, " | ", spacy.explain(entity_label))

2000  |  DATE  |  Absolute or relative dates or periods
America  |  GPE  |  Countries, cities, states
Europe  |  LOC  |  Non-GPE locations, mountain ranges, bodies of water
12.7trn  |  MONEY  |  Monetary values, including unit
38.1trn  |  MONEY  |  Monetary values, including unit


In [5]:
# Load the English pipeline first so this cell does not depend on whichever
# pipeline `nlp` was bound to by earlier cells.
nlp = spacy.load("en_core_web_sm")

# Sample paragraph to analyse (kept exactly as written).
text = '''John and Mike are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Jack to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

doc = nlp(text)

# Keep every token whose part-of-speech tag is PROPN (proper noun).
all_proper_nouns = [token for token in doc if token.pos_ == "PROPN"]

# Report the collected tokens and how many there are.
print("Proper Nouns: ", all_proper_nouns)
print("Count: ", len(all_proper_nouns))

Proper Nouns:  [John, Mike, Paris, London, Dubai, Rome, Jack, Hyderabad]
Count:  8


In [10]:
# Sample text listing company names (kept exactly as written).
text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
Spain are Iberdrola SA, Santander, CaixaBank, Inditex and BBVA '''

# Uses the `nlp` pipeline that is already bound when this cell runs.
doc = nlp(text)

# Keep every recognised entity whose label is ORG.
all_company_names = [ent for ent in doc.ents if ent.label_ == "ORG"]

# Report the collected entities and how many there are.
print("Company Names: ", all_company_names)
print("Count: ", len(all_company_names))
    

Company Names:  [Tesla, Walmart, Amazon, Microsoft, Google, Iberdrola SA, Santander, CaixaBank,, Inditex, BBVA]
Count:  9
