In [9]:
import spacy
import nltk

In [10]:
# Load the small English spaCy pipeline and bind it to `nlp` for the cells below.
model_name = 'en_core_web_sm'
nlp = spacy.load(model_name)

In [11]:
# Sample input with abbreviations and irregular spacing around the periods;
# running it through `nlp` yields the `doc` inspected in the next cell.
sample_text = "Hey this is Miss.Madhumitha . I am a Machine Learning Engineer, I work on NLP CNN. etc."
doc = nlp(sample_text)

In [12]:
# Print every sentence span of `doc`, one per line.
for sentence in doc.sents:
    sentence_text = str(sentence)
    print(sentence_text)

Hey this is Miss.Madhumitha .
I am a Machine Learning Engineer, I work on NLP CNN. etc.


In [13]:
from nltk.tokenize import sent_tokenize

# Sentence-split a string whose first word contains an abbreviation ("Dr.").
# The bare call on the last line is the value the cell displays.
# NOTE(review): sent_tokenize presumably needs NLTK's Punkt data installed — confirm on a fresh environment.
marvel_text = 'Dr.Strange loves pav bhaji of India. Hulk loves Thai'
sent_tokenize(marvel_text)

['Dr.Strange loves pav bhaji of India.', 'Hulk loves Thai']

In [14]:
from nltk.tokenize import word_tokenize

# Word-tokenize the same string used for the sentence-splitting cell.
# The bare call on the last line is the value the cell displays.
marvel_text = 'Dr.Strange loves pav bhaji of India. Hulk loves Thai'
word_tokenize(marvel_text)

['Dr.Strange',
 'loves',
 'pav',
 'bhaji',
 'of',
 'India',
 '.',
 'Hulk',
 'loves',
 'Thai']

In [15]:
# Rebind `nlp` to a blank English processing pipeline.
nlp = spacy.blank('en')

sample_text = "Hey this is Miss.Madhumitha . I am a Machine Learning Engineer, I work on NLP CNN. etc."
doc = nlp(sample_text)

# Print each token of `doc` on its own line.
for token in doc:
    token_text = str(token)
    print(token_text)

Hey
this
is
Miss.
Madhumitha
.
I
am
a
Machine
Learning
Engineer
,
I
work
on
NLP
CNN
.
etc
.


In [16]:
# Component names of the current `nlp` pipeline.
nlp.pipe_names

[]

In [17]:
# Rebind `nlp` to the trained small English pipeline, then display the names
# of its components.
model_name = "en_core_web_sm"
nlp = spacy.load(model_name)
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [18]:
# (name, component) pairs of the current `nlp` pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x20398445bb0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x2038ecc7ef0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x2038ea57a70>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x2039840ced0>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x20398407290>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2038ea57680>)]

In [20]:
sample_text = "Hey this is Miss.Madhumitha . I am a Machine Learning Engineer, I work on NLP CNN. etc. I ate a somosa."
doc = nlp(sample_text)

# One row per token: text | part-of-speech tag | lemma.
for token in doc:
    row = (token, "|", token.pos_, "|", token.lemma_)
    print(*row)

Hey | INTJ | hey
this | PRON | this
is | AUX | be
Miss. | PROPN | Miss.
Madhumitha | PROPN | Madhumitha
. | PUNCT | .
I | PRON | I
am | AUX | be
a | DET | a
Machine | PROPN | Machine
Learning | PROPN | Learning
Engineer | PROPN | Engineer
, | PUNCT | ,
I | PRON | I
work | VERB | work
on | ADP | on
NLP | PROPN | NLP
CNN | PROPN | CNN
. | PUNCT | .
etc | X | etc
. | PUNCT | .
I | PRON | I
ate | VERB | eat
a | DET | a
somosa | NOUN | somosa
. | PUNCT | .


In [22]:
doc = nlp("I work at Google Inc. I have 45 billion")

# One row per entity: text | label | spaCy's explanation of that label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, "|", label, "|", spacy.explain(label))

Google Inc. | ORG | Companies, agencies, institutions, etc.
45 billion | MONEY | Monetary values, including unit


In [24]:
from spacy import displacy

# Render the entities of the current `doc` using displaCy's "ent" style.
displacy.render(doc, style = "ent")

In [25]:
# Build a pipeline that contains only the "ner" component: start from a blank
# English pipeline and copy the component over from the trained one.
model_name = "en_core_web_sm"
source_nlp = spacy.load(model_name)
nlp = spacy.blank("en")
nlp.add_pipe("ner", source=source_nlp)

# Display the resulting component names.
nlp.pipe_names

['ner']

In [30]:
doc = nlp("I work at Google . I have 45 billion")

# One row per entity: text | label | spaCy's explanation of that label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, "|", label, "|", spacy.explain(label))

Google | ORG | Companies, agencies, institutions, etc.
45 billion | MONEY | Monetary values, including unit


In [31]:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

words = [
    "apple", "banana", "car", "dog", "cat",
    "house", "tree", "book", "computer", "chair",
]

# One row per word: original | Porter stem.
for word in words:
    stem = stemmer.stem(word)
    print(word, "|", stem)

apple | appl
banana | banana
car | car
dog | dog
cat | cat
house | hous
tree | tree
book | book
computer | comput
chair | chair


In [43]:
# Rebind `nlp` to the trained small English pipeline and lemmatize a list of words.
model_name = "en_core_web_sm"
nlp = spacy.load(model_name)

word_list_text = "bro apple banana car better dog eating sleeping cat house tree book computer chair"
doc = nlp(word_list_text)

# One row per token: text | lemma.
for token in doc:
    lemma = token.lemma_
    print(token, "|", lemma)

bro | bro
apple | apple
banana | banana
car | car
better | well
dog | dog
eating | eat
sleeping | sleep
cat | cat
house | house
tree | tree
book | book
computer | computer
chair | chair


In [35]:
# Component names of the current `nlp` pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [39]:
# Teach the pipeline's attribute ruler to assign the lemma "Brother" to the
# slang tokens "Bro" and "Brah" (exact, case-sensitive text match).
ar = nlp.get_pipe('attribute_ruler')

slang_rule = {
    "patterns": [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]],
    "attrs": {"LEMMA": "Brother"},
    "index": 0,  # same token index that `ar.add` uses by default
}

# `ar.add` appends a new rule on every call and `nlp` is shared kernel state,
# so register the rule only when an identical one is not already present.
# This keeps the cell idempotent when it is re-run.
if slang_rule not in ar.patterns:
    ar.add(**slang_rule)

doc = nlp("Bro, what you doing Brah")
# One row per token: text | lemma.
for token in doc:
    print(token, "|", token.lemma_)

Bro | Brother
, | ,
what | what
you | you
doing | do
Brah | Brother


In [40]:
# Lemma string of the first token of the current `doc`.
doc[0].lemma_

'Brother'