In [1]:
import nltk

In [None]:
nltk.download("popular")

# Tokenization

In [4]:
from nltk.tokenize import word_tokenize, sent_tokenize

In [5]:
text = "This is a sentence for NLTK. It is a python library for natural language processing."
words = word_tokenize(text)
sentences = sent_tokenize(text)
print("Word tokens:", words)
print("Sentence tokens:", sentences)

Word tokens: ['This', 'is', 'a', 'sentence', 'for', 'NLTK', '.', 'It', 'is', 'a', 'python', 'library', 'for', 'natural', 'language', 'processing', '.']
Sentence tokens: ['This is a sentence for NLTK.', 'It is a python library for natural language processing.']


# Part-of-speech (POS) tagging 

### Some exapmles: DT: determiner; IN: preposition or conjunction, subordinating; VBZ: verb, present tense, 3rd person singular

In [13]:
from nltk import pos_tag
pos_tags = pos_tag(words)
print("Part of speech tags")
for groups in pos_tags:
    print(groups)

Part of speech tags
('This', 'DT')
('is', 'VBZ')
('a', 'DT')
('sentence', 'NN')
('for', 'IN')
('NLTK', 'NNP')
('.', '.')
('It', 'PRP')
('is', 'VBZ')
('a', 'DT')
('python', 'JJ')
('library', 'NN')
('for', 'IN')
('natural', 'JJ')
('language', 'NN')
('processing', 'NN')
('.', '.')


# Lemmatization

In [14]:
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Lemmatized words:", lemmatized_words)

Lemmatized words: ['This', 'is', 'a', 'sentence', 'for', 'NLTK', '.', 'It', 'is', 'a', 'python', 'library', 'for', 'natural', 'language', 'processing', '.']


# Named Entity Recognition (NER)

In [16]:
from nltk import ne_chunk
text = "Steve Jobs was the CEO of Apple Inc. located in California."
words = word_tokenize(text)
pos_tags = pos_tag(words)
named_entities = ne_chunk(pos_tags)
print("Named Entities:")
print(named_entities)

Named Entities:
(S
  (PERSON Steve/NNP)
  (PERSON Jobs/NNP)
  was/VBD
  the/DT
  (ORGANIZATION CEO/NNP)
  of/IN
  (ORGANIZATION Apple/NNP Inc./NNP)
  located/VBD
  in/IN
  (GPE California/NNP)
  ./.)
