In [1]:
# Tokenization: break one sample passage into sentences and into word tokens.
from nltk.tokenize import sent_tokenize, word_tokenize

text = "Natural Language Processing is fascinating. It helps computers understand human language."

# The two splits are independent of each other; both read the same raw string.
sentences = sent_tokenize(text)
words = word_tokenize(text)

# Word tokens -- punctuation is emitted as its own token:
# ['Natural', 'Language', 'Processing', 'is', 'fascinating', '.', 'It', 'helps', 'computers', 'understand', 'human', 'language', '.']
print(words)
# Sentences:
# ['Natural Language Processing is fascinating.', 'It helps computers understand human language.']
print(sentences)

['Natural', 'Language', 'Processing', 'is', 'fascinating', '.', 'It', 'helps', 'computers', 'understand', 'human', 'language', '.']
['Natural Language Processing is fascinating.', 'It helps computers understand human language.']


In [2]:
# Stop Words Removal: drop tokens whose lowercase form is an English stop word.
from nltk.corpus import stopwords

# A set gives constant-time membership checks during filtering.
stop_words = set(stopwords.words('english'))

# Comparison is done on the lowercased token, but the original casing is kept
# in the result. Punctuation tokens are not stop words, so they pass through.
filtered_words = list(
    filter(lambda token: token.lower() not in stop_words, words)
)

# Expected for the sample text ('is' and 'It' removed):
# ['Natural', 'Language', 'Processing', 'fascinating', '.', 'helps', 'computers', 'understand', 'human', 'language', '.']
print(filtered_words)


['Natural', 'Language', 'Processing', 'fascinating', '.', 'helps', 'computers', 'understand', 'human', 'language', '.']


In [3]:
# Stemming and Lemmatization: reduce inflected words to a base form.
from nltk.stem import PorterStemmer, WordNetLemmatizer

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# One result per line:
#   stem of "running"                        -> 'run'
#   lemma of "better" with pos="a" (adjective) -> 'good'
print(
    stemmer.stem("running"),
    lemmatizer.lemmatize("better", pos="a"),
    sep="\n",
)

run
good


In [4]:
# Part-of-Speech (POS) Tagging
# Tags every token in `words` (the word-token list built in the tokenization cell)
# and prints the resulting (token, tag) pairs.
from nltk import pos_tag
tagged_words = pos_tag(words)
# The comment below mirrors the output recorded for this run. Note that
# 'Language' and 'Processing' are tagged NNP and 'fascinating' is tagged VBG here,
# not NN / JJ -- presumably tagger-model dependent; re-check after upgrading NLTK data.
print(tagged_words)   # [('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('is', 'VBZ'), ('fascinating', 'VBG'), ('.', '.'), ...]

[('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('is', 'VBZ'), ('fascinating', 'VBG'), ('.', '.'), ('It', 'PRP'), ('helps', 'VBZ'), ('computers', 'NNS'), ('understand', 'VBP'), ('human', 'JJ'), ('language', 'NN'), ('.', '.')]


In [5]:
# Named Entity Recognition (NER)
# Runs the chunker over `tagged_words` (the (token, tag) pairs produced by the
# POS-tagging cell) and prints the resulting tree.
from nltk import ne_chunk
named_entities = ne_chunk(tagged_words)
# For this sample text the recorded output is a flat tree: every token/tag leaf
# sits directly under the root S node and no nested chunk appears, i.e. no
# named entities were found in the sentence pair.
print(named_entities)

(S
  Natural/JJ
  Language/NNP
  Processing/NNP
  is/VBZ
  fascinating/VBG
  ./.
  It/PRP
  helps/VBZ
  computers/NNS
  understand/VBP
  human/JJ
  language/NN
  ./.)


In [6]:
# WordNet Interface: look up "computer" and show the definition of its first entry.
from nltk.corpus import wordnet

# NOTE: despite the name, `synonyms` holds whatever synsets() returns for the
# word -- objects exposing .definition() -- not a list of plain synonym strings.
synonyms = wordnet.synsets("computer")

# Only the first entry is inspected.
first_synset = synonyms[0]
# Expected: 'a machine for performing calculations automatically'
print(first_synset.definition())

a machine for performing calculations automatically
