In [4]:
# NLTK provides the tokenizers, corpora and taggers used in the cells below.
import nltk

from nltk.tokenize import sent_tokenize
# Fetch the Punkt sentence-tokenizer data; the call reports "already up-to-date"
# when the package is present locally.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' instead — confirm
# against the installed version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/darkospy/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Load text

In [7]:
# Sample passage used as input for the processing steps below.
# The backslash right after the opening quotes keeps the string from starting
# with an empty line.
text = """\
Here are five creative ideas for repurposing your kids' art and reducing clutter:

Create a Memory Book: Compile their artwork into a scrapbook or photo album. This way, you can preserve their artistic journey over the years in a neat and organized manner.

DIY Wall Art: Select some of the best pieces and frame them to create unique wall art for your home. This way, you can showcase their creations while adding a personalized touch to your decor.

Custom Gift Wrap: Use their art to create custom gift wrap for special occasions. Simply wrap presents in their drawings and paintings for a heartfelt and one-of-a-kind touch.

Collage or Mosaic: Cut or tear the artwork into smaller pieces and create collages or mosaics. This is a great way to make new artworks while incorporating their original creations.

Turn Art into Accessories: Transform their art into wearable accessories like jewelry or keychains. There are online services that can turn drawings into pendants, pins, or even temporary tattoos."""

### Sentence tokenization

In [8]:
# Split the passage into a list of sentences with NLTK's sentence tokenizer.
sentences = sent_tokenize(text)
# Show the second sentence (index 1); it is the one the following cell cleans up.
print(sentences[1])

This way, you can preserve their artistic journey over the years in a neat and organized manner.


In [10]:
import re

# Replace every character that is not an ASCII letter or digit with a single
# space. Punctuation therefore turns into whitespace, and runs of spaces are
# not collapsed.
non_alphanumeric = re.compile(r"[^a-zA-Z0-9]")
new_text = non_alphanumeric.sub(" ", sentences[1])
print(new_text)

This way  you can preserve their artistic journey over the years in a neat and organized manner 


### Tokenize

In [11]:
from nltk.tokenize import word_tokenize
# Break the cleaned sentence into a list of individual word tokens.
tokenized_words = word_tokenize(new_text)
print(tokenized_words)

['This', 'way', 'you', 'can', 'preserve', 'their', 'artistic', 'journey', 'over', 'the', 'years', 'in', 'a', 'neat', 'and', 'organized', 'manner']


### Stop words removal

In [16]:
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load the English stop-word list once and keep it as a set. Calling
# stopwords.words("english") inside the comprehension re-evaluated it for every
# token and then scanned the resulting list linearly.
english_stopwords = set(stopwords.words("english"))

# Keep the tokens that are not stop words, preserving their original order.
# NOTE: the membership test is case-sensitive, so a capitalised token such as
# 'This' is kept even though its lower-case form would be filtered out.
without_stopwords = [word for word in tokenized_words if word not in english_stopwords]
print(without_stopwords)

['This', 'way', 'preserve', 'artistic', 'journey', 'years', 'neat', 'organized', 'manner']


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/darkospy/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Stemming

In [17]:
from nltk.stem.snowball import SnowballStemmer

# English Snowball stemmer: maps each word to a lower-cased stem, which need
# not be a dictionary word (e.g. 'preserve' -> 'preserv').
sn_stemmer = SnowballStemmer("english")
stemmed_words = list(map(sn_stemmer.stem, without_stopwords))
print(stemmed_words)

['this', 'way', 'preserv', 'artist', 'journey', 'year', 'neat', 'organ', 'manner']


### Lemmatize

In [21]:
from nltk.stem.wordnet import WordNetLemmatizer

# WordNet data, plus the Open Multilingual WordNet package, fetched for the lemmatizer.
nltk.download('wordnet')
nltk.download('omw-1.4')

lemmatizer = WordNetLemmatizer()
# lemmatize() is called with its default part of speech, so in this sample only
# the plural noun changes ('years' -> 'year'); 'organized' is left as is.
lemmatized_words = list(map(lemmatizer.lemmatize, without_stopwords))
print(lemmatized_words)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/darkospy/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/darkospy/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


['This', 'way', 'preserve', 'artistic', 'journey', 'year', 'neat', 'organized', 'manner']


### Tagging

In [23]:
from nltk import pos_tag

# Tagger model used by pos_tag.
nltk.download('averaged_perceptron_tagger')
# NOTE(review): not used by pos_tag itself; presumably fetched here for the
# ne_chunk call in the NER section further down — confirm before moving it.
nltk.download('maxent_ne_chunker')

# Tag every remaining token with its part of speech. The bare expression on the
# last line is what the notebook displays as the cell result.
pos_tag(without_stopwords)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/darkospy/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /Users/darkospy/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!


[('This', 'DT'),
 ('way', 'NN'),
 ('preserve', 'VB'),
 ('artistic', 'JJ'),
 ('journey', 'NN'),
 ('years', 'NNS'),
 ('neat', 'RB'),
 ('organized', 'VBD'),
 ('manner', 'NN')]

### Named Entity Recognition

In [25]:
from nltk import ne_chunk
nltk.download('words')

# Run the pipeline one step at a time: tokenize -> POS-tag -> chunk named entities.
ner_sentence = "The two billionaires' business interests have butted heads in the past: Musk's 2016 test launch of a SpaceX rocket destroyed Zuckerberg's US$200 million satellite."
ner_tokens = word_tokenize(ner_sentence)
ner_tagged = pos_tag(ner_tokens)
ner_tree = ne_chunk(ner_tagged)
# Printing the tree shows each token as word/TAG, with recognised entities
# grouped under a label such as PERSON or ORGANIZATION.
print(ner_tree)

(S
  The/DT
  two/CD
  billionaires/NNS
  '/POS
  business/NN
  interests/NNS
  have/VBP
  butted/VBN
  heads/NNS
  in/IN
  the/DT
  past/NN
  :/:
  (PERSON Musk/NN)
  's/POS
  2016/CD
  test/NN
  launch/NN
  of/IN
  a/DT
  (ORGANIZATION SpaceX/NNP)
  rocket/NN
  destroyed/VBD
  (PERSON Zuckerberg/NNP)
  's/POS
  (ORGANIZATION US/NNP)
  $/$
  200/CD
  million/CD
  satellite/NN
  ./.)


[nltk_data] Downloading package words to /Users/darkospy/nltk_data...
[nltk_data]   Package words is already up-to-date!
