### Stopwords 

In [12]:
"""
Stop words in Natural Language Processing (NLP) are common words that are often filtered out or ignored during text processing because they carry 
little meaningful information or value for analysis. These words are typically high-frequency words that are essential for sentence structure
but do not contribute significantly to the meaning or context of the text.
Examples of Stop Words
English: "a", "an", "the", "and", "or", "in", "of", "to", "is", "it", "for", "with", "on", "at", "by", "this", "that", "are", "was", "were", "as", 
"but", "not", etc.
"""

'\nStop words in Natural Language Processing (NLP) are common words that are often filtered out or ignored during text processing because they carry \nlittle meaningful information or value for analysis. These words are typically high-frequency words that are essential for sentence structure\nbut do not contribute significantly to the meaning or context of the text.\nExamples of Stop Words\nEnglish: "a", "an", "the", "and", "or", "in", "of", "to", "is", "it", "for", "with", "on", "at", "by", "this", "that", "are", "was", "were", "as", \n"but", "not", etc.\n'

In [1]:
corpus = """
"Dream, dream, dream. Dreams transform into thoughts, and thoughts result in action. 
You have the power to shape your destiny. Education is the most powerful weapon to change the world.
Embrace failure as a stepping stone to success, for it teaches resilience and determination.
Let integrity and hard work guide your path. Remember, greatness is born from small beginnings. 
Serve others selflessly, for true success lies in contributing to society. Believe in yourself, and never stop learning. 
Together, we can build a nation of innovation, compassion, and progress. Let us strive to ignite minds and illuminate lives. Thank you."
"""

#### Stemming with stop words

In [14]:
from nltk.stem import PorterStemmer

In [15]:
from nltk.corpus import stopwords

In [16]:
import nltk

In [17]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Anonymous\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [18]:
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

#### Best way is to create your own stopwords 

In [19]:
stemming = PorterStemmer()

In [20]:
from nltk.tokenize import sent_tokenize

In [21]:
document = sent_tokenize(corpus)

In [22]:
document

['\n"Dream, dream, dream.',
 'Dreams transform into thoughts, and thoughts result in action.',
 'You have the power to shape your destiny.',
 'Education is the most powerful weapon to change the world.',
 'Embrace failure as a stepping stone to success, for it teaches resilience and determination.',
 'Let integrity and hard work guide your path.',
 'Remember, greatness is born from small beginnings.',
 'Serve others selflessly, for true success lies in contributing to society.',
 'Believe in yourself, and never stop learning.',
 'Together, we can build a nation of innovation, compassion, and progress.',
 'Let us strive to ignite minds and illuminate lives.',
 'Thank you."']

In [23]:
from nltk.tokenize import word_tokenize

### Tasks :- Apply stopwords in each sentence in document , then apply stemming

In [34]:
for sentence in document:
    words=word_tokenize(sentence)
    print(words)

["''", 'Dream', ',', 'dream', ',', 'dream', '.']
['Dreams', 'transform', 'into', 'thoughts', ',', 'and', 'thoughts', 'result', 'in', 'action', '.']
['You', 'have', 'the', 'power', 'to', 'shape', 'your', 'destiny', '.']
['Education', 'is', 'the', 'most', 'powerful', 'weapon', 'to', 'change', 'the', 'world', '.']
['Embrace', 'failure', 'as', 'a', 'stepping', 'stone', 'to', 'success', ',', 'for', 'it', 'teaches', 'resilience', 'and', 'determination', '.']
['Let', 'integrity', 'and', 'hard', 'work', 'guide', 'your', 'path', '.']
['Remember', ',', 'greatness', 'is', 'born', 'from', 'small', 'beginnings', '.']
['Serve', 'others', 'selflessly', ',', 'for', 'true', 'success', 'lies', 'in', 'contributing', 'to', 'society', '.']
['Believe', 'in', 'yourself', ',', 'and', 'never', 'stop', 'learning', '.']
['Together', ',', 'we', 'can', 'build', 'a', 'nation', 'of', 'innovation', ',', 'compassion', ',', 'and', 'progress', '.']
['Let', 'us', 'strive', 'to', 'ignite', 'minds', 'and', 'illuminate', 'l

In [29]:
word_tokenize(document[0])

["''", 'Dream', ',', 'dream', ',', 'dream', '.']

In [27]:
len(document)

12

In [37]:
from nltk.stem import PorterStemmer

In [38]:
stemming = PorterStemmer()

In [40]:
processed_document=[]
for sentence in document:
    words = word_tokenize(sentence)
    words = [stemming.stem(word) for word in words if word not in set(stopwords.words('english'))]
    sentence = ' '.join(words)
    processed_document.append(sentence)

In [41]:
processed_document

["'' dream , dream , dream .",
 'dream transform thought , thought result action .',
 'you power shape destini .',
 'educ power weapon chang world .',
 'embrac failur step stone success , teach resili determin .',
 'let integr hard work guid path .',
 'rememb , great born small begin .',
 'serv other selflessli , true success lie contribut societi .',
 'believ , never stop learn .',
 'togeth , build nation innov , compass , progress .',
 'let us strive ignit mind illumin live .',
 "thank . ''"]

### doing lemmatizing

In [42]:
from nltk.stem import WordNetLemmatizer

In [43]:
word_lemm = WordNetLemmatizer()

In [44]:
processed_document=[]
for sentence in document:
    words = word_tokenize(sentence)
    words = [word_lemm.lemmatize(word) for word in words if word not in set(stopwords.words('english'))]
    sentence = ' '.join(words)
    processed_document.append(sentence)

In [45]:
processed_document

["'' Dream , dream , dream .",
 'Dreams transform thought , thought result action .',
 'You power shape destiny .',
 'Education powerful weapon change world .',
 'Embrace failure stepping stone success , teach resilience determination .',
 'Let integrity hard work guide path .',
 'Remember , greatness born small beginning .',
 'Serve others selflessly , true success lie contributing society .',
 'Believe , never stop learning .',
 'Together , build nation innovation , compassion , progress .',
 'Let u strive ignite mind illuminate life .',
 "Thank . ''"]