# Stemming & Lemmatization

## Example 1: Porter stemming vs. WordNet lemmatization

In [4]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

text = "Stemming reduces words to their base form, while lemmatization does so with the help of a dictionary."

# Tokenize once and feed the same token list to both normalizers: the text
# is not split twice, and the two result lists line up index by index.
tokens = word_tokenize(text)

# Stemming: apply the Porter stemmer to every token (punctuation included).
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(token) for token in tokens]
print("Stemmed Words:", stemmed_words)

# Lemmatization: look every token up through the WordNet lemmatizer.
# NOTE: lemmatize() is called without a `pos` argument, so NLTK falls back to
# its default part of speech (noun). Verbs are therefore not reduced
# ('reduces' stays as is) and 'does' is treated as a plural noun -> 'doe'.
# Pass pos='v' (or a POS tag per token) to lemmatize verbs correctly.
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(token) for token in tokens]
print("Lemmatized Words:", lemmatized_words)

Stemmed Words: ['stem', 'reduc', 'word', 'to', 'their', 'base', 'form', ',', 'while', 'lemmat', 'doe', 'so', 'with', 'the', 'help', 'of', 'a', 'dictionari', '.']
Lemmatized Words: ['Stemming', 'reduces', 'word', 'to', 'their', 'base', 'form', ',', 'while', 'lemmatization', 'doe', 'so', 'with', 'the', 'help', 'of', 'a', 'dictionary', '.']


## Example 2: Snowball stemmer

In [6]:
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize

text = "SnowballStemmer supports multiple languages for stemming."

# Build an English Snowball stemmer and run it over every token of the
# sentence; the result keeps one entry per token, punctuation included.
stemmer = SnowballStemmer("english")
stemmed_words = list(map(stemmer.stem, word_tokenize(text)))
print("Stemmed Words:", stemmed_words)

Stemmed Words: ['snowballstemm', 'support', 'multipl', 'languag', 'for', 'stem', '.']


## Example 3: Lancaster stemmer

In [7]:
from nltk.stem import LancasterStemmer
from nltk.tokenize import word_tokenize

text = "LancasterStemmer is an aggressive stemming algorithm."

# Run the Lancaster stemmer over every token of the sentence; the result
# keeps one entry per token, punctuation included.
stemmer = LancasterStemmer()
stemmed_words = list(map(stemmer.stem, word_tokenize(text)))
print("Stemmed Words:", stemmed_words)

Stemmed Words: ['lancasterstem', 'is', 'an', 'aggress', 'stem', 'algorithm', '.']


## Example 4: Stemming and lemmatization on the same sentence

In [8]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

text = "Stemming and lemmatization may not always produce the same results."

# Tokenize once and reuse the token list for both normalizers, so the text is
# not split twice and the two outputs can be compared position by position.
tokens = word_tokenize(text)

# Stemming: apply the Porter stemmer to every token (punctuation included).
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(token) for token in tokens]
print("Stemmed Words:", stemmed_words)

# Lemmatization: look every token up through the WordNet lemmatizer.
# NOTE: lemmatize() is called without a `pos` argument, so every token is
# treated as a noun (NLTK's default). Only the plural noun 'results' changes
# here; pass pos='v' / pos='a' to lemmatize verbs or adjectives.
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(token) for token in tokens]
print("Lemmatized Words:", lemmatized_words)

Stemmed Words: ['stem', 'and', 'lemmat', 'may', 'not', 'alway', 'produc', 'the', 'same', 'result', '.']
Lemmatized Words: ['Stemming', 'and', 'lemmatization', 'may', 'not', 'always', 'produce', 'the', 'same', 'result', '.']


## Example 5: WordNet lemmatizer

In [9]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

text = "WordNetLemmatizer uses a lexical database for lemmatization."

# Lemmatize every token of the sentence with the WordNet lemmatizer.
# NOTE: lemmatize() receives no `pos` argument, so NLTK applies its default
# part of speech (noun); that is why the verb 'uses' is reduced to 'us'.
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, word_tokenize(text)))
print("Lemmatized Words:", lemmatized_words)

Lemmatized Words: ['WordNetLemmatizer', 'us', 'a', 'lexical', 'database', 'for', 'lemmatization', '.']
