Implement the concepts of Stemming & Lemmatization


In [1]:
#importing the dependencies for stemming and lemmatization
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize  #word_tokenize, sent_tokenize from nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer  #applying PorterStemmer, WordNetLemmatizer

# Download the NLTK resources this script needs: 'punkt' (tokenizer data)
# and 'wordnet' (presumably the corpus behind WordNetLemmatizer).
# NOTE(review): newer NLTK releases may additionally require 'punkt_tab'
# for word_tokenize -- confirm against the installed NLTK version.
nltk.download('punkt')     # downloading the necessary resources from nltk
nltk.download('wordnet')

#declaring functions for stemming and lemmatization
def process_text(text):
    """Tokenize *text* and return its stemmed and lemmatized word lists.

    The text is split into tokens with ``word_tokenize``. Every token is then
    stemmed with a ``PorterStemmer`` and, separately, lemmatized with a
    ``WordNetLemmatizer`` (called without a part-of-speech argument).

    Args:
        text: The raw text to process.

    Returns:
        A ``(stemmed_words, lemmatized_words)`` tuple of two lists, each
        holding one entry per token, in token order.
    """
    tokens = word_tokenize(text)

    # Bind the two transformations once; both are applied to the same tokens.
    to_stem = PorterStemmer().stem
    to_lemma = WordNetLemmatizer().lemmatize

    # Stem every token first, then lemmatize every token.
    stems = list(map(to_stem, tokens))
    lemmas = list(map(to_lemma, tokens))

    return stems, lemmas

#declaring the function that reads the .txt file provided by the user
def read_text(filename, encoding='utf-8'):
    """Return the entire contents of a text file as a single string.

    Args:
        filename: Path of the text file to read.
        encoding: Encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Returns:
        The full contents of the file as a ``str``.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file's bytes are not valid in *encoding*.
    """
    # An explicit encoding keeps decoding consistent across operating systems.
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()

def main():
    """Read ``text.txt``, stem and lemmatize its words, and print the results.

    Prints three sections in order: the original text, the stemmed words,
    and the lemmatized words.
    """
    # Change to the name of your .txt file
    source_path = 'text.txt'
    raw_text = read_text(source_path)

    stems, lemmas = process_text(raw_text)

    # Each section is a heading followed by its value, printed in this order.
    sections = (
        ("Original text:\n", raw_text),
        ("\nStemmed words:\n", stems),
        ("\nLemmatized words:\n", lemmas),
    )
    for heading, value in sections:
        print(heading, value)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Original text:
 Reliable pose estimation of uncooperative satellites
is a key technology for enabling future on-orbit servicing and
debris removal missions. The Kelvins Satellite Pose Estimation
Challenge aims at evaluating and comparing monocular visionbased approaches and pushing the state-of-the-art on this problem.

Stemmed words:
 ['reliabl', 'pose', 'estim', 'of', 'uncoop', 'satellit', 'is', 'a', 'key', 'technolog', 'for', 'enabl', 'futur', 'on-orbit', 'servic', 'and', 'debri', 'remov', 'mission', '.', 'the', 'kelvin', 'satellit', 'pose', 'estim', 'challeng', 'aim', 'at', 'evalu', 'and', 'compar', 'monocular', 'vision\x02bas', 'approach', 'and', 'push', 'the', 'state-of-the-art', 'on', 'thi', 'prob\x02lem', '.']

Lemmatized words:
 ['Reliable', 'pose', 'estimation', 'of', 'uncooperative', 'satellite', 'is', 'a', 'key', 'technology', 'for', 'enabling', 'future', 'on-orbit', 'servicing', 'and', 'debris', 'removal', 'mission', '.', 'The', 'Kelvins', 'Satellite', 'Pose', 'Estimatio