In [21]:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk import pos_tag # Corrected import statement

# Download the NLTK data this script needs (skipped when already up to date).
# Recent NLTK releases (3.9+) load the tokenizer and tagger from the newer
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' packages, while older
# releases still read 'punkt' and 'averaged_perceptron_tagger'. Fetching both
# generations keeps word_tokenize() and pos_tag() working on either.
for resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
):
    nltk.download(resource, quiet=True)

# Sample passage: three sentences about diabetes, separated by newlines.
medical_text = (
    "Diabetes is a chronic disease that affects how the body processes blood sugar.\n"
    "If untreated, diabetes may cause heart disease, kidney failure, nerve damage and vision problems.\n"
    "Early diagnosis and proper treatment help improve patient outcomes."
)

# Split the passage into word and punctuation tokens.
tokens = word_tokenize(medical_text)
print("Original Tokens:", tokens, sep="\n")
print(f"\n{'-' * 30}\n")

# --- Stemming ---
# Reduce every token to its Porter stem (a stem need not be a dictionary word).
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, tokens))

print("Stemmed Words (Porter Stemmer):", stemmed_words, sep="\n")
print(f"\n{'-' * 30}\n")

# --- Lemmatization ---
# WordNet-based lemmatizer instance; it is applied to each POS-tagged token below.
lemmatizer = WordNetLemmatizer()

# Helper function to convert NLTK's POS tags to WordNet's format
def get_wordnet_pos(tag):
    """Return the WordNet POS constant matching an NLTK POS tag.

    The first letter of ``tag`` decides the result: 'J' maps to
    ``wordnet.ADJ``, 'V' to ``wordnet.VERB``, 'N' to ``wordnet.NOUN`` and
    'R' to ``wordnet.ADV``. Any other tag falls back to ``wordnet.NOUN``.
    """
    prefix_table = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_table:
        if tag.startswith(prefix):
            return wordnet_pos
    # No known prefix matched: treat the word as a noun.
    return wordnet.NOUN

# Tag each token with its part of speech, then lemmatize it using the
# WordNet category that get_wordnet_pos() derives from that tag.
pos_tags_nltk = pos_tag(tokens)

lemmatized_words = [
    lemmatizer.lemmatize(token, get_wordnet_pos(nltk_tag))
    for token, nltk_tag in pos_tags_nltk
]

print("Lemmatized Words (WordNet Lemmatizer):", lemmatized_words, sep="\n")
print(f"\n{'-' * 30}\n")

Original Tokens:
['Diabetes', 'is', 'a', 'chronic', 'disease', 'that', 'affects', 'how', 'the', 'body', 'processes', 'blood', 'sugar', '.', 'If', 'untreated', ',', 'diabetes', 'may', 'cause', 'heart', 'disease', ',', 'kidney', 'failure', ',', 'nerve', 'damage', 'and', 'vision', 'problems', '.', 'Early', 'diagnosis', 'and', 'proper', 'treatment', 'help', 'improve', 'patient', 'outcomes', '.']

------------------------------

Stemmed Words (Porter Stemmer):
['diabet', 'is', 'a', 'chronic', 'diseas', 'that', 'affect', 'how', 'the', 'bodi', 'process', 'blood', 'sugar', '.', 'if', 'untreat', ',', 'diabet', 'may', 'caus', 'heart', 'diseas', ',', 'kidney', 'failur', ',', 'nerv', 'damag', 'and', 'vision', 'problem', '.', 'earli', 'diagnosi', 'and', 'proper', 'treatment', 'help', 'improv', 'patient', 'outcom', '.']

------------------------------

Lemmatized Words (WordNet Lemmatizer):
['Diabetes', 'be', 'a', 'chronic', 'disease', 'that', 'affect', 'how', 'the', 'body', 'process', 'blood', 'sug