In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
# Fetch the NLTK resources used in this cell; quiet=True keeps the
# downloader from printing progress output.
for _resource in ('punkt', 'stopwords', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource, quiet=True)

# Sample sentence processed by the rest of the cell.
text = "John enjoys playing football while Mary loves reading books in the library."

# Step 1: split the sentence into tokens.
tokens = word_tokenize(text)
print("Tokens:", tokens)

# Step 2: keep only purely alphabetic tokens that are not English
# stopwords (the stopword comparison is done on the lower-cased token).
stop_words = set(stopwords.words('english'))
filtered_tokens = [
    token
    for token in tokens
    if token.isalpha() and token.lower() not in stop_words
]
print("\nAfter removing stopwords:", filtered_tokens)

# Lemmatizer instance used by the lemmatization step below.
lemmatizer = WordNetLemmatizer()
def get_wordnet_pos(tag):
    """Translate a POS tag string into the matching WordNet POS constant.

    The decision is made on the tag's prefix: ``J`` -> adjective,
    ``V`` -> verb, ``R`` -> adverb. Every other tag (including those
    starting with ``N``) is treated as a noun.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('R', wordnet.ADV),
    )
    for prefix, wn_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wn_pos
    # Noun tags and anything unrecognised fall back to NOUN.
    return wordnet.NOUN
# Step 3: POS-tag the COMPLETE token sequence, then drop stopwords and
# non-alphabetic tokens from the tagged pairs. The tagger relies on the
# neighbouring words, so tagging the already stopword-stripped list can
# mis-tag words (for example a noun that then fails the noun/verb filter
# below). The filter condition is the same one used for filtered_tokens,
# so pos_tags still covers exactly those words, in the same order.
pos_tags = [
    (word, tag)
    for word, tag in pos_tag(tokens)
    if word.lower() not in stop_words and word.isalpha()
]

# Step 4: lemmatize every remaining word using its WordNet POS.
lemmas = [lemmatizer.lemmatize(word, get_wordnet_pos(tag)) for word, tag in pos_tags]
print("\nLemmatized words:", lemmas)

# Step 5: keep only the lemmas whose tag marks a noun (N*) or a verb (V*).
# The lemmas computed above are reused instead of lemmatizing a second time.
verbs_nouns = [
    lemma
    for lemma, (_, tag) in zip(lemmas, pos_tags)
    if tag.startswith(('N', 'V'))
]

print("\nOnly Verbs and Nouns:", verbs_nouns)





Tokens: ['John', 'enjoys', 'playing', 'football', 'while', 'Mary', 'loves', 'reading', 'books', 'in', 'the', 'library', '.']

After removing stopwords: ['John', 'enjoys', 'playing', 'football', 'Mary', 'loves', 'reading', 'books', 'library']

Lemmatized words: ['John', 'enjoy', 'play', 'football', 'Mary', 'love', 'read', 'book', 'library']

Only Verbs and Nouns: ['John', 'enjoy', 'play', 'football', 'Mary', 'love', 'read', 'book']


In [4]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

# Fetch the NLTK resources used in this cell; quiet=True keeps the
# downloader from printing progress output.
for _resource in ('punkt', 'maxent_ne_chunker', 'words', 'averaged_perceptron_tagger'):
    nltk.download(_resource, quiet=True)

# Sample text: two sentences, the second one referring back with pronouns.
text = "Chris met Alex at Apple headquarters in California. He told him about the new iPhone launch."

# Pipeline: tokenize -> POS-tag -> named-entity chunking.
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
chunks = ne_chunk(pos_tags)

# Print every named entity found by the chunker together with its label.
# Only items exposing a `label` attribute are treated as entities; the
# remaining items in `chunks` are skipped.
print("Named Entities:")
for chunk in chunks:
    if not hasattr(chunk, 'label'):
        continue
    # Each leaf of an entity chunk is indexed as (word, ...); join the words.
    entity = " ".join(leaf[0] for leaf in chunk)
    print(f"{entity} --> {chunk.label()}")

# Third-person pronouns that may refer back to one of several entities.
# Object and possessive forms are included alongside the subject forms so
# that tokens such as "him", "her" or "their" are detected as well.
pronouns = {
    "he", "him", "his",
    "she", "her", "hers",
    "they", "them", "their", "theirs",
}

# Simple heuristic: warn as soon as any token is one of these pronouns.
if any(word.lower() in pronouns for word in tokens):
    print("\nWarning: Possible pronoun ambiguity detected!")







Named Entities:
Chris --> PERSON
Alex --> PERSON
Apple --> ORGANIZATION
California --> GPE

