# In [None]:
import os
import sys
import warnings

warnings.simplefilter(action='ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Set up the two NLP tools used below: the spaCy English pipeline (for
# lemmatization) and the NLTK Snowball stemmer (for stemming).
#
# The spaCy package and its model are checked separately so that the advice
# printed to the user matches the actual problem, and so that unrelated
# errors (or Ctrl-C) are not reported as "model not found".
try:
    import spacy
except ImportError:
    print("spaCy is not installed. Install it using:")
    print("pip install spacy")
    sys.exit(1)

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the named model package cannot be found.
    print("SpaCy model 'en_core_web_sm' not found. Install it using:")
    print("python -m spacy download en_core_web_sm")
    sys.exit(1)

from nltk.stem import SnowballStemmer

# English Snowball stemmer shared by every stemming section of the script.
stemmer = SnowballStemmer(language='english')

# Ask for the input file, load it as UTF-8 text into `content`, and print the
# first 300 characters as a preview. Any failure prints a one-line error and
# exits with status 1.
#
# The name is joined onto sys.path[0]; os.path.join returns an absolute
# `filename` unchanged, so absolute paths keep working.
filename = input("Enter text file name for full text processing: ")
filepath = os.path.join(sys.path[0], filename)

# isfile() rather than exists(): an empty answer (or a directory name)
# resolves to a directory, which exists() accepts but open() cannot read.
if not os.path.isfile(filepath):
    print(f"Error: File '{filename}' not found.")
    sys.exit(1)

try:
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
except (OSError, UnicodeDecodeError) as err:
    # OSError: the file could not be opened or read (e.g. permissions).
    # UnicodeDecodeError: the file is not valid UTF-8 text.
    print(f"Error: Could not read '{filename}': {err}")
    sys.exit(1)

print("\nOriginal Text Sample:")
print(content[:300])
print()

# --- Lemmatization of a fixed set of sample words (spaCy) ---
print("=== Lemmatization: Individual Words ===")
sample_words = "friendship studied was am is organizing matches"
doc = nlp(sample_words)

# Whitespace-only tokens are skipped; every other token is shown next to
# its lemma.
for tok in doc:
    if tok.is_space:
        continue
    print(f"{tok.text} -> {tok.lemma_}")
print()

# --- Stemming of the same sample words (Snowball) ---
print("=== Stemming: Individual Words ===")
for raw_word in sample_words.split():
    stemmed = stemmer.stem(raw_word)
    print(f"{raw_word} --> {stemmed}")
print()

from itertools import islice

# Number of leading non-whitespace tokens of the input file that are shown.
PREVIEW_TOKEN_COUNT = 50

# --- Lemmatization of the user-supplied text (spaCy) ---
print("=== Lemmatization: Full Text ===")
# NOTE(review): the entire file is still run through the spaCy pipeline even
# though only the first PREVIEW_TOKEN_COUNT tokens are displayed.
doc_full = nlp(content)

# islice stops scanning as soon as enough tokens are collected, instead of
# building a list of every token in the document and then slicing it.
tokens = list(
    islice(
        (token for token in doc_full if not token.is_space),
        PREVIEW_TOKEN_COUNT,
    )
)

for token in tokens:
    print(f"{token.text} --> {token.lemma_}")
print()

# --- Stemming of the same tokens (Snowball) ---
print("=== Stemming: Full Text ===")
for token in tokens:
    # The token text is lower-cased before it is handed to the stemmer.
    print(f"{token.text} --> {stemmer.stem(token.text.lower())}")
print()

# --- Side-by-side table: word, spaCy lemma, Snowball stem ---
print("=== Practice 6.2: Lemmatization vs Stemming ===")
# Fixed-width columns are used instead of tab separators: with tabs, a word
# that reaches the next tab stop (e.g. "universities", 12 characters) pushes
# its lemma and stem one column to the right of the header.
print(f"{'Word':<16}{'Lemma':<16}Stem")
print("------------------------------------------")

practice_words = "running good universities flies fairer is"
doc_practice = nlp(practice_words)

for token in doc_practice:
    # Skip whitespace-only tokens, as the other spaCy loops in this script do.
    if token.is_space:
        continue
    word = token.text
    lemma = token.lemma_
    # The word is lower-cased before stemming.
    stem = stemmer.stem(word.lower())
    print(f"{word:<16}{lemma:<16}{stem}")
print()

# ------------------ 7. CONCLUSION ------------------
# Closing summary: a heading line followed by a single paragraph line.
print(
    "Conclusion:",
    "Lemmatization produces dictionary-based meaningful root words, "
    "while stemming may distort words by chopping suffixes. "
    "For NLP tasks like search, topic modeling, and information retrieval, "
    "lemmatization gives better and cleaner output.",
    sep="\n",
)
