In [63]:
import PyPDF2  # Commented out for future potential (PDF conversion)
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet  # Import for synonym finding
import difflib  # Import for efficient string similarity comparison

In [64]:
def extract_text_from_txt(txt_path):
    """Extracts text from a TXT file."""
    try:
        with open(txt_path, 'r') as txt_file:
            text = txt_file.read().strip()
            return text
    except FileNotFoundError:
        print("Error: TXT file not found.")
        return None

In [65]:
def tokenize_text(text):
    """Tokenizes the extracted text."""
    tokens = word_tokenize(text.lower())
    return tokens

In [66]:
def find_similar_words(word, similarity_threshold=0.7):
    """Finds synonyms for a given word using WordNet."""
    synonyms = []
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            synonyms.append(lemma.name())
    return [synonym for synonym in synonyms if difflib.SequenceMatcher(None, word, synonym).ratio() >= similarity_threshold]


In [67]:
def update_text_file(txt_path, old_text, new_text):
    """Updates the text in the TXT file."""
    try:
        with open(txt_path, 'r') as txt_file:
            text = txt_file.read()
        replaced_text = text.replace(old_text, new_text)

        with open(txt_path, 'w') as txt_file:
            txt_file.write(replaced_text)
        print("Text file updated successfully.")
    except FileNotFoundError:
        print("Error: TXT file not found.")


In [68]:
def main():
    txt_path = input("Enter the TXT file path: ")
    text = extract_text_from_txt(txt_path)

    if text:
        tokens = tokenize_text(text)

        while True:
            keyword = input("Enter a keyword to find similar words (or 'q' to quit): ")
            if keyword.lower() == 'q':
                break

            similar_words = find_similar_words(keyword)

            if similar_words:
                print("Found similar words:", ", ".join(similar_words))

                replacement = input("Enter the replacement word (or 'n' to skip): ")
                if replacement.lower() != 'n':
                    old_text = keyword
                    new_text = replacement
                    print(f"Updating '{old_text}' to '{new_text}' in the text file.")
                    update_text_file(txt_path, old_text, new_text)

if __name__ == "__main__":
    main()

Enter the TXT file path: /content/Employee-Handbook.txt
Enter a keyword to find similar words (or 'q' to quit): harassment
Found similar words: harassment, harassment
Enter the replacement word (or 'n' to skip): HARASSMENT
Updating 'harassment' to 'HARASSMENT' in the text file.
Text file updated successfully.
Enter a keyword to find similar words (or 'q' to quit): q
