In [4]:
import re
from difflib import get_close_matches

def load_correct_forms():
    """Build the set of known-correct forms used by the corrector.

    The set is seeded with two built-in lists (the Tamil vowels plus the
    pulli-marked consonants, and a small sample of vowel and
    consonant-vowel letters) and then extended with the entries of
    ``words.txt``, read from the current working directory with one
    entry per line.

    Each line of ``words.txt`` is stripped of surrounding whitespace and
    blank lines are ignored, so the empty string and padded duplicates
    never end up in the dictionary. A leading UTF-8 byte-order mark is
    tolerated.

    Returns:
        set: All known-correct forms as strings. If ``words.txt`` does
        not exist, a notice is printed and only the built-in forms are
        returned.
    """
    correct_forms = set()

    ilakkanam_forms = [
        "அ", "ஆ", "இ", "ஈ", "உ", "ஊ", "எ", "ஏ", "ஐ", "ஒ", "ஓ", "ஔ",
        "க்", "ச்", "ட்", "த்", "ப்", "ற்",
        "ங்", "ஞ்", "ண்", "ந்", "ம்", "ன்",
        "ய்", "ர்", "ல்", "வ்", "ழ்", "ள்"
    ]
    correct_forms.update(ilakkanam_forms)

    letters_forms = [
        "அ", "ஆ", "இ", "ஈ", "உ", "ஊ", "எ", "ஏ", "ஐ", "ஒ", "ஓ", "ஔ",
        "க", "கா", "கி", "கீ", "கு", "கூ", "செ", "சே", "தை", "பூ"
    ]
    correct_forms.update(letters_forms)

    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but also discards a
        # leading BOM, which would otherwise be glued to the first word.
        with open("words.txt", "r", encoding="utf-8-sig") as file:
            for line in file.read().splitlines():
                word = line.strip()
                # Skip blank lines so "" never counts as a correct form.
                if word:
                    correct_forms.add(word)
    except FileNotFoundError:
        print("words.txt not found. Make sure the file exists in the directory.")

    return correct_forms

def tokenize_paragraph(paragraph):
    """Split *paragraph* into a list of non-empty tokens.

    Whitespace and the characters ``. , ; ! ? " ( )`` act as delimiters
    and are discarded; every maximal run of other characters is a token.
    """
    # Matching runs of non-delimiter characters directly never produces
    # empty strings, so no post-filtering is needed.
    return re.findall(r'[^\s.,;!?"()]+', paragraph)

def suggest_correction(word, correct_forms):
    """Return the closest known form to *word*, or ``None``.

    The candidate comes from ``difflib.get_close_matches`` restricted to
    the single best match with a similarity cutoff of 0.7; ``None`` is
    returned when nothing in *correct_forms* reaches that cutoff.
    """
    best = get_close_matches(word, correct_forms, n=1, cutoff=0.7)
    if not best:
        return None
    return best[0]

def correct_paragraph(paragraph, correct_forms):
    """Return *paragraph* with unknown tokens replaced by close matches.

    The paragraph is tokenized with ``tokenize_paragraph``. A token that
    is already in *correct_forms* is kept; otherwise it is replaced by
    the result of ``suggest_correction`` when one is found, and kept
    unchanged when none is. The resulting tokens are joined with single
    spaces, so delimiters removed by the tokenizer are not restored.
    """
    def resolve(word):
        # Known forms pass through without a fuzzy lookup.
        if word in correct_forms:
            return word
        # Fall back to the original word when no suggestion is found.
        return suggest_correction(word, correct_forms) or word

    return " ".join(resolve(word) for word in tokenize_paragraph(paragraph))

def main():
    """Run a demo: correct a fixed sample paragraph and print both versions."""
    known_forms = load_correct_forms()
    sample = "உங்கள் நலமாக இருக்கிறேன்"

    # Heading and text are separate arguments joined by a newline, which
    # prints each on its own line.
    print("Original Paragraph:", sample, sep="\n")

    fixed = correct_paragraph(sample, known_forms)

    # The leading "\n" in the heading leaves a blank line between sections.
    print("\nCorrected Paragraph:", fixed, sep="\n")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Original Paragraph:
உங்கள் நலமாக இருக்கிறேன்

Corrected Paragraph:
உங்கள் நலமாக இருக்கிறேன்


Original Paragraph:
உங்கள் நலமாக இருக்கிறேன்

Corrected Paragraph:
உங்கள் நலமாக இருக்கிறேன்
