<a href="https://colab.research.google.com/github/Sajishvar/Spell_Grammer_Checker_Tamil/blob/sajishvar/Spell_Grammer_Checker_Tamil.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
from collections import Counter

# Switch standard output to UTF-8 when the stream object provides a
# `reconfigure` method; streams without that method are left as they are.
# NOTE(review): presumably needed so Tamil text prints correctly — confirm.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

In [None]:
def load_dictionary(file_path):
    """Load the set of known words from a text file, one entry per line.

    Args:
        file_path: Path to a UTF-8 encoded dictionary file.

    Returns:
        A set holding every non-blank line with its surrounding whitespace
        removed. Duplicate lines collapse into a single entry.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise stay attached to the first word.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        # Skip blank lines so the empty string is never treated as a word.
        return {word for word in stripped_lines if word}

In [None]:
def word_length_similarity(misspelt_word, candidate_word):
    """Score whether two words have the same length.

    Args:
        misspelt_word: The word being checked.
        candidate_word: The dictionary word it is compared against.

    Returns:
        1.0 when `len()` of both words is equal, otherwise 0.0.
    """
    same_length = len(misspelt_word) == len(candidate_word)
    if same_length:
        return 1.0
    return 0.0

In [None]:
def character_frequency_similarity(misspelt_word, candidate_word):
    """Return the fraction of the misspelt word's characters shared with the candidate.

    Each character counts as many times as it occurs in both words (the
    smaller of the two occurrence counts); character order is ignored.

    Args:
        misspelt_word: The word being checked.
        candidate_word: The dictionary word it is compared against.

    Returns:
        A float between 0.0 and 1.0: the number of shared characters divided
        by the length of `misspelt_word`. An empty `misspelt_word` scores 0.0.
    """
    # Nothing to match against; also avoids dividing by a zero length.
    if not misspelt_word:
        return 0.0
    # Counter intersection keeps, per character, the minimum of both counts.
    shared = Counter(misspelt_word) & Counter(candidate_word)
    return sum(shared.values()) / len(misspelt_word)

In [None]:
def position_similarity(misspelt_word, candidate_word):
    """Return the fraction of aligned positions holding the same character.

    The words are compared index by index over the length of the shorter
    one; any extra tail on the longer word is ignored.

    Args:
        misspelt_word: The word being checked.
        candidate_word: The dictionary word it is compared against.

    Returns:
        A float between 0.0 and 1.0: matching positions divided by the
        length of the shorter word. Returns 0.0 when either word is empty.
    """
    # Normalise by the shorter word so a length difference is not penalised.
    shorter_length = min(len(misspelt_word), len(candidate_word))
    # No positions to compare; also avoids dividing by zero.
    if shorter_length == 0:
        return 0.0
    match_count = sum(
        m_char == c_char for m_char, c_char in zip(misspelt_word, candidate_word)
    )
    return match_count / shorter_length

In [None]:
def first_letter_similarity(misspelt_word, candidate_word):
    """Score whether both words begin with the same character.

    Args:
        misspelt_word: The word being checked.
        candidate_word: The dictionary word it is compared against.

    Returns:
        1.0 when the first characters are equal, otherwise 0.0. Returns 0.0
        when either word is empty, since there is no first character.
    """
    # Guard against indexing into an empty word.
    if not misspelt_word or not candidate_word:
        return 0.0
    return 1.0 if misspelt_word[0] == candidate_word[0] else 0.0

In [None]:
def levenshtein_distance(word1, word2):
    """Return the Levenshtein (edit) distance between two words.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn `word1` into `word2`,
    computed by filling a (len(word1) + 1) x (len(word2) + 1) table.

    Args:
        word1: The source word.
        word2: The target word.

    Returns:
        The edit distance as a non-negative int.
    """
    rows, cols = len(word1) + 1, len(word2) + 1

    # Row 0 is the cost of building each prefix of word2 from nothing;
    # column 0 of every later row is the cost of deleting a prefix of word1.
    table = [list(range(cols))]
    table.extend([r] + [0] * (cols - 1) for r in range(1, rows))

    for r, ch1 in enumerate(word1, start=1):
        above = table[r - 1]
        current = table[r]
        for c, ch2 in enumerate(word2, start=1):
            deletion = above[c] + 1
            insertion = current[c - 1] + 1
            # Substituting costs nothing when the characters already agree.
            substitution = above[c - 1] + (0 if ch1 == ch2 else 1)
            current[c] = min(deletion, insertion, substitution)

    return table[-1][-1]