In [None]:
# Install NLTK if needed
!pip install nltk





In [None]:
import nltk
nltk.download('words')
from nltk.corpus import words
import re
from collections import Counter


[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


In [None]:
# Use the NLTK words corpus as our vocabulary
word_list = words.words()
word_freq = Counter(word_list)  # Count frequencies, though here it's a simple corpus with each word appearing once

# Define a set of all known words
WORD_SET = set(word_list)


In [None]:
# Define a function to calculate minimum edit distance
def edit_distance(word1, word2):
    dp = [[0] * (len(word2) + 1) for _ in range(len(word1) + 1)]
    for i in range(len(word1) + 1):
        for j in range(len(word2) + 1):
            if i == 0:
                dp[i][j] = j  # Cost of insertions
            elif j == 0:
                dp[i][j] = i  # Cost of deletions
            elif word1[i - 1] == word2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # No change cost
            else:
                dp[i][j] = 1 + min(dp[i - 1][j],      # Deletion
                                   dp[i][j - 1],      # Insertion
                                   dp[i - 1][j - 1])  # Substitution
    return dp[-1][-1]

# Define a function to calculate word probability
def word_probability(word, N=sum(word_freq.values())):
    return word_freq[word] / N if word in word_freq else 0


In [None]:
# Suggest corrections based on edit distance and probability
def autocorrect(word):
    # If the word is correct, return it as is
    if word in WORD_SET:
        return word

    # Find candidate words within an edit distance of 1 or 2
    candidates = [w for w in WORD_SET if edit_distance(word, w) <= 2]

    # Choose the candidate with the highest probability
    corrected_word = max(candidates, key=word_probability, default=word)

    return corrected_word


In [None]:
# Test the function with common misspellings
test_words = ["speling", "korrect", "exampl", "wrld"]

for word in test_words:
    print(f"Original: {word} -> Suggested: {autocorrect(word)}")


Original: speling -> Suggested: feeling
Original: korrect -> Suggested: horrent
Original: exampl -> Suggested: example
Original: wrld -> Suggested: word
