In [1]:
import nltk
nltk.download('words')
nltk.download('brown')
nltk.download('punkt')

import re
from collections import Counter
import string
import spacy
from nltk.corpus import words as nltk_words, brown



[nltk_data] Downloading package words to
[nltk_data]     C:\Users\CHELSA\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\CHELSA\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\CHELSA\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# Load spaCy model
nlp = spacy.load("en_core_web_sm")



In [3]:
# English word list and frequency from corpora
word_list = set(w.lower() for w in nltk_words.words())
word_freq = Counter(brown.words())
word_freq = {word.lower(): freq for word, freq in word_freq.items()}



In [5]:
# Word probability based on frequency
total_count = sum(word_freq.values())
word_prob = {word: count / total_count for word, count in word_freq.items()}



In [7]:
# Lemmatization (optional, not used in correction logic here)
def LemmWord(text):
    doc = nlp(text)
    return [token.lemma_ for token in doc]



In [9]:
# Generate edit-distance-1 words
def edits1(word):
    letters = string.ascii_lowercase
    splits = [(word[:i], word[i:]) for i in range(len(word)+1)]
    deletes = [L + R[1:] for L, R in splits if R]
    transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]
    replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
    inserts = [L + c + R for L, R in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)



In [13]:
# Return known words from vocabulary
def known(words):
    return set(w for w in words if w in word_list)



In [31]:
# Autocorrect function using edit-distance 1
def autocorrect(word):
    word = word.lower()
    candidates = known([word]) or known(edits1(word)) or [word]
    return sorted(candidates, key=lambda w: -word_prob.get(w, 0))[:5]



In [33]:
# Run it
input_word = input("Enter any word: ")
print("Suggestions:", autocorrect(input_word))


Enter any word:  wagt


Suggestions: ['wage', 'wait', 'wart', 'walt', 'want']
