<a href="https://colab.research.google.com/github/Pavanipriyal13/autocorrect/blob/main/autocorrect.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re
import string
from collections import Counter


In [None]:
class SpellChecker(object):
    """Suggest spelling corrections using edit distance and word frequency.

    The vocabulary and word frequencies are learned from a text corpus.
    Corpus text is lower-cased before tokenising, so all lookups are done on
    the lower-cased form of the queried word.

    Attributes:
        words_count: ``Counter`` mapping each corpus word to its occurrences.
        vocabs: set of distinct corpus words.
        wordProbs: dict mapping each corpus word to its relative frequency
            (occurrences divided by the total number of corpus words).
    """

    def __init__(self, corpus_path):
        """Build the vocabulary from the text file at *corpus_path*.

        The file is decoded as UTF-8; undecodable bytes are replaced rather
        than aborting the load. Words are maximal runs of ``\\w`` characters
        taken from the lower-cased text.
        """
        self.words_count = Counter()
        # Stream the file line by line so the whole corpus (and a list of
        # every token) never has to sit in memory at once.
        with open(corpus_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                self.words_count.update(re.findall(r'\w+', line.lower()))

        self.vocabs = set(self.words_count)
        total_words = float(sum(self.words_count.values()))
        # An empty corpus yields an empty dict, so total_words == 0 never
        # reaches the division.
        self.wordProbs = {
            word: count / total_words
            for word, count in self.words_count.items()
        }

    def level_one_edits(self, word):
        """Return the set of strings one edit away from *word*.

        An edit is a single-character deletion, a swap of two adjacent
        characters, an insertion of an ASCII lowercase letter, or a
        replacement of one character by an ASCII lowercase letter.
        """
        letters = string.ascii_lowercase
        split = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        delete = [l + r[1:] for l, r in split if r]
        swap = [l + r[1] + r[0] + r[2:] for l, r in split if len(r) > 1]
        insert = [l + c + r for l, r in split for c in letters]
        replace = [l + c + r[1:] for l, r in split if r for c in letters]

        return set(delete + swap + replace + insert)

    def level_two_edits(self, word):
        """Return the set of strings reachable from *word* by two edits."""
        return {
            e2
            for e1 in self.level_one_edits(word)
            for e2 in self.level_one_edits(e1)
        }

    def _known(self, candidates):
        """Return the subset of *candidates* present in the vocabulary."""
        vocabs = self.vocabs
        return {w for w in candidates if w in vocabs}

    def _rank(self, candidates):
        """Return ``(word, probability)`` pairs, most probable first.

        Ties are broken alphabetically so the result does not depend on set
        iteration order, which varies between interpreter runs.
        """
        pairs = [(c, self.wordProbs[c]) for c in candidates]
        return sorted(pairs, key=lambda tup: (-tup[1], tup[0]))

    def check(self, word, *, verbose=False):
        """Return correction suggestions for *word*.

        Args:
            word: the word to check. It is lower-cased before lookup because
                the vocabulary only contains lower-cased words.
            verbose: when true, print the generated and valid candidate sets
                for each edit level (these can be very large).

        Returns:
            A list of ``(word, probability)`` tuples, highest probability
            first: the word itself if it is in the vocabulary; otherwise the
            known words one edit away; otherwise the known words two edits
            away; otherwise ``[(word, 0.0)]`` with *word* exactly as passed.
        """
        normalized = word.lower()
        if normalized in self.vocabs:
            return [(normalized, self.wordProbs[normalized])]

        # Generate level one candidates
        level_one_candidates = self.level_one_edits(normalized)
        if verbose:
            print(f"Level one candidates for '{word}': {level_one_candidates}")

        valid_candidates = self._known(level_one_candidates)
        if valid_candidates:
            if verbose:
                print(f"Valid level one candidates: {valid_candidates}")
            return self._rank(valid_candidates)

        # If no valid level one candidates, generate level two candidates
        if verbose:
            level_two_candidates = self.level_two_edits(normalized)
            print(f"Level two candidates for '{word}': {level_two_candidates}")
        else:
            # Filter lazily: the full two-edit set can hold hundreds of
            # thousands of strings, almost none of which are real words.
            level_two_candidates = (
                e2
                for e1 in level_one_candidates
                for e2 in self.level_one_edits(e1)
            )

        valid_candidates = self._known(level_two_candidates)
        if valid_candidates:
            if verbose:
                print(f"Valid level two candidates: {valid_candidates}")
            return self._rank(valid_candidates)

        # Return the original word with probability 0 if no valid candidates found
        return [(word, 0.0)]


In [None]:
# Learn the vocabulary and word frequencies from the corpus file.
checker = SpellChecker('/content/urbandict-word-defs.csv')

# Show the prompt without a newline so the typed word follows it on the same line.
print("Enter your word:", end=" ")
word = input()
word = word.strip()
suggestions = checker.check(word)

# check() returns (word, probability) pairs, highest probability first.
print("Suggestions:")
for entry in suggestions:
    suggestion, probability = entry
    print(f"Word: {suggestion}, Probability: {probability}")


Enter your word: preadict
Level one candidates for 'preadict': {'preadicit', 'preadictm', 'dreadict', 'prsadict', 'preadicut', 'preadiict', 'preajict', 'prerdict', 'preaddct', 'lpreadict', 'prdeadict', 'pcreadict', 'preawict', 'preavict', 'praedict', 'preadhct', 'preadiect', 'preadicrt', 'pceadict', 'pregdict', 'preadlict', 'prwadict', 'preadxict', 'preadkct', 'preadwict', 'preadictf', 'preadhict', 'preadic', 'premadict', 'ireadict', 'ipreadict', 'prjeadict', 'preaodict', 'rpreadict', 'preadicm', 'preadsict', 'jpreadict', 'preaidict', 'preabict', 'ppreadict', 'preadkict', 'prehadict', 'preadiqct', 'preadikct', 'preadiyct', 'preadit', 'preamdict', 'preadaict', 'preadqict', 'preadiwt', 'oreadict', 'pdeadict', 'prendict', 'preadoct', 'pqeadict', 'preadiet', 'preadicty', 'preladict', 'preadivt', 'preaydict', 'preadinct', 'preadect', 'preadxct', 'preadicst', 'preadice', 'preaict', 'dpreadict', 'plreadict', 'creadict', 'prmeadict', 'prweadict', 'preudict', 'preapdict', 'preadipt', 'pyreadict