In [118]:
import re
from collections import Counter
import numpy as np
import pandas as pd
import nltk


In [119]:
# Load the reference corpus, lower-case it and tokenise it into words.
# `w` keeps every token (with repeats, used for frequency counts) while
# `vocab` is the set of distinct known words.
with open('sample.txt','r',encoding="utf8") as f:
    file = f.read().lower()
    # Raw string: '\w' inside a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    w = re.findall(r'\w+',file)
vocab = set(w)

In [120]:
def find_wrong_word(sentence, vocab):
    """Return the words of ``sentence`` that are not present in ``vocab``.

    The sentence is lower-cased and split on runs of whitespace, so an empty
    sentence or repeated/leading/trailing spaces never produce empty-string
    "words" that would be reported as misspellings.

    Args:
        sentence: Text to check.
        vocab: Container of known lower-case words; membership is tested
            with ``in``.

    Returns:
        list: Unknown words in order of appearance. A word that occurs
        several times in the sentence is listed each time.
    """
    # split() without an argument collapses consecutive whitespace and
    # returns [] for an empty string, unlike split(" ").
    return [word for word in sentence.lower().split() if word not in vocab]

In [121]:
def switch_letter(word, verbose=False):
    """Build every string obtained by swapping two adjacent characters.

    Args:
        word: The string to transform.
        verbose: Accepted for interface compatibility; not used.

    Returns:
        list: One entry per adjacent pair, ordered by the position of the
        pair. Words shorter than two characters yield an empty list.
    """
    swapped = []
    for pos in range(len(word) - 1):
        head, tail = word[:pos], word[pos + 2:]
        first, second = word[pos], word[pos + 1]
        swapped.append(head + second + first + tail)
    return swapped

In [122]:
def replace_letter(word):
    """Build every string obtained by replacing one character of ``word``.

    Each position is substituted with every lower-case ASCII letter that
    differs from the character currently at that position.

    Args:
        word: The string to transform.

    Returns:
        list: The distinct resulting strings in ascending sorted order.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    variants = {
        word[:pos] + ch + word[pos + 1:]
        for pos in range(len(word))
        for ch in alphabet
        if ch != word[pos]
    }
    return sorted(variants)

In [123]:
def insert_letter(word):
    """Build every string obtained by inserting one letter into ``word``.

    Args:
        word: The string to transform.

    Returns:
        list: For each insertion point (from before the first character to
        after the last), the 26 strings produced by inserting each
        lower-case ASCII letter there. Duplicates are kept.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [
        word[:pos] + ch + word[pos:]
        for pos in range(len(word) + 1)
        for ch in alphabet
    ]

In [124]:
def delete_letter(word):
    """Build every string obtained by removing one character from ``word``.

    Args:
        word: The string to transform.

    Returns:
        list: One entry per character position, in position order.
        An empty word yields an empty list.
    """
    shortened = []
    for pos in range(len(word)):
        shortened.append(word[:pos] + word[pos + 1:])
    return shortened

In [125]:
def edit_one_letter(word, allow_switches = True):
    """Collect all strings that are one edit away from ``word``.

    An edit is a single insertion, deletion or replacement, plus a swap of
    two adjacent characters when ``allow_switches`` is true.

    Args:
        word: The string to transform.
        allow_switches: Whether adjacent-character swaps count as edits.

    Returns:
        set: The distinct edited strings.
    """
    candidates = set(insert_letter(word))
    candidates.update(delete_letter(word))
    candidates.update(replace_letter(word))
    transposed = switch_letter(word)
    if allow_switches:
        candidates.update(transposed)
    return candidates

In [126]:
def edit_two_letters(word, allow_switches = True):
    """Collect all strings reachable from ``word`` with one or two edits.

    Args:
        word: The string to transform.
        allow_switches: Whether adjacent-character swaps count as edits.
            The flag is forwarded to both rounds of ``edit_one_letter``.

    Returns:
        set: Every one-edit string together with every string one further
        edit away from any of them.
    """
    reachable = set()
    for once in edit_one_letter(word, allow_switches=allow_switches):
        # The one-edit strings themselves are part of the result.
        reachable.add(once)
        reachable.update(edit_one_letter(once, allow_switches=allow_switches))
    return reachable

In [127]:
def get_count(w):
    """Count how often each item occurs in ``w``.

    Args:
        w: Iterable of hashable items (the corpus tokens).

    Returns:
        collections.Counter: Mapping from item to number of occurrences.
    """
    tally = Counter()
    tally.update(w)
    return tally

def get_probs(word_count_dict):
    """Convert word counts into relative frequencies.

    Args:
        word_count_dict: Mapping from word to its occurrence count.

    Returns:
        dict: Mapping from word to ``count / total`` where ``total`` is the
        sum of all counts, so the values sum to 1. An empty mapping yields
        an empty dict; if every count is zero all probabilities are 0.0.
    """
    # Normalise by the true total; starting the sum at 1 would skew every
    # probability and keep the distribution from summing to 1.
    total = sum(word_count_dict.values())
    if total == 0:
        return {word: 0.0 for word in word_count_dict}
    return {word: count / total for word, count in word_count_dict.items()}

In [128]:
def get_corrections(word, probs, n=2):
    """Suggest up to ``n`` known words that are close to ``word``.

    Candidates are drawn from three tiers, in order: the word itself, the
    strings one edit away, and the strings up to two edits away. Within a
    tier the most probable known words are taken first (ties broken
    alphabetically), so the result does not depend on set iteration order.
    A later tier is only generated if earlier tiers left room.

    Args:
        word: The (possibly misspelled) word.
        probs: Mapping from known word to its probability.
        n: Maximum number of suggestions; values <= 0 yield an empty list.

    Returns:
        list: ``(word, probability)`` tuples sorted by descending
        probability, then alphabetically.
    """
    # Tiers are wrapped in callables so the expensive two-edit expansion is
    # only computed when it can still contribute suggestions.
    tiers = (
        lambda: [word],
        lambda: edit_one_letter(word),
        lambda: edit_two_letters(word),
    )
    selected = {}
    for make_tier in tiers:
        room = n - len(selected)
        if room <= 0:
            break
        # Skip words already chosen by an earlier tier so duplicates do not
        # consume suggestion slots.
        known = [c for c in make_tier() if c in probs and c not in selected]
        known.sort(key=lambda c: (-probs[c], c))
        for candidate in known[:room]:
            selected[candidate] = probs[candidate]
    return sorted(selected.items(), key=lambda item: (-item[1], item[0]))

In [129]:
def get_correct_word(word, probs, n):
    """Pick the most probable correction for ``word``.

    Args:
        word: The word to correct.
        probs: Mapping from known word to its probability.
        n: Maximum number of candidate corrections to request.

    Returns:
        The candidate with the highest probability (the earliest one on
        ties), or ``word`` unchanged when there are no candidates.
    """
    candidates = get_corrections(word, probs, n)
    if not candidates:
        return word
    # max() returns the first maximal item, matching a strict ">" scan.
    best = max(candidates, key=lambda pair: pair[1])
    return best[0]

In [130]:
def autocorrect(sentence, vocab, probs, n_candidates=25):
    """Print ``sentence`` with every unknown word replaced by its best correction.

    The sentence is lower-cased, split on single spaces, corrected word by
    word, re-joined with spaces and capitalised before printing.

    Args:
        sentence: Text to correct.
        vocab: Container of known lower-case words.
        probs: Mapping from known word to its probability.
        n_candidates: Maximum number of candidate corrections considered
            per unknown word.

    Returns:
        None. The corrected sentence is written to standard output.
    """
    unknown = set(find_wrong_word(sentence, vocab))
    corrected = []
    for word in sentence.strip().lower().split(" "):
        # Consecutive spaces produce empty tokens; they are kept as-is so
        # spacing is preserved and no spurious word is invented for them.
        if word and word in unknown:
            word = get_correct_word(word, probs, n_candidates)
        corrected.append(word)
    print("The corrected sentence : "," ".join(corrected).capitalize())

sentence = input("Enter a sentence: ")
print("Input Sentence: "+sentence)
wrong_words = find_wrong_word(sentence, vocab)
# Build the frequency model once from the corpus tokens `w`. It does not
# depend on the misspelled words, and computing it unconditionally keeps
# `probs` defined even when the sentence contains no unknown words.
word_count_dict = get_count(w)
probs = get_probs(word_count_dict)
autocorrect(sentence, vocab, probs)

Input Sentence: I liki progremmeng alot
The corrected sentence :  I like programming alot
