In [1]:
import re
from collections import Counter
import itertools

In [2]:
def words(text):
    """Return all runs of word characters (regex ``\\w+``) in ``text``, lowercased.

    The tokens are returned as a list, in the order they appear in ``text``.
    """
    lowered = text.lower()
    return re.findall(r'\w+', lowered)

# Word-frequency table built from the corpus: maps each lowercased token to the
# number of times it occurs. The file is opened in a `with` block so the handle
# is closed as soon as the text has been read.
with open('corpus.txt') as corpus_file:
    WORDS = Counter(words(corpus_file.read()))

def P(word, N=sum(WORDS.values())):
    """Return the relative frequency of ``word``: its count in ``WORDS`` over ``N``.

    ``N`` defaults to the total of all counts in ``WORDS``. That default is
    evaluated once, when this ``def`` statement runs, so it is not refreshed
    if ``WORDS`` is modified afterwards.
    """
    occurrences = WORDS[word]
    return occurrences / N

def correction(word):
    """Return the candidate for ``word`` that scores highest under ``P``."""
    options = candidates(word)
    return max(options, key=P)

def candidates(word):
    """Return the possible corrections for ``word``.

    The first non-empty option wins: the word itself if it is known, else the
    known words one edit away, else a one-element list holding ``word`` as is.
    """
    exact = known([word])
    if exact:
        return exact
    one_edit_away = known(edits1(word))
    if one_edit_away:
        return one_edit_away
    return [word]

def known(words):
    """Return the set of entries from ``words`` that are present in ``WORDS``."""
    return {token for token in words if token in WORDS}

def edits1(word):
    """Return the set of strings that are one simple edit away from ``word``.

    An edit is a single-character deletion, a swap of two adjacent characters,
    a replacement of one character by a lowercase ASCII letter, or an insertion
    of a lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        # An insertion is possible at every cut point, including the very end.
        variants.update(head + ch + tail for ch in alphabet)
        if not tail:
            continue
        rest = tail[1:]
        # Drop the character right after the cut.
        variants.add(head + rest)
        # Replace the character right after the cut with each letter.
        variants.update(head + ch + rest for ch in alphabet)
        if len(tail) > 1:
            # Swap the two characters right after the cut.
            variants.add(head + tail[1] + tail[0] + tail[2:])
    return variants

# Enhanced version of the algorithm: the definitions below also consider candidates two edits away.
def edits2(word):
    """Return an iterator over every string two simple edits away from ``word``.

    Results are produced lazily and may contain duplicates, because each
    one-edit variant of ``word`` is expanded independently with ``edits1``.
    """
    return itertools.chain.from_iterable(edits1(first) for first in edits1(word))

def candidates2(word):
    """Return the possible corrections for ``word``, searching up to two edits.

    The first non-empty option wins: the word itself if it is known, else the
    known words one edit away, else the known words two edits away, else a
    one-element list holding ``word`` as is.
    """
    exact = known([word])
    if exact:
        return exact
    one_edit_away = known(edits1(word))
    if one_edit_away:
        return one_edit_away
    two_edits_away = known(edits2(word))
    if two_edits_away:
        return two_edits_away
    return [word]

def correction2(word):
    """Return the candidate from ``candidates2(word)`` that scores highest under ``P``."""
    options = candidates2(word)
    return max(options, key=P)

In [3]:
# Load the misspelled test words as a list of lowercased tokens. The `with`
# block closes the file handle once its contents have been read.
with open('test-words-misspelled.txt') as misspelled_file:
    misspelled_words = words(misspelled_file.read())

In [4]:
# Run both correctors over every misspelled word. The result lists are aligned
# with `misspelled_words` position by position.
corrected_words = [correction(word) for word in misspelled_words]
corrected_words_enhanced = [correction2(word) for word in misspelled_words]

# Lookup tables from each misspelled word to its correction. If a misspelling
# appears more than once, the entry for its last occurrence is kept.
misspelled_corrected = dict(zip(misspelled_words, corrected_words))
misspelled_corrected_enhanced = dict(zip(misspelled_words, corrected_words_enhanced))

In [5]:
# Show the first five misspelling -> correction pairs produced by each version.
preview_first = dict(itertools.islice(misspelled_corrected.items(), 5))
print('First Version first 5 words', preview_first)
print('\n')
preview_enhanced = dict(itertools.islice(misspelled_corrected_enhanced.items(), 5))
print('Enhanced version first 5 words', preview_enhanced)

First Version first 5 words {'abilty': 'ability', 'abraod': 'abroad', 'acedemic': 'academic', 'accesion': 'accession', 'accomodate': 'accommodate'}


Enhanced version first 5 words {'abilty': 'ability', 'abraod': 'abroad', 'acedemic': 'academic', 'accesion': 'accession', 'accomodate': 'accommodate'}


In [6]:
# Output files for the corrected words: file1 for the first version, file2 for
# the enhanced version. Mode 'w+' creates each file or truncates an existing one.
file1 = open('corrected_words_file.txt', mode='w+')
file2 = open('corrected_words_enhanced_file.txt', mode='w+')

In [7]:
# Write one corrected word per line, with no trailing newline after the last.
# '\n'.join places separators by position, so an earlier word that happens to
# equal the last word still gets its newline. Each list is written only to its
# own file. Using the handles as context managers flushes and closes them once
# the write is done.
with file1:
    file1.write('\n'.join(corrected_words))

with file2:
    file2.write('\n'.join(corrected_words_enhanced))

In [8]:
# Load the expected (correctly spelled) words as a list of lowercased tokens.
# The `with` block closes the file handle once its contents have been read.
with open('test-words-correct.txt') as correct_file:
    correct_words = words(correct_file.read())

In [9]:
# Tally, position by position, how often each version's output matches the
# expected word. `corrects`/`wrongs` score the first version; `corrects2`/
# `wrongs2` score the enhanced version.
corrects = wrongs = 0
corrects2 = wrongs2 = 0
for i, first_guess in enumerate(corrected_words):
    expected = correct_words[i]
    first_hit = first_guess == expected
    corrects += first_hit
    wrongs += not first_hit
    enhanced_hit = corrected_words_enhanced[i] == expected
    corrects2 += enhanced_hit
    wrongs2 += not enhanced_hit

In [10]:
# Print the same four-line report for each version, with a blank-line gap
# between the two sections.
report_sections = (
    ('-----------------First Version Stats-----------------',
     corrects, wrongs, corrected_words),
    ('------------Second Enhanced Version Stats------------',
     corrects2, wrongs2, corrected_words_enhanced),
)
for section_index, (banner, n_right, n_wrong, outputs) in enumerate(report_sections):
    if section_index:
        print('\n')
    print(banner)
    print('Number of correct words:', n_right)
    print('Number of wrong words:', n_wrong)
    # Accuracy is the share of outputs that matched, as a percentage.
    print('Accuracy Rate:', ((n_right / len(outputs)) * 100), '%')

-----------------First Version Stats-----------------
Number of correct words: 281
Number of wrong words: 103
Accuracy Rate: 73.17708333333334 %


------------Second Enhanced Version Stats------------
Number of correct words: 295
Number of wrong words: 89
Accuracy Rate: 76.82291666666666 %


In [11]:
# Interactive prompt: read a word and a method choice, then print the
# correction. Entering 0 as the word ends the loop.
correctors_by_choice = {'1': correction, '2': correction2}
while True:
    val = input('Enter a word to correct. Enter 0 for exit.').strip()
    if val == '0':
        break
    if not val:
        # Nothing to correct; ask again rather than "correcting" an empty string.
        continue
    version = input('Chose the method 1 for first, 2 for enhanced.').strip()
    corrector = correctors_by_choice.get(version)
    if corrector is None:
        # Report an unrecognised choice instead of silently ignoring it.
        print('Unknown method:', version)
        continue
    # The corpus tokens are lowercased by words(), so lowercase the query too.
    corrected_val = corrector(val.lower())
    print('Correction:', corrected_val)

Enter a word to correct. Enter 0 for exit. sper
Chose the method 1 for first, 2 for enhanced. 2


Correction: per


Enter a word to correct. Enter 0 for exit. 0


In [12]:
# Pause until the user submits a line of input (input() returns only after
# Enter is pressed). The entered text is stored in `val`.
val = input('Press any key to exit.')

Press any key to exit. 
