In [1]:
import re
from collections import Counter

In [2]:
def words(document):
    """Return the word tokens of `document`, lowercased, in order of appearance.

    The text is lowercased first, then split into maximal runs of regex
    word characters; everything else acts as a separator.
    """
    lowered = document.lower()
    return re.findall(r'\w+', lowered)

In [3]:
# Build the word-frequency table from the corpus file. The `with` block closes
# the file handle as soon as the text has been read, instead of leaving the
# open handle to be reclaimed whenever the garbage collector gets to it.
with open("big.txt") as corpus_file:
    all_words = Counter(words(corpus_file.read()))

In [4]:
# Occurrence count of "chair" in the corpus (a Counter yields 0 for unseen keys).
all_words['chair']

135

In [5]:
# The ten most frequent words in the corpus, paired with their counts.
all_words.most_common(10)

[('the', 79809),
 ('of', 40024),
 ('and', 38312),
 ('to', 28765),
 ('in', 22023),
 ('a', 21124),
 ('that', 12512),
 ('he', 12401),
 ('was', 11410),
 ('it', 10681)]

In [6]:
def edits_one(word):
    """Return the set of strings produced by one simple edit of `word`.

    For every split point of `word` the following edits are generated:
      * insert  - put one lowercase letter at the split point
      * delete  - drop the character right after the split point
      * replace - overwrite that character with each lowercase letter
      * swap    - exchange the two characters right after the split point

    Because a character may be "replaced" by itself, a non-empty `word` is
    always a member of the result.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    candidates = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # Insertions are possible at every split, including the very end.
        for letter in alphabet:
            candidates.add(head + letter + tail)

        # The remaining edits all consume at least one character of `tail`.
        if not tail:
            continue

        rest = tail[1:]
        candidates.add(head + rest)
        for letter in alphabet:
            candidates.add(head + letter + rest)

        # A swap needs two characters after the split point.
        if len(tail) > 1:
            candidates.add(head + tail[1] + tail[0] + tail[2:])
    return candidates

In [7]:
# Step-by-step walkthrough of the edit families for one sample word, using the
# same expressions as the body of edits_one so each list can be inspected.
word = "money"
alphabets = 'abcdefghijklmnopqrstuvwxyz'

# Every (prefix, suffix) pair, from ("", word) through (word, "").
splits = [(word[:cut], word[cut:]) for cut in range(len(word) + 1)]

# Drop the first character of the suffix.
deletes = [head + tail[1:] for head, tail in splits if tail]

# Put each letter between prefix and suffix.
inserts = [head + letter + tail for head, tail in splits for letter in alphabets]

# Overwrite the first character of the suffix with each letter.
replaces = [head + letter + tail[1:] for head, tail in splits if tail for letter in alphabets]

# Swap the first two characters of the suffix.
transposes = [head + tail[1] + tail[0] + tail[2:] for head, tail in splits if len(tail) > 1]

In [8]:
# Show the adjacent-letter swaps computed for the sample word.
transposes

['omney', 'mnoey', 'moeny', 'monye']

In [9]:
def edits_two(word):
    """Return a generator over every string two simple edits away from `word`.

    Each single-edit variant of `word` is edited once more with edits_one.
    The result is lazy and may yield the same string more than once; wrap it
    in set() when distinct values are needed.
    """
    first_round = edits_one(word)
    return (
        twice_edited
        for once_edited in first_round
        for twice_edited in edits_one(once_edited)
    )

In [10]:
def known(words):
    """Return the subset of `words` that appear as keys in `all_words`.

    `words` may be any iterable of strings. Note that inside this function
    the parameter name hides the module-level `words` tokenizer.
    """
    return {candidate for candidate in words if candidate in all_words}

In [11]:
def possible_corrections(word):
    """Return candidate spellings for `word`, preferring the closest match.

    Tiers are tried in order and the first non-empty one wins:
      1. `word` itself, if it is known
      2. known words one edit away
      3. known words two edits away
      4. `[word]` as a last resort (a list, unlike the sets returned above)
    """
    exact = known([word])
    if exact:
        return exact

    one_away = known(edits_one(word))
    if one_away:
        return one_away

    # Only reached when nothing closer matched, so the costly two-edit
    # expansion is skipped for most inputs.
    two_away = known(edits_two(word))
    if two_away:
        return two_away

    return [word]

In [12]:
def prob(word, N=sum(all_words.values())):
    """Return the relative frequency of `word`: its count divided by `N`.

    The default for `N` is the total of all counts in `all_words`. That
    default is computed once, when this `def` statement runs, so it does not
    follow later changes to `all_words` unless the cell is re-executed.
    """
    occurrences = all_words[word]
    return occurrences / N

In [13]:
# Number of distinct candidate corrections returned for a misspelled word.
print(len(set(possible_corrections("emfasize"))))

# Every single-edit variant of "monney"; this prints the whole set.
print(edits_one("monney"))

1
{'mkonney', 'molney', 'monned', 'monwey', 'mojney', 'monnev', 'monneyt', 'mojnney', 'mozney', 'monxney', 'jonney', 'monnek', 'mqnney', 'xonney', 'monneyz', 'pmonney', 'monneb', 'monneu', 'moinney', 'monkney', 'mowney', 'moqney', 'monneys', 'monndey', 'monnoy', 'monxey', 'monner', 'bmonney', 'monnefy', 'moxney', 'zmonney', 'monnery', 'monuey', 'monny', 'mxonney', 'monnesy', 'mqonney', 'maonney', 'monneyq', 'monpney', 'monneyh', 'mnoney', 'monnhy', 'monnwey', 'moknney', 'gmonney', 'gonney', 'mmnney', 'monpey', 'monnny', 'monnby', 'monneyv', 'monqney', 'monyney', 'mgnney', 'monoey', 'momnney', 'ronney', 'moynney', 'monnmey', 'monvney', 'monnvy', 'molnney', 'monnevy', 'mwnney', 'omonney', 'monfney', 'lonney', 'moniney', 'monaney', 'conney', 'mfnney', 'mnnney', 'movney', 'hmonney', 'monkey', 'monnzy', 'mronney', 'monnkey', 'mogney', 'vonney', 'moznney', 'msonney', 'mdnney', 'monneq', 'mdonney', 'mooney', 'moiney', 'modnney', 'monnfey', 'monnzey', 'wonney', 'movnney', 'monhney', 'monnqy', 

In [14]:
# Keep only the single-edit variants of "monney" that occur in the corpus.
print(known(edits_one("monney")))

{'money', 'monkey'}


In [15]:
# Number of distinct strings produced by two rounds of edits on "monney".
print(len(set(edits_two("monney"))))
# Same single-edit check as the previous cell, repeated for comparison.
print(known(edits_one("monney")))

51013
{'money', 'monkey'}


In [16]:
# Candidate corrections chosen for "monney" by the tiered lookup.
print(possible_corrections("monney"))

{'money', 'monkey'}


In [17]:
# Relative corpus frequency of each candidate; the larger value wins in spell_check.
print(prob("money"))
print(prob("monkey"))

0.0002922233626303688
5.378344097491451e-06


In [18]:
def spell_check(word):
    """Return a message with the most probable spelling correction for `word`.

    Returns one of three strings:
      * "Correct spelling."         - `word` is a key of `all_words`
      * "Did you mean <best>?"      - `<best>` is the candidate from
                                      `possible_corrections` with the highest `prob`
      * "No correction found."      - `word` is unknown and no known word lies
                                      within two edits of it

    The message is returned, not printed; callers decide how to display it.
    """
    if word in all_words:
        return "Correct spelling."

    correct_word = max(possible_corrections(word), key=prob)

    # When nothing known is within two edits, possible_corrections falls back
    # to [word]. Getting the input back therefore means "no candidate", not
    # "spelled correctly", because known words were already handled above.
    if correct_word == word:
        return "No correction found."

    return "Did you mean " + correct_word + "?"

In [19]:
# End-to-end demo: ask the checker about a misspelled word.
print(spell_check("monney"))

Did you mean money?
