/
Correct.hs
42 lines (34 loc) · 1.59 KB
/
Correct.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
module Tim.Spell.Correct where
import Char
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Ord
import List
lowerWords = filter (not . null)
. map (map toLower . filter isAlpha)
. words
train w = Map.fromListWith (+)
$ zip w
$ repeat (1::Int)
readNWORDS = fmap (train . lowerWords)
$ readFile "big.txt"
alphabet = [ 'a' .. 'z' ]
edits1 word = let s = zip (inits word) (tails word)
deletes = [ a ++ y | (a, _:y ) <- s ]
transposes = [ a ++ y:x:z | (a, x:y:z) <- s ]
replaces = [ a ++ c:y | (a, _:y ) <- s, c <- alphabet ]
inserts = [ a ++ c:x | (a, x ) <- s, c <- alphabet ]
in Set.fromList $ concat [ deletes, transposes, replaces, inserts ]
known_edits2 knownWords = Set.unions
. Set.elems
. Set.map (Set.intersection knownWords . edits1)
. edits1
correct nwords word = let knownWords = Map.keysSet nwords
candidates = Set.elems
$ head
$ filter (not . Set.null)
$ [ Set.intersection knownWords $ Set.singleton word,
Set.intersection knownWords $ edits1 word,
known_edits2 knownWords word,
Set.singleton word ]
in maximumBy (comparing (`Map.lookup` nwords)) candidates