Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 91 lines (79 sloc) 3.127 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (** String Edit Distance / Levenshtein
19 @author Henri.Binsztok\@gmail.com
20 *)
21
(** [sed distance s1 s2] computes the weighted string edit distance
    (Levenshtein-style dynamic programming) for turning [s1] into [s2].

    [distance a b] gives the cost of one elementary edit, where [a] is a
    character taken from [s1] and [b] a character taken from [s2]:
    - [distance (Some c) None] is the cost of deleting [c] (present in [s1]
      only);
    - [distance None (Some c)] is the cost of inserting [c] (present in [s2]
      only);
    - [distance (Some c1) (Some c2)] is the cost of replacing [c1] by [c2].

    The same convention is applied on the matrix borders and in the interior,
    so asymmetric insertion/deletion costs are charged consistently.

    @return the minimal accumulated cost, read from the last matrix cell. *)
let sed distance s1 s2 =
  let l1 = String.length s1
  and l2 = String.length s2 in
  (* mat.(i).(j) holds the cost of turning the first [i] characters of [s1]
     into the first [j] characters of [s2]; mat.(0).(0) starts at 0. *)
  let mat = Array.make_matrix (succ l1) (succ l2) 0. in
  (* First column: the target prefix is empty, so every character of [s1]
     is deleted. *)
  for i = 1 to l1 do
    mat.(i).(0) <- mat.(pred i).(0) +. distance (Some s1.[pred i]) None
  done;
  (* First row: the source prefix is empty, so every character of [s2]
     is inserted. *)
  for j = 1 to l2 do
    mat.(0).(j) <- mat.(0).(pred j) +. distance None (Some s2.[pred j])
  done;
  for i = 0 to pred l1 do
    for j = 0 to pred l2 do
      let c1 = s1.[i]
      and c2 = s2.[j] in
      let deletion = mat.(i).(succ j) +. distance (Some c1) None
      and insertion = mat.(succ i).(j) +. distance None (Some c2)
      and substitution = mat.(i).(j) +. distance (Some c1) (Some c2) in
      mat.(succ i).(succ j) <- min deletion (min insertion substitution)
    done
  done;
  mat.(l1).(l2)
42
(* FIXME: use a real map / store it in binary form...! *)
(* FIXME: tune the values? *)
(* FIXME: take a keyboard layout as input... *)
(** Table of 256 entries indexed by character code. Each entry is a float
    triple describing where the character sits on the keyboard description
    below: the row offset, the offset along the row, and [0.0] or [1.0]
    depending on whether the character comes from the first or the second
    string of its row (presumably the unshifted and shifted symbols).
    Characters that are not listed get the placeholder [(20.0, 4.0, 0.5)]. *)
let qwerty_map =
  (* [row y0 x0 w u1 u2] lists the keys of one row: the [i]-th character of
     [u1] and of [u2] share the position [x0 + i * w] on row [y0].
     The list is built back to front (last key first, [u2] before [u1]);
     this order matters because a character listed twice keeps the entry
     written last. *)
  let row y0 x0 w u1 u2 =
    let rec collect i acc =
      if i >= String.length u1 then acc
      else
        let x = x0 +. (float_of_int i) *. w in
        let plain = (y0, x, 0.0, u1.[i]) in
        let shifted = (y0, x, 1.0, u2.[i]) in
        collect (succ i) (shifted :: plain :: acc)
    in
    collect 0 []
  in
  let unmapped = (20.0, 4.0, 0.5) in
  let keys =
    row 0.0 0.0 1.7461 "`1234567890-=" "~!@#$%^&*()_+"
    @ [ (1.5, 1.0, 0.0, '\t') ]
    @ row 1.5 2.8 1.7461 "qwertyuiop[]" "QWERTYUIOP{}"
    @ row 3.0 3.4 1.7461 "asdfghjkl;'\\" "ASDFGHJKL:\"|"
    @ row 4.5 2.8 1.7461 "<zxcvbnm,./" ">ZXCVBNM<>?"
    @ [ (6.0, 12.5, 0.0, ' ') ]
  in
  let table = Array.make 256 unmapped in
  (* Entries are written in list order, so later ones overwrite earlier
     ones for a repeated character (e.g. '<' and '>' in the bottom row). *)
  List.iter (fun (a, b, c, ch) -> table.(Char.code ch) <- (a, b, c)) keys;
  table
67
(** [euclidian_distance p q] is the Euclidean distance between two points
    given as float triples: the square root of the sum of the squared
    differences of their components. *)
let euclidian_distance (x1, y1, z1) (x2, y2, z2) =
  let dx = x1 -. x2
  and dy = y1 -. y2
  and dz = z1 -. z2 in
  sqrt (dx *. dx +. dy *. dy +. dz *. dz)
71
(** Edit-cost function based on the keyboard table [qwerty_map].

    - two characters: the [euclidian_distance] between their table entries;
    - a character against [None]: the fixed deletion cost [5.0];
    - [None] against a character: the fixed insertion cost [7.0];
    - [None] against [None]: [0.]. *)
let qwerty_distance =
  let deletion_cost = 5.0 in
  let insertion_cost = 7.0 in
  (* Table entry of a character, looked up by its character code. *)
  let position c = qwerty_map.(Char.code c) in
  fun c1 c2 ->
    match c1, c2 with
    | Some a, Some b -> euclidian_distance (position a) (position b)
    | Some _, None -> deletion_cost
    | None, Some _ -> insertion_cost
    | None, None -> 0.
82
(** Unit edit-cost function: [0.] when both sides are absent or hold equal
    values, [1.] for two different values and for any insertion or
    deletion (exactly one side is [None]). *)
let simple_distance c1 c2 =
  match c1, c2 with
  | Some a, Some b -> if a <> b then 1. else 0.
  | Some _, None | None, Some _ -> 1. (* insertion, deletion *)
  | None, None -> 0.
87
(** [qwerty_sed s1 s2] is [sed] applied with [qwerty_distance] as its
    per-edit cost function. *)
let qwerty_sed s1 s2 = sed qwerty_distance s1 s2
89
(** [simple_sed s1 s2] is [sed] applied with [simple_distance] as its
    per-edit cost function. *)
let simple_sed s1 s2 = sed simple_distance s1 s2
Something went wrong with that request. Please try again.