From fcdd51a2805b392dafc99d75ac8072a9345f0a6f Mon Sep 17 00:00:00 2001
From: Simon Krajewski
Date: Tue, 5 Feb 2019 09:58:12 +0100
Subject: [PATCH] [eval] slightly optimize `find_substring` for ascii strings

`scan` now reports why a candidate position failed, so the forward search can
step a single byte when the candidate started on an ASCII byte instead of
calling UTF8.next. `lastIndexOf` defaults its start position to the last
position at which `str` can still fit.

---
Review notes (this section is ignored by git-am):
- Line structure was restored from a whitespace-collapsed copy; indentation is
  assumed to be tabs, as in the upstream sources. Verify with `git apply --check`.
- `scan` classifies a failed candidate by the byte at `b_index`, not by the byte
  that mismatched; otherwise a multi-byte candidate failing on a later ASCII byte
  makes the forward loop step into the middle of a character and miscount
  `c_index` (e.g. searching "éaéb" for "éb" raised Not_found).
- `scan` checks `b_len = bl_sub` before reading, so an empty `sub` is a hit
  again and no byte of an empty string is read.
- The reverse loop steps back over start positions where `sub` cannot fit
  instead of raising Not_found, so `lastIndexOf("barb", 12)` is 9 again.
- The post-image blob hashes on the `index` lines of evalString.ml and
  String.unit.hx (and the commit id in the first line) still refer to the
  original commit and no longer describe this content.

 src/macro/eval/evalStdLib.ml          |  2 +-
 src/macro/eval/evalString.ml          | 28 ++++++++++++++++++++++------
 tests/unit/src/unitstd/String.unit.hx |  1 +
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/macro/eval/evalStdLib.ml b/src/macro/eval/evalStdLib.ml
index 6e61e4facfb..90d448adeb8 100644
--- a/src/macro/eval/evalStdLib.ml
+++ b/src/macro/eval/evalStdLib.ml
@@ -2084,7 +2084,7 @@ module StdString = struct
 				let i = default_int startIndex this.slength in
 				vint (max 0 (min i this.slength))
 			end else begin
-				let i = default_int startIndex (this.slength - 1) in
+				let i = default_int startIndex (this.slength - str.slength) in
 				let i = if i < 0 then raise Not_found else if i >= this.slength then this.slength - 1 else i in
 				let b = get_offset this i in
 				let offset,_,_ = find_substring this str true i b in
diff --git a/src/macro/eval/evalString.ml b/src/macro/eval/evalString.ml
index 7ec2fa8c322..fd71bd47a2d 100644
--- a/src/macro/eval/evalString.ml
+++ b/src/macro/eval/evalString.ml
@@ -133,6 +133,12 @@ let string_of_char_code i =
 let from_char_code i =
 	create_with_length (string_of_char_code i) 1
 
+(* Result of comparing `sub` against `this` at one candidate byte offset (see find_substring). *)
+type scan_result =
+	| ScanHit (* every byte of `sub` matched *)
+	| ScanAscii (* no match; the candidate starts on an ASCII byte, so the next candidate is one byte further *)
+	| ScanOther (* no match; the candidate starts on a non-ASCII byte *)
+
 let find_substring this sub reverse =
 	let cl_this = this.slength in
 	let cl_sub = sub.slength in
@@ -141,25 +147,35 @@ let find_substring this sub reverse =
 	let s_this = this.sstring in
 	let s_sub = sub.sstring in
 	let rec scan b_index b_len =
-		if b_len = bl_sub then true
+		if b_len = bl_sub then ScanHit
 		else if String.unsafe_get s_this (b_index + b_len) = String.unsafe_get s_sub b_len then scan b_index (b_len + 1)
-		else false
+		(* Classify by the byte at b_index (the start of the candidate), not by the byte that
+		   mismatched: only then may the caller advance by exactly one byte and one character. *)
+		else if String.unsafe_get s_this b_index <= '\127' then ScanAscii
+		else ScanOther
 	in
 	if not reverse then begin
 		let rec loop c_index b_index =
 			if c_index > cl_this - cl_sub || b_index >= bl_this then raise Not_found;
-			if scan b_index 0 then
+			match scan b_index 0 with
+			| ScanHit ->
 				c_index,b_index,b_index + bl_sub
-			else
+			| ScanAscii ->
+				loop (c_index + 1) (b_index + 1)
+			| ScanOther ->
 				loop (c_index + 1) (UTF8.next s_this b_index)
 		in
 		loop
 	end else begin
 		let rec loop c_index b_index =
 			if b_index < 0 then raise Not_found;
-			if scan b_index 0 then
+			(* `sub` cannot fit this close to the end: step back instead of failing, so earlier matches are still found. *)
+			if c_index > cl_this - cl_sub then
+				loop (c_index - 1) (UTF8.prev s_this b_index)
+			else match scan b_index 0 with
+			| ScanHit ->
 				c_index,b_index,b_index + bl_sub
-			else
+			| _ ->
 				loop (c_index - 1) (UTF8.prev s_this b_index)
 		in
 		loop
diff --git a/tests/unit/src/unitstd/String.unit.hx b/tests/unit/src/unitstd/String.unit.hx
index 9baa9b714e1..5b2974f823a 100644
--- a/tests/unit/src/unitstd/String.unit.hx
+++ b/tests/unit/src/unitstd/String.unit.hx
@@ -83,6 +83,7 @@ s.lastIndexOf("foo") == 6;
 s.lastIndexOf("foofoo") == 3;
 s.lastIndexOf("f") == 6;
 s.lastIndexOf("barb") == 9;
+s.lastIndexOf("barb", 12) == 9;
 s.lastIndexOf("z") == -1;
 //s.lastIndexOf(null) == -1;
 //s.lastIndexOf(null, 1) == -1;