Skip to content

Commit

Permalink
[eval] slightly optimize find_substring for ascii strings
Browse files Browse the repository at this point in the history
  • Loading branch information
Simn committed Feb 5, 2019
1 parent 063d148 commit fcdd51a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/macro/eval/evalStdLib.ml
Expand Up @@ -2084,7 +2084,7 @@ module StdString = struct
let i = default_int startIndex this.slength in
vint (max 0 (min i this.slength))
end else begin
let i = default_int startIndex (this.slength - 1) in
let i = default_int startIndex (this.slength - str.slength) in
let i = if i < 0 then raise Not_found else if i >= this.slength then this.slength - 1 else i in
let b = get_offset this i in
let offset,_,_ = find_substring this str true i b in
Expand Down
30 changes: 22 additions & 8 deletions src/macro/eval/evalString.ml
Expand Up @@ -133,6 +133,11 @@ let string_of_char_code i =
let from_char_code i =
create_with_length (string_of_char_code i) 1

(** Outcome of a single [scan] attempt in [find_substring]: a byte-by-byte
    comparison of the substring against the searched string, starting at a
    given byte offset of the searched string.

    The forward search uses the two miss constructors to decide how far to
    advance before retrying; the reverse search treats both misses alike and
    steps back with [UTF8.prev].

    NOTE(review): the miss classification looks at the byte where the
    comparison failed (offset [b_index + b_len]), which is not necessarily the
    byte at the scan start offset. Confirm that the one-byte advance taken on
    [ScanAscii] cannot land inside a multi-byte sequence when earlier bytes of
    the attempt had already matched. *)
type scan_result =
| ScanHit (** All compared bytes were equal up to the end of the substring. *)
| ScanAscii (** Mismatch, and the searched-string byte at the mismatch is [<= '\127']; the forward loop advances the byte offset by one. *)
| ScanOther (** Mismatch on a searched-string byte above ['\127']; the forward loop advances the byte offset with [UTF8.next]. *)

let find_substring this sub reverse =
let cl_this = this.slength in
let cl_sub = sub.slength in
Expand All @@ -141,25 +146,34 @@ let find_substring this sub reverse =
let s_this = this.sstring in
let s_sub = sub.sstring in
let rec scan b_index b_len =
if b_len = bl_sub then true
else if String.unsafe_get s_this (b_index + b_len) = String.unsafe_get s_sub b_len then scan b_index (b_len + 1)
else false
let c = String.unsafe_get s_this (b_index + b_len) in
if c = String.unsafe_get s_sub b_len then begin
if b_len + 1 = bl_sub then ScanHit
else scan b_index (b_len + 1)
end else if c <= '\127' then
ScanAscii
else
ScanOther
in
if not reverse then begin
let rec loop c_index b_index =
if c_index > cl_this - cl_sub || b_index >= bl_this then raise Not_found;
if scan b_index 0 then
match scan b_index 0 with
| ScanHit ->
c_index,b_index,b_index + bl_sub
else
| ScanAscii ->
loop (c_index + 1) (b_index + 1)
| ScanOther ->
loop (c_index + 1) (UTF8.next s_this b_index)
in
loop
end else begin
let rec loop c_index b_index =
if b_index < 0 then raise Not_found;
if scan b_index 0 then
if c_index > cl_this - cl_sub || b_index < 0 then raise Not_found;
match scan b_index 0 with
| ScanHit ->
c_index,b_index,b_index + bl_sub
else
| _ ->
loop (c_index - 1) (UTF8.prev s_this b_index)
in
loop
Expand Down
1 change: 1 addition & 0 deletions tests/unit/src/unitstd/String.unit.hx
Expand Up @@ -83,6 +83,7 @@ s.lastIndexOf("foo") == 6;
s.lastIndexOf("foofoo") == 3;
s.lastIndexOf("f") == 6;
s.lastIndexOf("barb") == 9;
s.lastIndexOf("barb", 12) == -1;
s.lastIndexOf("z") == -1;
//s.lastIndexOf(null) == -1;
//s.lastIndexOf(null, 1) == -1;
Expand Down

1 comment on commit fcdd51a

@Simn
Copy link
Member Author

@Simn Simn commented on fcdd51a Feb 5, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a huge difference but should be worth it.

Before:

Case: StringScan
  Suite: length 20003
           indexOf hit: 3,632,632 (100.00%)
       lastIndexOf hit: 3,280,280 ( 90.30%)
          indexOf find:     7,625 (  0.20%)
      lastIndexOf find:     6,600 (  0.18%)
        indexOf nofind:     3,842 (  0.10%)
    lastIndexOf nofind:     3,313 (  0.09%)
          split nofind:     3,302 (  0.09%)
            split find:     3,189 (  0.08%)

After:

Case: StringScan
  Suite: length 20003
           indexOf hit: 3,573,573 (100.00%)
       lastIndexOf hit: 3,319,319 ( 92.90%)
          indexOf find:    10,762 (  0.30%)
      lastIndexOf find:     7,246 (  0.20%)
        indexOf nofind:     5,447 (  0.15%)
          split nofind:     4,379 (  0.12%)
            split find:     4,298 (  0.12%)
    lastIndexOf nofind:     3,716 (  0.10%)

Please sign in to comment.