From 2b4f003a6d151380783027c3fe153cbb25028340 Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Fri, 1 Feb 2019 21:54:41 +0100 Subject: [PATCH] Make `startswith`, `endswith` work with `Regex` (#29790) --- base/pcre.jl | 1 + base/regex.jl | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ test/regex.jl | 12 ++++++++++ 3 files changed, 75 insertions(+) diff --git a/base/pcre.jl b/base/pcre.jl index 845187f893443..7aa91076b84ad 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -37,6 +37,7 @@ const COMPILE_MASK = CASELESS | DOLLAR_ENDONLY | DOTALL | + ENDANCHORED | EXTENDED | FIRSTLINE | MULTILINE | diff --git a/base/regex.jl b/base/regex.jl index 384d6ca3dfa21..d952f1f6ba4b8 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -174,6 +174,68 @@ function occursin(r::Regex, s::SubString; offset::Integer=0) r.match_data) end +""" + startswith(s::AbstractString, prefix::Regex) + +Return `true` if `s` starts with the regex pattern, `prefix`. + +!!! note + `startswith` does not compile the anchoring into the regular + expression, but instead passes the anchoring as + `match_option` to PCRE. If compile time is amortized, + `occursin(r"^...", s)` is faster than `startswith(s, r"...")`. + +See also [`occursin`](@ref) and [`endswith`](@ref). + +# Examples +```jldoctest +julia> startswith("JuliaLang", r"Julia|Romeo") +true +``` +""" +function startswith(s::AbstractString, r::Regex) + compile(r) + return PCRE.exec(r.regex, String(s), 0, r.match_options | PCRE.ANCHORED, + r.match_data) +end + +function startswith(s::SubString, r::Regex) + compile(r) + return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ANCHORED, + r.match_data) +end + +""" + endswith(s::AbstractString, suffix::Regex) + +Return `true` if `s` ends with the regex pattern, `suffix`. + +!!! note + `endswith` does not compile the anchoring into the regular + expression, but instead passes the anchoring as + `match_option` to PCRE. If compile time is amortized, + `occursin(r"...\$", s)` is faster than `endswith(s, r"...")`. + +See also [`occursin`](@ref) and [`startswith`](@ref). + +# Examples +```jldoctest +julia> endswith("JuliaLang", r"Lang|Roberts") +true +``` +""" +function endswith(s::AbstractString, r::Regex) + compile(r) + return PCRE.exec(r.regex, String(s), 0, r.match_options | PCRE.ENDANCHORED, + r.match_data) +end + +function endswith(s::SubString, r::Regex) + compile(r) + return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED, + r.match_data) +end + """ match(r::Regex, s::AbstractString[, idx::Integer[, addopts]]) diff --git a/test/regex.jl b/test/regex.jl index 6fc5c2f159749..cb3fa965f8a50 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -66,6 +66,18 @@ # Regex behaves like a scalar in broadcasting @test occursin.(r"Hello", ["Hello", "World"]) == [true, false] + @test startswith("abc", r"a") + @test endswith("abc", r"c") + @test !startswith("abc", r"b") + @test !startswith("abc", r"c") + @test !endswith("abc", r"a") + @test !endswith("abc", r"b") + + @test !startswith("abc", r"A") + @test startswith("abc", r"A"i) + @test !endswith("abc", r"C") + @test endswith("abc", r"C"i) + # Test that PCRE throws the correct kind of error # TODO: Uncomment this once the corresponding change has propagated to CI #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)