Skip to content

Commit

Permalink
Support backcapture references in `replace`'s replacement string
Browse files Browse the repository at this point in the history
  • Loading branch information
malmaud committed Jul 6, 2015
1 parent a4a75dc commit c67c828
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 16 deletions.
1 change: 1 addition & 0 deletions base/exports.jl
Expand Up @@ -1337,6 +1337,7 @@ export
# notation for certain types
@b_str, # byte vector
@r_str, # regex
@s_str, # regex substitution string
@v_str, # version number

# documentation
Expand Down
17 changes: 17 additions & 0 deletions base/pcre.jl
Expand Up @@ -140,6 +140,23 @@ function substring_number_from_name(re, name)
(Ptr{Void}, Cstring), re, name)
end

# Ask PCRE2 for the length of capture group `number` in `match_data`.
# Returns the length as an `Int`; a negative PCRE2 status is turned into an
# error carrying the library's message.
function substring_length_bynumber(match_data, number)
    len_ref = Ref{Csize_t}()
    rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Void}, UInt32, Ref{Csize_t}),
               match_data, number, len_ref)
    if rc < 0
        error("PCRE error: $(err_message(rc))")
    end
    return convert(Int, len_ref[])
end

# Copy capture group `number` of `match_data` into the memory at `buf`, which
# the caller states is `buf_size` long. Returns, as an `Int`, the size value
# PCRE2 writes back; a negative PCRE2 status is turned into an error.
function substring_copy_bynumber(match_data, number, buf, buf_size)
    size_ref = Ref{Csize_t}(buf_size)
    rc = ccall((:pcre2_substring_copy_bynumber_8, PCRE_LIB), Cint,
               (Ptr{Void}, UInt32, Ptr{UInt8}, Ref{Csize_t}),
               match_data, number, buf, size_ref)
    if rc < 0
        error("PCRE error: $(err_message(rc))")
    end
    return convert(Int, size_ref[])
end

function capture_names(re)
name_count = info(re, INFO_NAMECOUNT, UInt32)
name_entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
Expand Down
84 changes: 78 additions & 6 deletions base/regex.jl
Expand Up @@ -208,14 +208,86 @@ end
# Fallback for string types without a dedicated regex search method: always
# rejects the call and tells the caller how to convert the input.
function search(s::AbstractString, r::Regex, idx::Integer)
    msg = "regex search is only available for bytestrings; use bytestring(s) to convert"
    throw(ArgumentError(msg))
end
# Search from the beginning of `s` by delegating to the three-argument method.
function search(s::AbstractString, r::Regex)
    from = start(s)
    return search(s, r, from)
end
# Regex flavour of `_search`: returns what `match` returns for `r` in `s`
# starting at `idx` (a match object, or `nothing` when there is none).
function _search(s::AbstractString, r::Regex, idx::Integer)
    return match(r, s, idx)
end

# Index in the searched string at which the match begins.
function first(m::RegexMatch)
    return m.offset
end
# Index in the searched string of the last character of the match.
#
# `m.offset` is an index into the searched string, so the span must be measured
# in indices as well: `endof(m.match)` is the index of the final character
# within the match, whereas a character count (`length`) comes up short as soon
# as the match contains a multi-byte character. For an empty match the result
# is `m.offset - 1`, i.e. one less than `first(m)`.
last(m::RegexMatch) = m.offset + endof(m.match) - 1
# A string wrapper marking its contents as a regex substitution template.
# The `s"..."` string macro builds one, and `_replace` has a method
# specialised on it that expands backslash escapes into captured groups.
immutable SubstitutionString{T<:AbstractString} <: AbstractString
string::T  # the wrapped template text
end

# Last valid index of a SubstitutionString: that of the string it wraps.
function endof(s::SubstitutionString)
    return endof(s.string)
end
# Iteration step for a SubstitutionString: defer to the wrapped string.
function next(s::SubstitutionString, idx::Int)
    return next(s.string, idx)
end
# Display as the `s` prefix immediately followed by the wrapped string as
# `show` prints it.
show(io::IO, s::SubstitutionString) = (print(io, "s"); show(io, s.string))

# String macro behind `s"..."`: wraps the literal's text in a
# SubstitutionString when the macro is expanded.
macro s_str(string)
    return SubstitutionString(string)
end

# Abort with an error that quotes the offending replacement string `repl`.
function replace_err(repl)
    error("Bad replacement string: $repl")
end

# Append the text of capture group `group` from the most recent match of `re`
# directly into the buffer of `io`.
#
# `io` must expose `data`, `ptr` and `size` fields and support `ensureroom`
# (as the `IOBuffer` used by `replace` does). Returns the new write position
# `io.ptr`.
function _write_capture(io, re, group)
    len = PCRE.substring_length_bynumber(re.match_data, group)
    # One byte more than the capture itself is requested and declared as the
    # buffer size; per the PCRE2 API documentation the copy routine also
    # stores a terminating NUL after the substring.
    ensureroom(io, len+1)
    PCRE.substring_copy_bynumber(re.match_data, group,
                                 pointer(io.data, io.ptr), len+1)
    io.ptr += len
    # The bytes were written behind the stream's back, so its logical size
    # has to be extended by hand; otherwise a capture that ends the output
    # is cut off when the buffer is read.
    io.size = max(io.size, io.ptr - 1)
    return io.ptr
end

function _replace(io, repl::AbstractString, str, m::RegexMatch)
write(io, "repl")
write(io, repl)
# Expand the substitution template `repl_s` into `io` for the current match.
#
# Escapes recognised in the template:
#   \\          a single literal backslash
#   \N          capture group number N (one or more decimal digits)
#   \g<N>       capture group number N
#   \g<name>    named capture group
# Every other character is copied through unchanged. A malformed escape, an
# empty `\g<>` or a group name unknown to the regex raises an error through
# `replace_err`.
#
# `re` is the regex whose `match_data` holds the captures to insert. `str`
# and `r` are not used in this method.
function _replace(io, repl_s::SubstitutionString, str, r, re)
    SUB_CHAR = '\\'
    GROUP_CHAR = 'g'
    LBRACKET = '<'
    RBRACKET = '>'
    repl = repl_s.string
    i = start(repl)
    e = endof(repl)
    while i <= e
        if repl[i] == SUB_CHAR
            next_i = nextind(repl, i)
            # A lone backslash at the very end has nothing to escape.
            next_i > e && replace_err(repl)
            if repl[next_i] == SUB_CHAR
                # An escaped backslash stands for exactly one backslash.
                write(io, SUB_CHAR)
                i = nextind(repl, next_i)
            elseif isdigit(repl[next_i])
                # \N: accumulate every following decimal digit into the
                # group number. `isdigit` (not `isnumber`) keeps this in step
                # with what `parse(Int, ...)` accepts.
                group = parse(Int, repl[next_i])
                i = nextind(repl, next_i)
                while i <= e && isdigit(repl[i])
                    group = 10group + parse(Int, repl[i])
                    i = nextind(repl, i)
                end
                _write_capture(io, re, group)
            elseif repl[next_i] == GROUP_CHAR
                # \g<...>: require the opening bracket, then scan to the
                # closing one.
                i = nextind(repl, next_i)
                if i > e || repl[i] != LBRACKET
                    replace_err(repl)
                end
                i = nextind(repl, i)
                i > e && replace_err(repl)
                groupstart = i
                while repl[i] != RBRACKET
                    i = nextind(repl, i)
                    i > e && replace_err(repl)
                end
                # `\g<>` names no group at all.
                i == groupstart && replace_err(repl)
                groupname = SubString(repl, groupstart, prevind(repl, i))
                if all(isdigit, groupname)
                    _write_capture(io, re, parse(Int, groupname))
                else
                    group = PCRE.substring_number_from_name(re.regex, groupname)
                    # NOTE(review): assumes the lookup signals an unknown
                    # name with a negative value, as PCRE2 error codes do.
                    if group < 0
                        replace_err("Group $groupname not found in regex $re")
                    end
                    _write_capture(io, re, group)
                end
                i = nextind(repl, i)
            else
                replace_err(repl)
            end
        else
            write(io, repl[i])
            i = nextind(repl, i)
        end
    end
end

immutable RegexMatchIterator
Expand Down
12 changes: 5 additions & 7 deletions base/string.jl
Expand Up @@ -1327,11 +1327,9 @@ function _rsplit{T<:AbstractString,U<:Array}(str::T, splitter, limit::Integer, k
end
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)

# Default replacement: write `repl` to `io` as-is; `str` and `r` are ignored.
function _replace(io, repl, str, r)
    return write(io, repl)
end
_replace(io, repl::Function, str, r) =
# Default replacement: write `repl` to `io` as-is; the matched range `r`, the
# source string `str` and the `pattern` are ignored.
function _replace(io, repl, str, r, pattern)
    return write(io, repl)
end
# Function replacement: call `repl` on the matched part of `str` (the span
# from `first(r)` to `last(r)`) and write whatever it returns to `io`.
function _replace(io, repl::Function, str, r, pattern)
    matched = SubString(str, first(r), last(r))
    return write(io, repl(matched))
end
# Generic `_search`: forwards unchanged to `search`.
function _search(str, pattern, offset)
    return search(str, pattern, offset)
end


function replace(str::ByteString, pattern, repl, limit::Integer)
n = 1
Expand All @@ -1340,10 +1338,11 @@ function replace(str::ByteString, pattern, repl, limit::Integer)
r = search(str,pattern,i)
j, k = first(r), last(r)
out = IOBuffer()
ensureroom(out, floor(Int, 1.2sizeof(str)))
while j != 0
if i == a || i <= k
write_sub(out, str.data, i, j-i)
_replace(out, repl, str, r)
_replace(out, repl, str, r, pattern)
end
if k<j
i = j
Expand All @@ -1354,8 +1353,7 @@ function replace(str::ByteString, pattern, repl, limit::Integer)
if j > e
break
end
r = _search(str,pattern,k)
r == nothing && break
r = search(str,pattern,k)
j, k = first(r), last(r)
n == limit && break
n += 1
Expand Down
10 changes: 7 additions & 3 deletions test/regex.jl
Expand Up @@ -39,6 +39,10 @@ show(buf, r"")
@test_throws ArgumentError search(utf32("this is a test"), r"test")

# Named subpatterns
# The pattern has two named groups (`a`, `b`) around one unnamed group.
m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
# Named groups are indexed here by Symbol and by String, the unnamed one by number.
@test (m[:a], m[2], m["b"]) == ("x", "y", "z")
# `show` is expected to label named groups by name and the unnamed one by number.
@test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
# Scoped variant: `let` keeps the match object and the helper bindings local.
let
    m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
    # Named groups by Symbol and by String, the unnamed group by number.
    captured = (m[:a], m[2], m["b"])
    @test captured == ("x", "y", "z")
    # `show` labels named groups by name and the unnamed one by number.
    rendered = sprint(show, m)
    @test rendered == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
end

# Backcapture references in a substitution string: the template mixes a
# named reference (`\g<byname>`), literal text (`xy`) and a numbered
# reference (`\1`); the match "bcd" is expected to become "dxybc".
@test replace("abcde", r"(..)(?P<byname>d)", s"\g<byname>xy\1") == "adxybce"

0 comments on commit c67c828

Please sign in to comment.