Skip to content

Commit

Permalink
Implement #match! for Regex (crystal-lang#13285)
Browse files Browse the repository at this point in the history
Co-authored-by: Sijawusz Pur Rahnama <sija@sija.pl>
Co-authored-by: Johannes Müller <straightshoota@gmail.com>
  • Loading branch information
3 people authored and Blacksmoke16 committed Dec 11, 2023
1 parent e95380b commit 089d9cc
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 54 deletions.
27 changes: 27 additions & 0 deletions spec/std/regex_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,33 @@ describe "Regex" do
end
end

# Specs for `Regex#match!`, the raising counterpart of `Regex#match`.
describe "#match!" do
  it "returns match data" do
    match_data = /(?<bar>.)(?<foo>.)/.match!("Crystal")
    match_data[0].should eq("Cr")
    # Both groups are named, so there are no positional captures.
    match_data.captures.should eq([] of String)
    match_data.named_captures.should eq({"bar" => "C", "foo" => "r"})
  end

  it "assigns captures" do
    expected = /foo/.match!("foo")
    $~.should eq(expected)
  end

  it "raises on non-match" do
    expect_raises(Regex::Error, "Match not found") do
      /Crystal/.match!("foo")
    end
    # The failed match left nothing in `$~`, so reading it raises as well.
    expect_raises(NilAssertionError) do
      $~
    end
  end

  context "with options" do
    it "Regex::Match options" do
      anchored = Regex::MatchOptions::ANCHORED
      expect_raises(Regex::Error, "Match not found") { /foo/.match!(".foo", options: anchored) }
    end
  end
end

describe "#match_at_byte_index" do
it "assigns captures" do
matchdata = /foo/.match_at_byte_index("..foo", 1)
Expand Down
19 changes: 19 additions & 0 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2467,6 +2467,25 @@ describe "String" do
end
end

# Specs for `String#match!`, which raises instead of returning `nil`.
describe "match!" do
  it "returns matchdata" do
    match_data = "Crystal".match!(/(?<bar>.)(?<foo>.)/)
    match_data[0].should eq("Cr")
    # Both groups are named, so there are no positional captures.
    match_data.captures.should eq([] of String)
    match_data.named_captures.should eq({"bar" => "C", "foo" => "r"})
  end

  it "assigns captures" do
    expected = "foo".match!(/foo/)
    $~.should eq(expected)
  end

  it "raises on non-match" do
    expect_raises(Regex::Error, "Match not found") do
      "foo".match!(/Crystal/)
    end
    # The failed match left nothing in `$~`, so reading it raises as well.
    expect_raises(NilAssertionError) do
      $~
    end
  end
end

it "does %" do
("Hello %d world" % 123).should eq("Hello 123 world")
("Hello %d world" % [123]).should eq("Hello 123 world")
Expand Down
15 changes: 15 additions & 0 deletions src/regex.cr
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,21 @@ class Regex
match(str, pos, options: _options)
end

# Matches a regular expression against *str*. This starts at the character
# index *pos* if given, otherwise at the start of *str*. Returns a `Regex::MatchData`
# if *str* matched, otherwise raises `Regex::Error`. `$~` will contain the same value
# if matched.
#
# ```
# /(.)(.)(.)/.match!("abc")[2] # => "b"
# /(.)(.)/.match!("abc", 1)[2] # => "c"
# /(.)(タ)/.match!("クリスタル", 3)[2] # raises Regex::Error
# ```
def match!(str : String, pos : Int32 = 0, *, options : Regex::MatchOptions = :none) : MatchData
  # Translate the character offset into a byte offset; a `nil` result is
  # reported the same way as a failed match.
  start = str.char_index_to_byte_index(pos)
  raise Error.new("Match not found") if start.nil?

  match_data = match_at_byte_index(str, start, options)
  raise Error.new("Match not found") if match_data.nil?

  # Publish the match through `$~`; the assignment is also the return value.
  $~ = match_data
end

# Match at byte index. Matches a regular expression against `String`
# *str*. Starts at the byte index given by *pos* if given, otherwise at
# the start of *str*. Returns a `Regex::MatchData` if *str* matched, otherwise
Expand Down
98 changes: 49 additions & 49 deletions src/regex/match_data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,32 @@ class Regex
# Returns the original regular expression.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.regex # => /[p-s]/
# "Crystal".match!(/[p-s]/).regex # => /[p-s]/
# ```
getter regex : Regex

# Returns the number of capture groups, including named capture groups.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.group_size # => 0
# "Crystal".match(/r(ys)/).not_nil!.group_size # => 1
# "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.group_size # => 2
# "Crystal".match!(/[p-s]/).group_size # => 0
# "Crystal".match!(/r(ys)/).group_size # => 1
# "Crystal".match!(/r(ys)(?<ok>ta)/).group_size # => 2
# ```
getter group_size : Int32

# Returns the original string.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.string # => "Crystal"
# "Crystal".match!(/[p-s]/).string # => "Crystal"
# ```
getter string : String

# Returns the number of elements in this match object.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.size # => 1
# "Crystal".match(/r(ys)/).not_nil!.size # => 2
# "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.size # => 3
# "Crystal".match!(/[p-s]/).size # => 1
# "Crystal".match!(/r(ys)/).size # => 2
# "Crystal".match!(/r(ys)(?<ok>ta)/).size # => 3
# ```
def size : Int32
group_size + 1
Expand All @@ -61,11 +61,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.begin(0) # => 1
# "Crystal".match(/r(ys)/).not_nil!.begin(1) # => 2
# "クリスタル".match(/リ(ス)/).not_nil!.begin(0) # => 1
# "Crystal".match(/r/).not_nil!.begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.begin(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).begin(0) # => 1
# "Crystal".match!(/r(ys)/).begin(1) # => 2
# "クリスタル".match!(/リ(ス)/).begin(0) # => 1
# "Crystal".match!(/r/).begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).begin(1) # IndexError: Capture group 1 was not matched
# ```
def begin(n = 0) : Int32
@string.byte_index_to_char_index(byte_begin(n)).not_nil!
Expand All @@ -80,11 +80,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.end(0) # => 2
# "Crystal".match(/r(ys)/).not_nil!.end(1) # => 4
# "クリスタル".match(/リ(ス)/).not_nil!.end(0) # => 3
# "Crystal".match(/r/).not_nil!.end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.end(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).end(0) # => 2
# "Crystal".match!(/r(ys)/).end(1) # => 4
# "クリスタル".match!(/リ(ス)/).end(0) # => 3
# "Crystal".match!(/r/).end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).end(1) # IndexError: Capture group 1 was not matched
# ```
def end(n = 0) : Int32
@string.byte_index_to_char_index(byte_end(n)).not_nil!
Expand All @@ -99,11 +99,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.byte_begin(0) # => 1
# "Crystal".match(/r(ys)/).not_nil!.byte_begin(1) # => 2
# "クリスタル".match(/リ(ス)/).not_nil!.byte_begin(0) # => 3
# "Crystal".match(/r/).not_nil!.byte_begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.byte_begin(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).byte_begin(0) # => 1
# "Crystal".match!(/r(ys)/).byte_begin(1) # => 2
# "クリスタル".match!(/リ(ス)/).byte_begin(0) # => 3
# "Crystal".match!(/r/).byte_begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).byte_begin(1) # IndexError: Capture group 1 was not matched
# ```
def byte_begin(n = 0) : Int32
check_index_out_of_bounds n
Expand All @@ -119,11 +119,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.byte_end(0) # => 2
# "Crystal".match(/r(ys)/).not_nil!.byte_end(1) # => 4
# "クリスタル".match(/リ(ス)/).not_nil!.byte_end(0) # => 9
# "Crystal".match(/r/).not_nil!.byte_end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.byte_end(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).byte_end(0) # => 2
# "Crystal".match!(/r(ys)/).byte_end(1) # => 4
# "クリスタル".match!(/リ(ス)/).byte_end(0) # => 9
# "Crystal".match!(/r/).byte_end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).byte_end(1) # IndexError: Capture group 1 was not matched
# ```
def byte_end(n = 0) : Int32
check_index_out_of_bounds n
Expand All @@ -136,9 +136,9 @@ class Regex
# When *n* is `0`, returns the match for the entire `Regex`.
#
# ```
# "Crystal".match(/r(ys)/).not_nil![0]? # => "rys"
# "Crystal".match(/r(ys)/).not_nil![1]? # => "ys"
# "Crystal".match(/r(ys)/).not_nil![2]? # => nil
# "Crystal".match!(/r(ys)/)[0]? # => "rys"
# "Crystal".match!(/r(ys)/)[1]? # => "ys"
# "Crystal".match!(/r(ys)/)[2]? # => nil
# ```
def []?(n : Int) : String?
return unless valid_group?(n)
Expand All @@ -151,8 +151,8 @@ class Regex
# if there is no *n*th capture group.
#
# ```
# "Crystal".match(/r(ys)/).not_nil![1] # => "ys"
# "Crystal".match(/r(ys)/).not_nil![2] # raises IndexError
# "Crystal".match!(/r(ys)/)[1] # => "ys"
# "Crystal".match!(/r(ys)/)[2] # raises IndexError
# ```
def [](n : Int) : String
check_index_out_of_bounds n
Expand All @@ -165,15 +165,15 @@ class Regex
# `nil` if there is no such named capture group.
#
# ```
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"]? # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"]? # => nil
# "Crystal".match!(/r(?<ok>ys)/)["ok"]? # => "ys"
# "Crystal".match!(/r(?<ok>ys)/)["ng"]? # => nil
# ```
#
# When there are capture groups having same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al"
# "Crystal".match!(/(?<ok>Cr).*(?<ok>al)/)["ok"]? # => "al"
# ```
def []?(group_name : String) : String?
fetch_impl(group_name) { nil }
Expand All @@ -183,15 +183,15 @@ class Regex
# raises an `KeyError` if there is no such named capture group.
#
# ```
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"] # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"] # raises KeyError
# "Crystal".match!(/r(?<ok>ys)/)["ok"] # => "ys"
# "Crystal".match!(/r(?<ok>ys)/)["ng"] # raises KeyError
# ```
#
# When there are capture groups having same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al"
# "Crystal".match!(/(?<ok>Cr).*(?<ok>al)/)["ok"] # => "al"
# ```
def [](group_name : String) : String
fetch_impl(group_name) { |exists|
Expand Down Expand Up @@ -230,7 +230,7 @@ class Regex
# starts at the start of the string, returns the empty string.
#
# ```
# "Crystal".match(/yst/).not_nil!.pre_match # => "Cr"
# "Crystal".match!(/yst/).pre_match # => "Cr"
# ```
def pre_match : String
@string.byte_slice(0, byte_begin(0))
Expand All @@ -240,7 +240,7 @@ class Regex
# at the end of the string, returns the empty string.
#
# ```
# "Crystal".match(/yst/).not_nil!.post_match # => "al"
# "Crystal".match!(/yst/).post_match # => "al"
# ```
def post_match : String
@string.byte_slice(byte_end(0))
Expand All @@ -251,12 +251,12 @@ class Regex
# It is a difference from `to_a` that the result array does not contain the match for the entire `Regex` (`self[0]`).
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.captures # => ["Cr", "st"]
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(stal)?/)
# match.captures # => ["Cr", nil]
# ```
def captures : Array(String?)
Expand All @@ -273,12 +273,12 @@ class Regex
# Returns a hash of named capture groups.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.named_captures # => {"name1" => "y", "name2" => "al"}
#
# # When this regex has an optional group, result hash may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(?<name1>Cr)(?<name2>stal)?/).not_nil!
# match = "Crystal".match!(/(?<name1>Cr)(?<name2>stal)?/)
# match.named_captures # => {"name1" => "Cr", "name2" => nil}
# ```
def named_captures : Hash(String, String?)
Expand All @@ -297,12 +297,12 @@ class Regex
# Convert this match data into an array.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.to_a # => ["Crystal", "Cr", "y", "st", "al"]
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(?<name1>stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>stal)?/)
# match.to_a # => ["Cr", "Cr", nil]
# ```
def to_a : Array(String?)
Expand All @@ -312,12 +312,12 @@ class Regex
# Convert this match data into a hash.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.to_h # => {0 => "Crystal", 1 => "Cr", "name1" => "y", 3 => "st", "name2" => "al"}
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(?<name1>stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>stal)?/)
# match.to_h # => {0 => "Cr", 1 => "Cr", "name1" => nil}
# ```
def to_h : Hash(Int32 | String, String?)
Expand Down
19 changes: 14 additions & 5 deletions src/string.cr
Original file line number Diff line number Diff line change
Expand Up @@ -4558,8 +4558,7 @@ class String
end
end

# Finds match of *regex*, starting at *pos*.
# It also updates `$~` with the result.
# Finds matches of *regex* starting at *pos* and updates `$~` to the result.
#
# ```
# "foo".match(/foo/) # => Regex::MatchData("foo")
Expand All @@ -4569,9 +4568,19 @@ class String
# $~ # raises Exception
# ```
def match(regex : Regex, pos = 0) : Regex::MatchData?
  # Run the regex exactly once and publish the outcome (a `Regex::MatchData`,
  # or `nil` when nothing matched) through `$~`. The assignment is the last
  # expression, so the same value is returned to the caller.
  $~ = regex.match(self, pos)
end

# Finds matches of *regex* starting at *pos* and updates `$~` to the result.
# Raises `Regex::Error` if there are no matches.
#
# ```
# "foo".match!(/foo/) # => Regex::MatchData("foo")
# $~ # => Regex::MatchData("foo")
#
# "foo".match!(/bar/) # raises Regex::Error
# ```
def match!(regex : Regex, pos = 0) : Regex::MatchData
  # Delegate to `Regex#match!` and mirror its result into `$~`;
  # the assignment is the last expression and therefore the return value.
  found = regex.match!(self, pos)
  $~ = found
end

# Finds match of *regex* like `#match`, but it returns `Bool` value.
Expand Down

0 comments on commit 089d9cc

Please sign in to comment.