In [1]:
import re
from typing import Optional

# utils

In [2]:
def displaymatch(match: Optional[re.Match]) -> Optional[str]:
    """Summarise a regex match result as a short, readable string.

    Args:
        match: The value returned by ``re.match``/``re.search`` and friends;
            ``None`` when the pattern did not match.

    Returns:
        ``"<Match: 'whole match', groups=(...)>"`` for a successful match,
        or ``None`` when ``match`` is ``None``.
    """
    # A failed match is passed through unchanged so callers can chain
    # displaymatch(re.match(...)) without checking for None first.
    if match is None:
        return None
    return "<Match: %r, groups=%r>" % (match.group(), match.groups())


# Use a descriptive name instead of `_`: in IPython/Jupyter `_` is the
# built-in "last output" variable, and assigning to it shadows that feature.
newton_match = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
print(newton_match)
# Bare final expression: the notebook displays the returned summary string.
displaymatch(newton_match)

<re.Match object; span=(0, 12), match='Isaac Newton'>


"<Match: 'Isaac Newton', groups=('Isaac', 'Newton')>"

In [3]:
def display_external_methods(obj):
    """Print every public attribute name of ``obj``, one per line.

    Names come from ``dir(obj)``; any name that starts with an underscore
    is skipped. Nothing is returned.
    """
    for name in dir(obj):
        if name.startswith("_"):
            # private / dunder attribute: not part of the public surface
            continue
        print(name)


# Example: list the public (non-underscore) attribute names of the built-in int type.
display_external_methods(int)

as_integer_ratio
bit_count
bit_length
conjugate
denominator
from_bytes
imag
is_integer
numerator
real
to_bytes


# features

### compile: building a pattern object

In [63]:
# re.compile turns a pattern string into a reusable re.Pattern object.
sample_text = "inputs: akt5q, 33333"
pattern = re.compile(r"([a2-9tjqk]{5})")
print(type(pattern))
# search() scans the text and reports only the first place the pattern matches.
first_hit = pattern.search(sample_text)
print(first_hit)

<class 're.Pattern'>
<re.Match object; span=(8, 13), match='akt5q'>


In [64]:
# Alternatively, call the module-level re functions directly with the
# pattern string; no explicit re.compile() step is needed.
print(re.search(r"([a2-9tjqk]{5})", "inputs: akt5q, 33333"))

<re.Match object; span=(8, 13), match='akt5q'>


In [61]:
# List the public attributes available on a compiled re.Pattern object.
display_external_methods(pattern)

findall
finditer
flags
fullmatch
groupindex
groups
match
pattern
scanner
search
split
sub
subn


### findall and finditer

In [19]:
# findall collects every non-overlapping match into a list of strings
target = "res005_mem008_no005_card"
digit_runs = re.findall(r"\d+", target)
print(digit_runs)

['005', '008', '005']


In [21]:
# When the pattern contains a capture group, findall returns the text of
# that group (here the two digits after the first) instead of the whole match.
print(re.findall(r"\d(\d{2})", target))

['05', '08', '05']


In [22]:
# finditer yields one Match object per hit instead of building a list of strings
target = "res005_mem008_no005_card"
digit_matches = re.finditer(r"\d+", target)
for match in digit_matches:
    # group() with no arguments is the full text of this match
    print(match.group())

005
008
005


### search

In [91]:
# Look for a double-quoted string whose content starts with /home.
pat = re.compile(r'"/home.*"')
libname_line = """libname yes "/home/user/data" readonly;"""
assignment_line = """a_sdtrig = "aserf" #345"""

match = pat.search(libname_line)
print(match)
print(match.group())
# This line has a quoted string but no /home path, so search() gives None.
print(pat.search(assignment_line))

<re.Match object; span=(12, 29), match='"/home/user/data"'>
"/home/user/data"
None


### sub

In [61]:
# `three_digi` must be defined in this cell so the notebook runs on a fresh
# kernel. It matches exactly three consecutive digits, so sub() replaces each
# non-overlapping run of three digits and leaves shorter leftovers untouched.
three_digi = re.compile(r"\d{3}")
print(three_digi.sub("ddd", "aaa78_mem001-1234567"))

aaa78_memddd-dddddd7


### match

In [75]:
# re.match tries the pattern at the start of the string only.
match = re.match(r"(\w+)-(\d+)", "abc-123")
# A bare final expression makes the notebook display the Match object's repr.
match

<re.Match object; span=(0, 7), match='abc-123'>

In [68]:
# match() returns None when the pattern does not match; the notebook shows
# no output for a cell whose final expression evaluates to None.
valid = re.compile(r"(\w+)-(\d+)")
valid.match("akt5q11")  # no "-" followed by digits, so the result is None

### The match object

In [76]:
# List the public attributes available on an re.Match object.
display_external_methods(match)

end
endpos
expand
group
groupdict
groups
lastgroup
lastindex
pos
re
regs
span
start
string


In [84]:
# Two capture groups: word characters before the hyphen, digits after it.
word_number = re.compile(r"(\w+)-(\d+)")
match = word_number.match("abc-123")
# Bare final expression: the notebook displays the Match object's repr.
match

<re.Match object; span=(0, 7), match='abc-123'>

In [85]:
print(match.groups())  # tuple of all capture groups
print(match.group())  # entire match (same as group(0))
print(match.group(0, 1, 2))  # several groups at once, returned as a tuple

('abc', '123')
abc-123
('abc-123', 'abc', '123')


In [86]:
# One or more digits, wrapped in a single capture group.
numbers = re.compile(r"(\d+)")
# search() scans the string and stops at the first run of digits it finds.
match = numbers.search("abc-123_v2")
match

<re.Match object; span=(4, 7), match='123'>

In [87]:
# use group to get entire match as str
print(match.groups())
print(match.group())

('123',)
123


In [88]:
# One or more digits; this pattern has no capture group.
numbers = re.compile(r"\d+")
match = numbers.search("abc-123_v2")
# Bare final expression: the notebook displays the Match object's repr.
match

<re.Match object; span=(4, 7), match='123'>

In [89]:
# With no capture groups in the pattern, groups() is an empty tuple;
# group() still returns the entire match.
print(match.groups())
print(match.group())

()
123


### backreference

In [52]:
# Detect a repeated character (a "pair") via a backreference:
#   .*   zero or more of any character,
#   (.)  the candidate first occurrence, captured as group 1,
#   .*   zero or more of any character,
#   \1   the second occurrence: the same text that group 1 captured.
pair = re.compile(r".*(.).*\1")

In [53]:
print(pair.match("01234"))  # every character is distinct -> None
print(pair.match("0a2a4"))  # "a" occurs twice -> match runs through the second "a"

None
<re.Match object; span=(0, 4), match='0a2a'>


In [55]:
match = pair.match("0a2a4")
print(match.groups())  # group 1 holds the repeated character
print(match.group())  # entire match: start of string through the second "a"

('a',)
0a2a
