In [1]:
import re

# utils

In [5]:
def displaymatch(match: re.Match) -> str:
    if match is None:
        return None
    return "<Match: %r, groups=%r>" % (match.group(), match.groups())

# Demo: match two space-separated words at the start of the string.
# The result gets a descriptive name rather than `_`, because IPython uses
# `_` for the most recent cell output.
newton_match = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
print(newton_match)
displaymatch(newton_match)


<re.Match object; span=(0, 12), match='Isaac Newton'>


"<Match: 'Isaac Newton', groups=('Isaac', 'Newton')>"

In [47]:
def display_external_methods(obj):
    """Print every public name that dir(obj) reports, one per line.

    A name is public when it does not begin with an underscore. Despite the
    function's name, plain attributes are listed as well as methods, since
    dir() returns both. Nothing is returned.
    """
    public_names = (name for name in dir(obj) if not name.startswith("_"))
    for name in public_names:
        print(name)

# int is used as a sample object: list every public name it exposes
display_external_methods(int)

as_integer_ratio
bit_count
bit_length
conjugate
denominator
from_bytes
imag
is_integer
numerator
real
to_bytes


# features

### findall and finditer

In [19]:
# findall: collect every non-overlapping match as a list of strings
target = "res005_mem008_no005_card"
digit_runs = re.findall(r'\d+', target)
print(digit_runs)

['005', '008', '005']


In [21]:
# when the pattern contains exactly one capture group, findall returns the
# group's text (here the last two digits of each run), not the whole match
print(re.findall(r'\d(\d{2})', target))

['05', '08', '05']


In [22]:
# finditer: like findall, but yields one Match object per hit
target = "res005_mem008_no005_card"
# The loop variable is deliberately not called `match`: a for-loop variable
# stays in the notebook namespace, and the later "match" cells inspect a
# global named `match`.
for digit_run in re.finditer(r'\d+', target):
    print(digit_run.group())

005
008
005


### compile for pattern object

In [34]:
# compile once, reuse many times: anchored at both ends, the pattern accepts
# five characters drawn from a, 2-9, t, j, q, k
valid = re.compile(r"^[a2-9tjqk]{5}$")
type(valid)

re.Pattern

In [42]:
# public attributes and methods of a compiled pattern object
display_external_methods(valid)

findall
finditer
flags
fullmatch
groupindex
groups
match
pattern
scanner
search
split
sub
subn


In [33]:
# the pattern is anchored, so the whole 5-character string is the single match
print(valid.findall("akt5q"))

['akt5q']


### findall on a compiled pattern

In [59]:
# compiled pattern matching three consecutive digits
three_digi = re.compile(r'\d{3}')

In [60]:
# matches are non-overlapping and found left to right: "78" is too short,
# and "1234567" yields "123" and "456" with the final "7" left over
print(three_digi.findall("aaa78_mem001-1234567"))

['001', '123', '456']


### sub

In [61]:
# sub replaces every non-overlapping match with "ddd"; digits that never form
# a full three-digit match ("78" and the trailing "7") are left unchanged
print(three_digi.sub("ddd","aaa78_mem001-1234567"))

aaa78_memddd-dddddd7


### match

In [75]:
# re.match only tries to match at the start of the string; the two capture
# groups pick out the word part and the digits on either side of the hyphen
match = re.match(r"(\w+)-(\d+)", "abc-123")
match

<re.Match object; span=(0, 7), match='abc-123'>

In [82]:
# returns None if no match: "akt5q11" has 7 characters, but `valid`
# requires exactly 5 between ^ and $
valid.match("akt5q11")

### The match object

In [76]:
# public attributes and methods of a Match object
display_external_methods(match)

end
endpos
expand
group
groupdict
groups
lastgroup
lastindex
pos
re
regs
span
start
string


In [78]:
# tuple holding the text of every capture group, in order
match.groups()

('abc', '123')

In [77]:
# with no argument, group() returns the entire matched text
match.group()

'abc-123'

In [79]:
# group 0 is the entire match, same as calling group() with no argument
match.group(0)

'abc-123'

In [80]:
# group 1: text captured by the first parenthesized group, (\w+)
match.group(1)

'abc'

In [81]:
# group 2: text captured by the second parenthesized group, (\d+)
match.group(2)

'123'

### backreference

In [None]:
# detect pair via backreference
pair = re.compile(r".*(.).*\1")
# 0 or more of any, then potential first occurrence,
# then 0 or more of any, then the 2nd occurrence
# making a pair

# displaymatch() returns a string rather than printing it, and a notebook cell
# only echoes its last expression, so every result is printed explicitly.
print(displaymatch(pair.match("717ak")))  # Pair of 7s.
print(displaymatch(pair.match("718ak")))  # No pairs.
print(displaymatch(pair.match("354aa")))  # Pair of aces.
# greedy: the leading .* grabs as much as it can, so the pair reported is the 7s
print(displaymatch(pair.match("aa778")))
# non-greedy: .*? grabs as little as it can, so the pair reported is the a's
print(displaymatch(re.compile(r".*?(.).*?\1").match("aa778")))


In [None]:

# Match once and reuse the result. Both values are printed because a notebook
# cell only echoes its last expression.
pair_match = pair.match("717ak")
print(pair_match.group())   # the whole match: '717'
print(pair_match.group(1))  # the first group: '7'

In [15]:
# For each line, pull out the double-quoted string(s) that begin with /home.
targets = [
    """libname yes "/home/user/data;" readonly;""",
    """libname no "/user/data;" readonly;""",
    """a_sdtrig = "aserf" #345""",
]

#.*? is a non-greedy match for any character (.) repeated 0 or more times (*). 
# The ? makes it non-greedy, meaning it will match as few characters as possible 
# while still satisfying the rest of the pattern.
# Compiled once so findall() and search() below are guaranteed to use the
# same pattern.
home_path = re.compile(r'"/home.*?"')

# The loop variable is not called `target`: a for-loop variable stays in the
# notebook namespace and would overwrite the `target` string that the findall
# cells use.
for line in targets:
    print(f"target: {line}")

    # every quoted /home... string on the line (empty list when there is none)
    print(home_path.findall(line))

    # first quoted /home... string only; search() returns None when absent
    potential = home_path.search(line)
    if potential:
        print(potential.group())

    print("\n")
    



target: libname yes "/home/user/data;" readonly;
['"/home/user/data;"']
"/home/user/data;"


target: libname no "/user/data;" readonly;
[]


target: a_sdtrig = "aserf" #345
[]


