In [1]:
import re

In [7]:
# Prefixing a metacharacter with `\` makes the regex engine treat it as a
# literal character.
# `\^` matches a literal caret rather than acting as the start-of-string anchor.
print(re.search(r'b\^2', 'a^2 + b^2 - C*3'))
# `\\` in the pattern matches one literal backslash in the subject.
print(re.sub(r'\\', '/', r'path\something\wrong'))

<re.Match object; span=(6, 9), match='b^2'>
path/something/wrong


## escape

In [13]:
# `re.escape` removes the need to put a `\` in front of every metacharacter of
# the original pattern string. As a result, reading the argument passed to
# re.escape is enough to guess what text will be matched.
eqn = 'f*(a^b) - 3*(a^b)'

# Literal text written as a raw string.
raw_pattern = r'(a^b)'
escaped_pattern = re.escape(raw_pattern)
print(re.compile(escaped_pattern).sub('c', eqn))

# Same literal text written as a plain string; it contains no backslashes, so
# both spellings are the same string and escape to the same pattern.
raw_pattern = '(a^b)'
esc_pattern = re.escape(raw_pattern)
print(re.compile(esc_pattern).sub('c', eqn))

f*c - 3*c
f*c - 3*c


In [22]:
# Compare an alternation built from escaped terms with one built from the raw
# terms, applied to the same subject string.
s = 'ba_423 (a^b)c 2|3 a^b'
terms = ['a_42', '(a^b)', '2|3']

# Escaped: every term is matched as literal text.
escaped_pattern = '|'.join(re.escape(term) for term in terms)
print('Escaped:')
print(escaped_pattern)
print(re.sub(escaped_pattern, 'X', s))

# Unescaped: `(`, `)`, `^` and `|` inside the terms keep their regex meaning.
unescaped_pattern = '|'.join(terms)
print('\nUnescaped:')
print(unescaped_pattern)
print(re.sub(unescaped_pattern, 'X', s))

Escaped:
a_42|\(a\^b\)|2\|3
bX3 Xc X a^b

Unescaped:
a_42|(a^b)|2|3
bXX (a^b)c X|X a^b


In [26]:
# The pattern is a raw string, so the regex engine itself interprets `\t` as a
# tab; the subject is a normal string whose '\t' escapes are tab characters.
re.sub(r'\t', ':', 'a\tb\tc')

'a:b:c'

In [27]:
# An undefined escape sequence in a pattern makes `re` raise `re.error`.
# The error is caught and its message printed so that this demonstration does
# not abort a Restart & Run All. The subject is a raw string because `\o` is
# not a valid escape sequence in a normal Python string literal.
try:
    re.search(r'\e', r'hell\o')
except re.error as exc:
    bad_escape_message = f'error: {exc}'
    print(bad_escape_message)

error: bad escape \e at position 0

In [36]:
# Hexadecimal escapes have the form \xHH (two hex digits), e.g. \x00.
# '\x00' is a 1-byte character.
# `\x20` is the space character, so both substitutions below strip spaces.
s1 = re.compile(r'\x20').sub('', ' h e l l o ')
s2 = re.compile(r' ').sub('', ' h e l l o ')
print(f'substituted: {s1}, \nand they are same? {s1 == s2}')

substituted: hello, 
and they are same? True


# Exercises

In [44]:
# E1. Transform the given input strings to the expected output using the same
# logic on both strings.
# Expected results: '35+qty/3' for str1 and '(qty+4)/2-35+pq/4' for str2.
str1 = '(9-2)*5+qty/3'
str2 = '(qty+4)/2-(9-2)*5+pq/4'

In [45]:
# A1. Replace the literal text '(9-2)*5' with '35' in both inputs.
# Expected results:
#   '35+qty/3'
#   '(qty+4)/2-35+pq/4'
raw_pattern = r'(9-2)*5'
repl = '35'
# Escape first so that `(`, `)` and `*` are matched as plain characters.
escaped_pattern = re.escape(raw_pattern)
compiled_pattern = re.compile(escaped_pattern)
for expression in (str1, str2):
    print(compiled_pattern.sub(repl, expression))

35+qty/3
(qty+4)/2-35+pq/4


In [46]:
# E2. Replace '(4)\|' with '2' only at the start or end of the given input
# strings.
# s1 and s2 are raw strings, so `\|` is a literal backslash followed by `|`.
s1 = r'2.3/(4)\|6 foo 5.3-(4)\|'
s2 = r'(4)\|42 - (4)\|3'
# s3 is a normal string: `\\` is a single backslash and the value ends with a
# newline character.
s3 = 'two - (4)\\|\n'

In [50]:
# A2
# Both alternatives search for the same literal text; only the anchor differs.
# `\A` pins a match to the very start of the string and `\Z` to the very end.
raw_patterns = [r'(4)\|', r'(4)\|']
escaped_pattern = list(map(re.escape, raw_patterns))
# The anchors are raw strings: `\A` and `\Z` are not valid Python string
# escapes, so the non-raw spellings emit an invalid-escape-sequence warning.
escaped_pattern[0] = r'\A' + escaped_pattern[0]
escaped_pattern[1] = escaped_pattern[1] + r'\Z'

compiled_pattern = re.compile('|'.join(escaped_pattern))
print(compiled_pattern.sub('2', s1))  # '2.3/(4)\|6 foo 5.3-2'
print(compiled_pattern.sub('2', s2))  # '242 - (4)\|3'
# s3 is left unchanged: its trailing newline means the text is not at the end.
print(compiled_pattern.sub('2', s3))  # 'two - (4)\|\n'

2.3/(4)\|6 foo 5.3-2
242 - (4)\|3
two - (4)\|



In [51]:
# E3. Replace any matching element from the list `items` with 'X' for the
# given input strings. Match the elements from `items` literally. Assume no two
# elements of `items` will result in any matching conflict.
# Every element contains at least one regex metacharacter (`.`, `+`, `\`, `|`,
# `{`, `}`); r'x\y\z' is a raw string so its backslashes are literal.
items = ['a.b', '3+n', r'x\y\z', 'qty||price', '{n}']

In [54]:
# A3. Escape each item so it is matched literally, then join the escaped items
# into a single alternation.
escaped_pattern = '|'.join(re.escape(item) for item in items)
pat = re.compile(escaped_pattern)

s1 = pat.sub('X', '0a.bcd')  # '0Xcd'
s2 = pat.sub('X', 'E{n}AMPLE')  # 'EXAMPLE'
s3 = pat.sub('X', r'43+n2 ax\y\ze')  # '4X2 aXe'

# One result per line.
print(f'{s1}\n{s2}\n{s3}')

0Xcd
EXAMPLE
4X2 aXe


In [55]:
# E4. Replace the backspace character '\b' with a single space character for
# the given input string.
# In a normal (non-raw) string literal, '\b' is the single backspace character.
ip = '123\b456'
# repr(ip): '123\x08456'
print(ip)
# Printed output shown for this cell: 12456

12456


In [57]:
# A4
# The backspace in `ip` is the single character '\x08'. In a regex, `\b` is a
# word-boundary assertion, and re.escape(r'\b') produces `\\b`, which matches a
# literal backslash followed by 'b' -- neither matches the backspace character.
# Match it through its hexadecimal escape instead.
raw_pattern = r'\x08'
# Keep the substitution as the final expression so the cell displays the
# computed result. Expected: '123 456'
re.sub(raw_pattern, ' ', ip)

'123 456'

In [58]:
# E5. Replace all occurrences of '\e' with 'e'.
# `ip` is a raw string, so every `\e` is a literal backslash followed by 'e'.
ip = r'th\er\e ar\e common asp\ects among th\e alt\ernations'

In [60]:
# A5
# re.escape(r'\e') yields the pattern `\\e`: a literal backslash followed by 'e'.
# The substitution is the cell's final expression so that the displayed value
# is the computed result rather than a hand-written string literal.
# Expected: 'there are common aspects among the alternations'
re.sub(re.escape(r'\e'), 'e', ip)

'there are common aspects among the alternations'

In [61]:
# E6. Replace any matching item from the list `eqns` with 'X' for the given
# string `ip`. Match the items from `eqns` literally.
# Note that '(a^b)' is a prefix of '(a^b)+2', so the items can overlap.
ip = '3-(a^b)+2*(a^b)-(a/b)+3'
eqns = ['(a^b)', '(a/b)', '(a^b)+2']

In [63]:
# A6
# Expected: '3-X*X-X+3'
# Alternation tries its alternatives from left to right, so the items are
# ordered longest first; otherwise '(a^b)' would match before '(a^b)+2' could.
escaped_pattern = '|'.join(
    re.escape(item) for item in sorted(eqns, key=len, reverse=True)
)
pat = re.compile(escaped_pattern)
pat.sub('X', ip)

'3-X*X-X+3'