In [1]:
import re

In [2]:
# First substring made of 'a', any number of 'b', then 'c'.
re.compile(r'ab*c').search('abc ac adc abbbc')

<re.Match object; span=(0, 3), match='abc'>

In [3]:
# The pattern must account for the entire string, not just a prefix of it.
re.compile(r'1(2|3)*4').fullmatch('1233224')

<re.Match object; span=(0, 7), match='1233224'>

In [4]:
# Sample text, then a lazy match running from the first 'q' to the nearest 't'.
sentence = 'that is quite a fabricated tale'
m = re.compile(r'q.*?t').search(sentence)

In [5]:
# (start, end) index pair of the text matched by m.
m.span()

(8, 12)

In [6]:
# Index at which the match begins (the first element of m.span()).
m.start()

8

In [7]:
# Same lookup as before without keeping the match object around.
re.compile(r'q.*?t').search(sentence).span()

(8, 12)

In [8]:
# Greedy '.*' stretches from the first 'b' to the last 'd' it can reach.
re.compile(r'b.*d').search('abc ac adc abbbc')

<re.Match object; span=(1, 9), match='bc ac ad'>

In [9]:
# Subscript 0 on a match gives the full matched text.
re.compile(r'b.*d').search('abc ac adc abbbc')[0]

'bc ac ad'

In [10]:
# .group(0) is the method spelling of the [0] subscript.
re.compile(r'b.*d').search('abc ac adc abbbc').group(0)

'bc ac ad'

In [11]:
# Three capture groups (one lazy, two greedy) over the whole string.
m = re.compile(r'a(.*?) (.*)d(.*)c').fullmatch('abc ac adc abbbc')

In [12]:
# Display the match object; fullmatch consumed the entire input string.
m

<re.Match object; span=(0, 16), match='abc ac adc abbbc'>

In [13]:
print(f"First group {m[0]}")
print(f"Second group {m[1]}")
print(f"Third group {m[2]}")

First group abc ac adc abbbc
Second group bc
Third group ac a


In [14]:
# Group 0 (whole match) followed by capture groups 1 through 3.
list(map(m.group, range(4)))

['abc ac adc abbbc', 'bc', 'ac a', 'c abbb']

In [15]:
# Groups can be picked in any order; here the third, then the first.
(m[3], m[1])

('c abbb', 'bc')

In [16]:
# Tuple of every capture group in order; the whole match (group 0) is left out.
m.groups()

('bc', 'ac a', 'c abbb')

In [17]:
# One capture group sitting between the literal 'w' and 'me'.
m = re.compile(r'w(.*)me').search('awesome')

In [18]:
# (start, end) indices of the whole match.
m.span()

(1, 7)

In [19]:
# (start, end) indices of capture group 1 only.
m.span(1)

(2, 5)

In [20]:
# Index where the whole match begins.
m.start()

1

In [21]:
# Index just past the end of capture group 1.
m.end(1)

5

In [22]:
# Compiled patterns accept a search window: start at index 1, stop before 15.
pat = re.compile(r'hi.*bye')
m = pat.search('This is goodbye then', pos=1, endpos=15)

In [23]:
# Start index that was passed to pat.search() for this match.
m.pos

1

In [24]:
# End index that was passed to pat.search() for this match.
m.endpos

15

In [25]:
# The compiled pattern object that produced this match.
m.re

re.compile(r'hi.*bye', re.UNICODE)

In [26]:
# The full string handed to search(), not just the pos/endpos window.
m.string

'This is goodbye then'

In [27]:
# Assignment expressions: := binds the search result and tests it in one step.
# 'oh!' has no 's', so the search fails and nothing is printed.
if (m := re.compile(r'(.*)s').search('oh!')) is not None:
    print(m[1])

In [28]:
# Here the search succeeds, so the captured prefix before 's' is printed.
if (m := re.compile(r'(.*)s').search('awesome')) is not None:
    print(m[1])

awe


In [29]:
# Two 'key: value' records used by the extraction loop that follows.
text = [
    'type: fruit',
    'date: 2020/04/28',
]

In [30]:
for ip in text:
    if m := re.search(r'type: (.*)',ip):
        print(m[1])
    elif m := re.search(r'date: (.*?)/(.*?)/', ip):
        print(f'month: {m[2]},year{m[1]}')

fruit
month: 04,year2020


In [31]:
# Using functions in replacement section: the callable receives each match
# and returns the text to substitute for it.
re.compile(r'(a|b)\^2').sub(lambda hit: hit.group().upper(), 'a^2 + b^2 - C*3')

'A^2 + B^2 - C*3'

In [32]:
# Replace every 2 or 3 with its square.
re.compile(r'2|3').sub(lambda hit: str(int(hit.group()) ** 2), 'a^2 + b^2 - C*3')

'a^4 + b^4 - C*9'

In [33]:
# Digit -> word lookup used as the replacement source.
# NOTE(review): the name `dict` shadows the builtin; it is kept because later
# cells read this variable under that name.
dict = {
    '1': 'one',
    '2': 'two',
    '4': 'four',
}
re.compile(r'1|2|4').sub(lambda hit: dict[hit.group()], '9234012')

'9two3four0onetwo'

In [34]:
# Map every ASCII digit through the lookup table, using 'X' for digits that
# have no entry. [0-9] matches the same characters as the ten-way alternation.
re.sub(r'[0-9]', lambda m: dict.get(m[0], 'X'), '9234012')

'XtwoXfourXonetwo'

In [35]:
# Swap two words in a single pass: each hit is looked up in the table, so a
# replaced 'cat' is never turned back by a second substitution.
swap = {
    'cat': 'tiger',
    'tiger': 'cat',
}
words = 'cat tiger dog tiger cat'

re.compile(r'cat|tiger').sub(lambda hit: swap[hit.group()], words)

'tiger cat dog cat tiger'

In [36]:
# Replacement table whose keys overlap ('hand' is a prefix of two others) and
# include a regex metacharacter ('^').
# NOTE(review): the name `dict` shadows the builtin; it is kept because later
# cells read this variable under that name.
dict = {
    'hand': '1',
    'handy': '2',
    'handful': '3',
    'a^b': '4',
}

In [37]:
# Longest keys first, so the alternation built from them tries 'handful'
# before its prefix 'hand'.
words = sorted(dict, key=len, reverse=True)

In [38]:
# Show the keys in the longest-first order produced by the sort.
words

['handful', 'handy', 'hand', 'a^b']

In [39]:
# Escape each key so metacharacters such as '^' match literally, then join
# the keys into one alternation.
pat = re.compile('|'.join(map(re.escape, words)))
pat.pattern

'handful|handy|hand|a\\^b'

In [40]:
# Replace every key found in the text with its table value.
pat.sub(lambda hit: dict[hit.group()], 'handful hand pin handy (a^b)')

'3 1 pin 2 (4)'

In [41]:
# Every non-overlapping match, returned as a list of strings.
re.compile(r'ab*c').findall('abc ac adc abbc xabbbcz bbb bc abbbbbc')

['abc', 'ac', 'abbc', 'abbbc', 'abbbbbc']

In [42]:
# With one capture group, findall returns the group's text, not the match.
re.compile(r'a(b*)c').findall('abc ac adc abbc xabbbcz bbb bc abbbbbc')

['b', '', 'bb', 'bbb', 'bbbbb']

In [45]:
# With several capture groups, each result is a tuple of the groups. The last
# date has no trailing comma, so it is not matched.
re.compile(r'(.*?)/(.*?)/(.*?),').findall('2020/04/25,1986/Mar/02,77/12/31')

[('2020', '04', '25'), ('1986', 'Mar', '02')]

In [46]:
# finditer returns an iterator rather than a list; its repr shows no matches
# until it is looped over.
re.finditer(r'ab+c','abc ac adc abbbc')

<callable_iterator at 0x10fa36a90>

In [47]:
# Walk the iterator to see each match object in turn.
m_iter = re.compile(r'ab+c').finditer('abc ac adc abbbc')
for m in m_iter:
    print(m)

<re.Match object; span=(0, 3), match='abc'>
<re.Match object; span=(11, 16), match='abbbc'>


In [48]:
# Print each match in upper case next to its span, separated by a tab.
m_iter = re.compile(r'ab+c').finditer('abc ac adc abbbc')
for m in m_iter:
    print(m.group().upper(), m.span(), sep='\t')

ABC	(0, 3)
ABBBC	(11, 16)


In [49]:
# Tab-separated fields: the columns only line up when the fields fit within
# a tab stop.
print('\t'.join(("Hello", "world", "Hey there")))
print('\t'.join(("Hellosadfdsf", "worldfadsf", "Hey therefdsaf")))

Hello	world	Hey there
Hellosadfdsf	worldfadsf	Hey therefdsaf


In [51]:
# Comma-terminated dates; collect the three captured parts of each one.
d = '2020/04/25,1986/Mar/02,77/12/31'
m_iter = re.compile(r'(.*?)/(.*?)/(.*?),').finditer(d)
[hit.groups() for hit in m_iter]

[('2020', '04', '25'), ('1986', 'Mar', '02')]

In [52]:
# Capture everything up to each comma.
m_iter = re.compile(r'(.*?),').finditer(d)
[hit.group(1) for hit in m_iter]

['2020/04/25', '1986/Mar/02']

In [53]:
# m_iter was already consumed by the comprehension in the previous cell, so
# iterating it a second time yields nothing.
[m[1] for m in m_iter]

[]

In [54]:
# re.split: cut the string wherever the pattern matches; the separators are
# dropped.
re.compile(r'1*4?2').split('31111111111251111426')

['3', '5', '6']

In [55]:
# Wrapping the whole separator in a group keeps it in the result.
re.compile(r'(1*4?2)').split('31111111111251111426')

['3', '11111111112', '5', '111142', '6']

In [56]:
# Only the grouped part of the separator is kept.
re.compile(r'(1*)4?2').split('31111111111251111426')

['3', '1111111111', '5', '1111', '6']

In [57]:
# Two groups in the separator: both captured pieces appear in the output.
re.compile(r'(a+)b+(c+)').split('3.14aabccc42')

['3.14', 'aa', 'ccc', '42']

In [60]:
# An optional group that did not take part in the match shows up as None.
re.compile(r'(1*)(4)?2').split('31111111111251111426')

['3', '1111111111', None, '5', '1111', '4', '6']

In [61]:
# re.subn: start from plain sub, which returns only the new string.
greeting = 'Have a nice weekend'
re.compile(r'e').sub('E', greeting)

'HavE a nicE wEEkEnd'

In [62]:
# subn returns a (new_string, number_of_replacements) pair.
re.compile(r'e').subn('E', greeting)

('HavE a nicE wEEkEnd', 5)

In [63]:
# Keep deleting 'fin' until a pass makes no replacement; removing one
# occurrence can bring a new one together ('cof' + 'ing' -> 'cofing').
word = 'coffining'
cnt = 1  # non-zero so the loop body runs at least once
while cnt:
    word, cnt = re.subn(r'fin', '', word)
word

'cog'

In [64]:
# Exercise: the two input strings.
str1, str2 = (
    'This the biggest fruit you have seen?',
    'Your mission is to read and practice consistently',
)

In [73]:
# From the first 'is' through the last 't' (greedy).
pattern = re.compile(r'is.*t')
pattern.search(str1)[0]


'is the biggest fruit'

In [74]:
# Same pattern applied to the second string.
pattern.search(str2)[0]

'ission is to read and practice consistent'

In [75]:
# Inputs for the exercise, plus one pattern that matches any of the words.
s1 = 'match after the last newline'
s2 = 'and then you want to test'
s3 = 'this is good bye then'
s4 = 'who was there to see?'
toMatch = ['is','the','was','to']
# Escape each word so it is matched literally even if the list later gains
# regex metacharacters; join() takes the iterable directly, no copy needed.
pattern = re.compile('|'.join(map(re.escape, toMatch)))

In [78]:
# Start index of the first listed word found in s1.
pattern.search(s1).start()

12

In [81]:
# Start index of the first listed word found in s2.
pattern.search(s2).start()

4

In [82]:
# Start index of the first listed word found in s3.
pattern.search(s3).start()

2

In [83]:
# Start index of the first listed word found in s4.
pattern.search(s4).start()

4

In [84]:
# Same inputs and pattern as the earlier exercise cell, redefined here.
# NOTE(review): this cell duplicates that earlier setup.
s1 = 'match after the last newline'
s2 = 'and then you want to test'
s3 = 'this is good bye then'
s4 = 'who was there to see?'
toMatch = ['is','the','was','to']
# Escape each word so it is matched literally even if the list later gains
# regex metacharacters; join() takes the iterable directly, no copy needed.
pattern = re.compile('|'.join(map(re.escape, toMatch)))

In [90]:
# A Match cannot be indexed with -1: subscripts select capture groups, which
# is why the original raised "IndexError: no such group".
# Presumably the goal was the LAST occurrence of any listed word (TODO
# confirm): collect every match and take the start of the final one.
list(pattern.finditer(s1))[-1].start()

12