# Python 正規表示法

In [1]:
import re

## 數字

In [2]:
# \d matches a single decimal digit; four in a row locate the year.
# The pattern is a raw string so the backslash reaches the regex engine
# unchanged (a plain '\d' is an invalid string escape and raises a warning).
# The sample is bound to `text` rather than `str` so the builtin stays usable.
text = 'Today is 06 May 2019.'
mo = re.search(r'\d\d\d\d', text)
print(mo)
print(mo.group())             # the matched substring
print(mo.start(), mo.end())   # start index and one-past-the-end index

<_sre.SRE_Match object; span=(16, 20), match='2019'>
2019
16 20


## 英文字母

In [3]:
# [...] is a character class: it matches any ONE of the characters listed
# inside it, so [A-Z] is a single uppercase ASCII letter.
# Raw string: keeps '\d' from being read as an (invalid) string escape.
text = 'Today is 2019-MAY-06'
mo = re.search(r'\d\d\d\d-[A-Z][A-Z][A-Z]-\d\d', text)
print(mo.group())

2019-MAY-06


In [4]:
# Three ways to match the mixed-case month abbreviation in the same text.
# All patterns are raw strings so '\d' is passed to the regex engine as-is.
text = 'Today is 2019-May-06'

# A-Z is an uppercase letter, a-z a lowercase letter.
mo = re.search(r'\d\d\d\d-[A-Z][a-z][a-z]-\d\d', text)
print(mo.group())

# A-Za-z accepts a letter of either case.
mo = re.search(r'\d\d\d\d-[A-Za-z][A-Za-z][A-Za-z]-\d\d', text)
print(mo.group())

# re.IGNORECASE makes [A-Z] match lowercase letters as well.
mo = re.search(r'\d\d\d\d-[A-Z][A-Z][A-Z]-\d\d', text, re.IGNORECASE)
print(mo.group())

2019-May-06
2019-May-06
2019-May-06


## 連續

In [5]:
# {a} repeats the preceding item exactly a times: \d{4} is four digits.
# Raw string keeps the backslash intact; no flag is passed because case
# folding has no effect on a digits-only pattern.
text = 'Today is 2012-May-06'
mo = re.search(r'\d{4}', text)
print(mo.group())

2012


In [6]:
# {a,b} repeats the preceding item at least a and at most b times.
#       Write it without a space after the comma; with a space the braces
#       are treated as literal text instead of a quantifier.
# {a,}  repeats at least a times and takes as many as possible (greedy).
text = 'Today is 2019-May-06'
mo = re.search(r'\d{1,4}-[A-Za-z]{1,}-\d{1,2}', text)
print(mo.group())

2019-May-06


In [7]:
# + repeats the preceding item one or more times (greedy).
# Raw string keeps '\d' from being read as an (invalid) string escape.
text = 'abc123'
mo = re.search(r'\d+', text)
print(mo.group())

123


In [8]:
# * repeats the preceding item zero or more times.
# search() takes the first position where the pattern can match. Zero digits
# is an acceptable match, so it succeeds immediately at index 0 (before 'a')
# with an empty string, which is why nothing visible is printed.
text = 'abc123'
mo = re.search(r'\d*', text)
print(mo.group())




In [9]:
# ? makes the preceding item optional: zero or one occurrence.
# As with *, an empty match is allowed, so search() succeeds at index 0
# (before 'a') and the printed result is an empty string.
text = 'abc123'

mo = re.search(r'\d?', text)
print(mo.group())




## Group

In [10]:
# (...) creates a capturing group; groups are numbered from 1 in the order
# of their opening parentheses. Raw string keeps '\d' intact.
text = 'Today is 2019-May-06'
mo = re.search(r'(\d{4})-([A-Za-z]{3})-(\d{2})', text)

print('Group:')
print(mo.group())    # no argument: the whole match
print(mo.group(0))   # group 0 is also the whole match
print(mo.group(1))   # year
print(mo.group(2))   # month abbreviation
print(mo.group(3))   # day
print()

print('Groups:')
print(mo.groups())        # tuple of all captured groups (group 0 excluded)
print(len(mo.groups()))   # number of capturing groups

Group:
2019-May-06
2019-May-06
2019
May
06

Groups:
('2019', 'May', '06')
3


## 斷言

In [11]:
# (?=...) positive lookahead: the text that follows must match, but it is
#         not consumed and is not part of the result.
# (?!...) negative lookahead: the text that follows must NOT match.
# The sample is bound to `text` rather than `str` so the builtin stays usable.
text = '2019-May'
mo = re.search('(2019-(?=[A-Za-z]{3}))', text)
print(mo.group())

# Directly after '2019' comes '-', not three letters, so the negative
# lookahead is satisfied and '2019' matches.
mo = re.search('(2019(?![A-Za-z]{3}))', text)
print(mo.group())

2019-
2019


## 頭尾、跨行

In [12]:
# (.*) captures whatever characters lie between the two markers.
# The sample is bound to `text` rather than `str` so the builtin stays usable.

text = 'BEGIN hello world END'
mo = re.search('BEGIN(.*)END', text)
print(mo.group())

BEGIN hello world END


In [13]:
# Two-line sample text. The newline is written as '\n' so the space that
# precedes it stays visible and cannot be lost to whitespace stripping.
text = 'BEGIN hello \nworld END'
print(text)
print()

# By default '.' does not match a newline, so the pattern cannot reach END
# on the second line and search() returns None.
mo = re.search('BEGIN(.*)END', text)
print(mo)
print()

# re.DOTALL lets '.' match newlines too, so the match can span lines.
mo = re.search('BEGIN(.*)END', text, re.DOTALL)
print(mo.group())

BEGIN hello 
world END

None

BEGIN hello 
world END


## 搜尋

In [14]:
# findall returns every non-overlapping match as a list of strings;
# re.IGNORECASE makes 'hello' match 'Hello' as well.
# Descriptive names replace `str` (shadows the builtin) and `l` (easily
# confused with the digit 1).
text = 'Hello Taipei, hello Taiwan, hello world'
matches = re.findall('hello', text, re.IGNORECASE)
print(matches)

['Hello', 'hello', 'hello']


## 替代

In [15]:
# sub replaces every match of the pattern with the replacement string.
# The flag must be passed by keyword: the fourth positional parameter of
# re.sub is `count`, so a positional re.IGNORECASE would be read as a limit
# on the number of replacements and the search would stay case-sensitive.
text = 'Hello Taipei, hello Taiwan, hello word'
s = re.sub('word', 'world', text, flags=re.IGNORECASE)
print(s)

Hello Taipei, hello Taiwan, hello world
