In [1]:
import re

### 匹配单个字符

In [2]:
# 匹配某个字符串
text = 'hello'
ret = re.match('he', text)
print(ret.group())

he


In [6]:
# . 匹配任意单个字符, 不能匹配换行符 \n
text = 'hello'
ret = re.match('.', text)
print(ret.group())

h


In [11]:
# \d 匹配任意单个数字
text = '123'
ret = re.match('\d', text)
print(ret.group())

1


In [14]:
# \D 匹配任意非数字
text = '[]3hello'
ret = re.match('\D', text)
print(ret.group())

[


In [19]:
# \s 匹配空白字符：换行\n, 制表\t, 换行符\r
text = '   '
ret = re.match('\s', text)
print(ret.group())

 


In [27]:
# \w 匹配a-z A-Z 数字 下划线
text = 'aBzZ32_ ___'
ret = re.search('\w+', text)
print(ret.group())

aBzZ32_


In [28]:
# \W 与\w 想反

In [32]:
# []组合方式，匹配满足中括号中的所有
text = 'aBzZ32_ ___'
ret = re.match('[a1]', text)
print(ret.group())

a


In [38]:
text = '8888-666666'
ret = re.match('[\d-]+', text)
print(ret.group())

8888-666666


In [45]:
# \d 的替代
text = '123fes '
ret = re.match('[0-9]+', text)
print(ret.group())

123


In [41]:
# \D 的替代
text = 'fef&*_ 123'
ret = re.match('[^0-9]+', text)
print(ret.group())

fef&*_ 


In [49]:
# \w 的替代
text = '23f098JIF_jefio'
ret = re.match('[a-zA-Z0-9_]+', text)
print(ret.group())

23f098JIF_jefio


In [50]:
# \W 的替代
text = '&*23f098JIF_jefio'
ret = re.match('[^a-zA-Z0-9_]+', text)
print(ret.group())

&*


### 匹配多个字符

In [54]:
# * 匹配0或任意多个字符，匹配不到不会报错
text = '2343aa'
ret = re.match('\d*', text)
print(ret.group())

2343


In [55]:
# + 匹配1或多个字符，匹配不到会报错
text = '23z43aa_AZ'
ret = re.match('\w+', text)
print(ret.group())

23z43aa_AZ


In [57]:
# ? 0个或1个
text = '23z43aa_AZ'
ret = re.match('\w?', text)
print(ret.group())

2


In [59]:
# {m} 匹配m个字符
text = '23z43aa_AZ'
ret = re.match('\w{6}', text)
print(ret.group())

23z43a


In [64]:
# {m,n} 匹配m到n个字符
text = '23z'
ret = re.match('\w{2,5}', text)
print(ret.group())

23z


### 案例

In [68]:
# 手机号码
text = '18653188888'
ret = re.match('1[34578]\d{9}', text)
print(ret.group())

18653188888


In [72]:
# 邮箱
text = 'hahaha@qq.com'
ret = re.match('\w+@[a-z0-9]+\.[a-z]+', text)
print(ret.group())

hahaha@qq.com


In [75]:
# URL
text = 'http://www.baidu.com/'
ret = re.match('(http|https|ftp)://[^\s]+', text)
print(ret.group())

http://www.baidu.com/


In [77]:
# 身份证
text = '37010419990120325X'
ret = re.match('\d{17}[\dxX]', text)
print(ret.group())

37010419990120325X


In [10]:
# 脱字号 ^ 
text = 'you hello'

# 必须以h开头
ret = re.search('^h', text)
print(ret.group())

AttributeError: 'NoneType' object has no attribute 'group'

In [16]:
# $ 以...结尾
text = 'hahaha@163.com'
ret = re.match('\w+@163.com$', text)
print(ret.group())

hahaha@163.com


In [17]:
# | 匹配多个表达式
text = 'http://www.baidu.com/'
ret = re.match('(http|https|ftp)://[^\s]+', text)
print(ret.group())

http://www.baidu.com/


In [25]:
# 贪婪模式
text = '<h1>标题</h1>'
ret = re.match('<.+>', text)
print(ret.group())

<h1>标题</h1>


In [27]:
# 非贪婪模式
text = '<h1>标题</h1>'
ret = re.match('<.+?>', text)
print(ret.group())

<h1>


#### 匹配0-100间的数字

In [35]:
# 不能出现09等
text = '100'
ret = re.match('[1-9]\d?$|100$', text)
print(ret.group())

100


### 转义字符

In [40]:
text = 'apple price is $299.'
ret = re.search('\$\d+', text)
print(ret.group())

$299


In [44]:
text = '\\n'

# python:
# \\\\n  ->  \\n
# re:
# \\n  ->  \n
ret = re.search('\\\\n', text)
print(ret.group())

\n


In [45]:
text = '\\n'
ret = re.search(r'\\n', text)
print(ret.group())

\n


### 分组

In [56]:
text = "apple's price is $299, orange's price is $233"
ret = re.search('.*(\$\d+).*(\$\d+)', text)
print(ret.group())
print(ret.group(1))
print(ret.group(2))
print(ret.group(1,2))
print(ret.groups())

apple's price is $299, orange's price is $233
$299
$233
('$299', '$233')
('$299', '$233')


### findall

In [61]:
text = "apple's price is $299, orange's price is $233"
ret = re.findall('\$\d+', text)
print(ret)

['$299', '$233']


### sub(替换)

In [62]:
text = "apple's price is $299, orange's price is $233"
ret = re.sub('\$\d+', '0', text)
print(ret)

apple's price is 0, orange's price is 0
