# Regular Expression
A regular expression (aka regex) is a sequence of characters that specifies a search pattern in text.

___


### `.` Matches any character except a newline.



In [2]:
import re
# "." stands for exactly one character (anything but a newline), so "h.o"
# needs an "h", one arbitrary character, and then an "o".
text = "hello, he, hakko, heo"
dot_pattern = re.compile(r"h.o")
print(dot_pattern.findall(text))

['heo']


### `[]` Indicates a set of characters; a range such as `a-z` matches any single character in that range.


In [3]:
# A bracketed set matches one character out of the set; "a-z" is the range of
# lowercase letters, so the uppercase "A" and the punctuation are skipped.
text = "a, b, /, ?, A"
lowercase = re.compile(r"[a-z]")
print(lowercase.findall(text))

['a', 'b']


### `\d` Matches any character which is a decimal digit. 
### `\D` Matches any character which is not a decimal digit. 


In [10]:
text = "a b A B ? , 4 5 6"
# \d collects the digits; \D collects everything else, spaces included.
for pattern in (r"\d", r"\D"):
    print(re.findall(pattern, text))


['4', '5', '6']
['a', ' ', 'b', ' ', 'A', ' ', 'B', ' ', '?', ' ', ',', ' ', ' ', ' ']


### `\w` Matches any alphanumeric character (字母數字) or underscore.
### `\W` Matches any non-alphanumeric character, such as +, ?, !


In [11]:
text = "a b A B ? , 4 5 6"
word_chars = re.findall(r"\w", text)      # the letters and digits
non_word_chars = re.findall(r"\W", text)  # the spaces and punctuation
print(word_chars)
print(non_word_chars)

['a', 'b', 'A', 'B', '4', '5', '6']
[' ', ' ', ' ', ' ', '?', ' ', ',', ' ', ' ', ' ']


### `\s` Matches any whitespace character.
### `\S` Matches any non-whitespace character.

In [12]:
text = "a bAB ?,4 56"
whitespace = re.compile(r"\s")      # lowercase s: whitespace characters
non_whitespace = re.compile(r"\S")  # uppercase S: every other character
print(whitespace.findall(text))
print(non_whitespace.findall(text))

[' ', ' ', ' ']
['a', 'b', 'A', 'B', '?', ',', '4', '5', '6']


### `*` Matches 0 or more occurrences of the preceding element. 
### `+` Matches 1 or more occurrences of the preceding element. 

In [13]:
text = "a ab abbb abbbb"
# "b*" lets the lone "a" match (zero b's); "b+" demands at least one "b".
for pattern in (r"ab*", r"ab+"):
    print(re.findall(pattern, text))

['a', 'ab', 'abbb', 'abbbb']
['ab', 'abbb', 'abbbb']


### `{m}` Matches exactly m copies of the preceding RE. 
### `{m,n}` Matches from m to n copies of the preceding RE. 
### `{m,}` Matches m or more copies of the preceding RE.

In [15]:
text = "hello1 hello12 hello123 hello123456789"
# Each quantifier takes as many digits as its upper bound allows:
# exactly one, at most three, or all of them.
for pattern in (r"hello\d{1}", r"hello\d{1,3}", r"hello\d{1,}"):
    print(re.findall(pattern, text))

['hello1', 'hello1', 'hello1', 'hello1']
['hello1', 'hello12', 'hello123', 'hello123']
['hello1', 'hello12', 'hello123', 'hello123456789']


### `\.` Since dot has a special meaning in RE, use \. to find dot in a string.


In [16]:
text = "a b c d 1 2 3 4 . ! ?"
# The backslash strips "." of its wildcard meaning, leaving a literal dot.
literal_dot = re.compile(r"\.")
print(literal_dot.findall(text))

['.']


### `\b` Matches the empty string at the beginning or end of a word (a word boundary).

In [19]:
text = "this island is good isn't it?"
# r"is\b"   -> "is" ending a word:    this, is
# r"\bis"   -> "is" starting a word:  island, is, isn't
# r"\bis\b" -> "is" as a whole word:  is
for pattern in (r"is\b", r"\bis", r"\bis\b"):
    print(re.findall(pattern, text))

['is', 'is']
['is', 'is', 'is']
['is']
