In [1]:
import re

## Anchors & boundaries

#### `^`: start of string, or start of each line in multiline mode. (Inside brackets, as in `[^...]`, it means "not" instead.)

In [2]:
# ^ anchors the match at the start of the string
re.search('^abc', 'abcdefg').group()

'abc'

In [3]:
# after the anchored 'abc', the greedy .* consumes the rest of the line
re.search('^abc.*', 'abc (line start)').group()

'abc (line start)'

#### `$`: end of string or end of line depending on multiline mode. 

In [4]:
# $ anchors the match at the end of the string
re.search('abc$', 'endsinabc').group()

'abc'

In [5]:
# the lazy .*? grows only as far as needed for ' the end' to sit at the end ($)
re.search('.*? the end$', 'this is the end').group()

'this is the end'

## Characters

#### `\d` matches digit (0-9) & unicode digit

In [6]:
# raw string so that \d reaches the regex engine without an invalid-escape warning
re.search(r'file\d*', 'file12345').group()

'file12345'

In [7]:
# \d also matches non-ASCII Unicode digits; raw string avoids the invalid-escape warning
re.search(r'file_\d\d', 'file_9੩').group()

'file_9੩'

#### `\D` matches a character that is not a digit

In [8]:
# four consecutive non-digit characters; raw string avoids the invalid-escape warning
re.search(r'\D{4}', 'A_B+C').group()

'A_B+'

#### `\w` matches ASCII letter, digit, underscore, unicode letter & ideogram

In [9]:
# \w does not match the hyphen, so the greedy \w* stops after 'A'
re.search(r'\w*', 'A-b_1').group()

'A'

In [10]:
# letters, the underscore and digits are all word characters; the hyphen is matched literally
re.search(r'\w-\w\w\w', 'A-b_1').group()

'A-b_1'

In [11]:
# \w also matches Unicode letters, ideograms and digits
re.search(r'\w-\w\w\w', '字-ま_۳').group()

'字-ま_۳'

#### `\W` matches a character that is not a word character

In [12]:
# five consecutive non-word characters; raw string avoids the invalid-escape warning
re.search(r'\W\W\W\W\W', '*-+=)').group()

'*-+=)'

#### `\s` matches space, tab, newline, carriage return, vertical tab & any unicode separator

In [13]:
# \s matches both the space and the newline in the subject string
re.search(r'a\sb\sc', 'a b\nc').group()

'a b\nc'

#### `\S` matches a character that is not a whitespace character

In [14]:
# four consecutive non-whitespace characters; raw string avoids the invalid-escape warning
re.search(r'\S\S\S\S', 'Yoyo').group()

'Yoyo'

#### `.` matches any character except line break

In [15]:
# the dot stands in for exactly one character (here 'b')
re.search('a.c', 'abc').group()

'abc'

In [16]:
# greedy .* consumes the whole single-line string
re.search('.*', 'whatever, man.').group()

'whatever, man.'

In [17]:
# without DOTALL the dot does not match '\n', so the match stops at the line break
re.search('.*', 'what happen then to\n new line').group()

'what happen then to'

In [18]:
# a tab is not a line break, so the dot matches it and .* runs to the end
re.search('.*', 'what happen then to\t new tab').group()

'what happen then to\t new tab'

#### `\` escapes a special character
List of special characters: __. * + ? $ ^ \\ | [ { ( ) } ]__

In [19]:
# \. matches a literal dot; raw string avoids the invalid-escape warning
re.search(r'a\.c', 'a.c').group()

'a.c'

In [20]:
# every bracket is escaped so it is matched literally; raw string avoids invalid-escape warnings
re.search(r'\[\{\(\)\}\]', '[{()}]').group()

'[{()}]'

## Character classes

#### `[...]` matches one of the characters in the brackets

In [21]:
# search() returns only the first uppercase vowel found
re.search('[AEIOU]', 'One uppercasE vowel').group()

'O'

In [22]:
# findall() returns every uppercase vowel, not just the first
re.findall('[AEIOU]', 'One uppercasE vowel')

['O', 'E']

In [23]:
# the class [ao] allows either vowel in the middle position
re.findall('T[ao]p', 'Tap or Top')

['Tap', 'Top']

In [24]:
# a-e inside brackets is a range; search() returns the first character in it
re.search('[a-e]', 'abcdefgh12345').group()

'a'

In [25]:
# matches any letter in the range a to e, or the digits 1 and 2
re.findall('[a-e12]', 'abcdefgh12345')

['a', 'b', 'c', 'd', 'e', '1', '2']

In [26]:
# \x41 and \x45 are the hex escapes for 'A' and 'E', so the class is the range A-E
re.search('[\x41-\x45]{3}', 'ABE').group()

'ABE'

In [27]:
# same A-E range via hex escapes; findall() returns the single three-character match in a list
re.findall('[\x41-\x45]{3}', 'ABE')

['ABE']

#### `[^...]` matches one of the characters NOT in the brackets

In [28]:
# a leading ^ negates the class: matches every character that is NOT a to e, 1 or 2
re.findall('[^a-e12]', 'abcdefgh12345')

['f', 'g', 'h', '3', '4', '5']

## Inline Modifiers

#### `(?i)`: case-insensitive mode

In [29]:
# (?i) at the start of the pattern makes the whole match ignore case
re.findall('(?i)Monday', 'monDAY')

['monDAY']

In [30]:
# the matched text keeps the casing of the subject string, not of the pattern
re.search('(?i)Monday', 'monDAY').group()

'monDAY'

#### `(?s)`: DOTALL mode. The dot also matches the newline character (`\n`), so `.*` can span line breaks such as `\r\n`

In [31]:
# with (?s) the dot also matches '\n', so .* spans the '\r\n' line break
re.findall('(?s)From A.*to Z', 'From A\r\n to Z')

['From A\r\n to Z']

In [32]:
# (?m) MULTILINE mode: ^ and $ also match at the start and end of each line.
# The subject uses '\n' line breaks, so the pattern matches '\n' between the lines.
re.findall(r'(?m)^1$\n^2$\n^3$', '1\n2\n3')

['1\n2\n3']

## Quantifiers

#### `?`: once or none

In [33]:
# s? makes the trailing 's' optional, so the singular form matches too
re.search('plurals?', 'plural').group()

'plural'

In [34]:
# a trailing ? makes the quantifier "lazy": \d+? takes just one digit
re.search(r'\d+?', '12345').group()

'1'

In [35]:
# a trailing ? makes the quantifier "lazy": A*? is satisfied by zero characters
re.search('A*?', 'AAA').group()

''

In [36]:
# a trailing ? makes the quantifier "lazy": {2,4}? stops at the minimum of 2
re.search(r'\w{2,4}?', 'abcd').group()

'ab'

#### `*`: 0 or more, "greedy"

In [37]:
# B* matches zero characters here, so the match runs straight from the A's to the C's
re.search('A*B*C*', 'AAACC').group()

'AAACC'

In [38]:
# greedy: A* takes as many A's as it can
re.search('A*', 'AAA').group()

'AAA'

#### `+`: 1 or more, "greedy"

In [39]:
# \w+ greedily takes every word character after the hyphen; raw string avoids invalid-escape warnings
re.search(r'Version\s\w-\w+', 'Version A-b1_1').group()

'Version A-b1_1'

In [40]:
# greedy: \d+ takes all the digits; raw string avoids the invalid-escape warning
re.search(r'\d+', '12345').group()

'12345'

#### `{2,4}`: 2 to 4 times

In [41]:
# greedy: takes the maximum of 4 digits even though more are available
re.search(r'\d{2,4}', '123456').group()

'1234'

#### `{3,}`: 3 or more

In [42]:
# no upper bound: takes every word character (the underscore counts as one)
re.search(r'\w{3,}', 'regex_tutorial').group()

'regex_tutorial'

## Logic

#### `|`: alternation (OR operator)

In [43]:
# the first alternative '22' does not occur in the subject, so the second one matches
re.search('22|33', '33').group()

'33'

#### `(...)`: capturing group

In [44]:
# group() returns the whole match; the parenthesised part ('pple') is available as group(1)
re.search('A(nt|pple)', 'Apple (captures "pple")').group()

'Apple'

#### `(?:...)`: non-capturing group

In [45]:
# (?:...) groups the alternatives without creating a numbered capture group
re.search('A(?:nt|pple)', 'Apple').group()

'Apple'