## Getting Started

In [1]:
import re

In [2]:
# Compile the pattern once so the resulting object can be reused for many searches.
vowel_rose = r'r[aeiou]se'
sentence = 'A rose is a rose is a rose.'
p = re.compile(vowel_rose)
# search() reports the first place the pattern matches (here the first "rose").
result = p.search(sentence)
print(result)

<_sre.SRE_Match object; span=(2, 6), match='rose'>


## Module-level Function

In [3]:
# Same search without an explicit compile step: the module-level helper
# takes the pattern text and the subject string together.
re.search(
    pattern=r'r[aeiou]se',
    string='A rose is a rose is a rose.',
)

<_sre.SRE_Match object; span=(2, 6), match='rose'>

## Raw String Notation

In [4]:
print(
    'a\nb\nc',      # plain literal: \n is turned into a real newline
    'a\\nb\\nc',    # doubled backslash: a literal backslash followed by n
    r'a\nb\nc',     # raw literal: backslashes are kept exactly as typed
    sep='\n',
)

a
b
c
a\nb\nc
a\nb\nc


In [5]:
subject = '''abc'''
# In a plain literal '\b' is the backspace character, so the regex engine
# never sees a word-boundary token and nothing matches.
cooked = re.search('\babc\b', subject)
# Doubling the backslashes hands the two-character sequence \b to the regex engine.
cookedandprepped = re.search('\\babc\\b', subject)
# A raw literal achieves the same thing without the doubling.
raw = re.search(r'\babc\b', subject)
for outcome in (cooked, cookedandprepped, raw):
    print(outcome)

None
<_sre.SRE_Match object; span=(0, 3), match='abc'>
<_sre.SRE_Match object; span=(0, 3), match='abc'>


## Splitting on a Pattern

In [6]:
# \W matches one non-word character, so '@' and '.' act as the separators
# while the accented letter stays inside its piece.
p = re.compile(r'\W')
address = 'andré@example.com'
p.split(address)

['andré', 'example', 'com']

## Flags

#### re.IGNORECASE

In [7]:
# The pattern is capitalised, but the flag makes matching case-insensitive.
p = re.compile('Foo', flags=re.IGNORECASE)
text = 'foobar'
p.findall(text)

['foo']

#### re.MULTILINE

In [8]:
emails = '''andre@example.com
andré@example.com'''
# One anchored pattern, tried with and without MULTILINE.
anchored_email = r'^\w+@\w+\.\w+$'
# Without the flag, ^ and $ only anchor at the ends of the whole string.
matches1 = re.findall(anchored_email, emails)
# With the flag they also anchor at each line break.
matches2 = re.findall(anchored_email, emails, flags=re.MULTILINE)
for found in (matches1, matches2):
    print(found)

[]
['andre@example.com', 'andré@example.com']


#### re.DOTALL

In [9]:
emails = '''andre@example.com
andré@example.com'''
# 'm', any one character, then 'a': the only candidate straddles the line break.
m_any_a = r'm.a'
# By default '.' refuses to match a newline.
matches1 = re.findall(m_any_a, emails)
# DOTALL lets '.' match the newline as well.
matches2 = re.findall(m_any_a, emails, flags=re.DOTALL)
for found in (matches1, matches2):
    print(found)

[]
['m\na']


#### re.ASCII

In [10]:
# With ASCII, \w is limited to ASCII word characters, so the accented
# address no longer matches.
p = re.compile(r'\w+@\w+\.\w+', flags=re.ASCII)
match1, match2 = (
    p.findall(address)
    for address in ('andre@example.com', 'andré@example.com')
)
print(match1, match2, sep="\n")

['andre@example.com']
[]


#### re.VERBOSE

In [11]:
# In VERBOSE mode unescaped whitespace in the pattern is ignored and '#'
# starts a comment, so each token can be annotated on its own line.
pattern = r'''^     #start here
            \w+    #text before the @ symbol
            @      #@ symbol
            \w+    #text between the @ and .
            \.     #literal dot
            \w+    #text after .
            $      #end here'''
emails = '''andre@example.com
andré@example.com'''
# MULTILINE is combined in so ^ and $ anchor on every line of `emails`.
verbose_email = re.compile(pattern, re.VERBOSE | re.MULTILINE)
verbose_email.findall(emails)

['andre@example.com', 'andré@example.com']

#### re.DEBUG

In [12]:
# DEBUG makes re.compile() print the parsed structure of the pattern.
# NOTE(review): the trailing `([\/\w\.-]*)*` nests one unbounded repeat inside
# another, which looks prone to heavy backtracking on non-matching input --
# confirm before reusing this pattern outside the demo. It is left unchanged
# here because the captured debug dump describes exactly this pattern.
url_pattern = r'\b(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)*\/?\b'
p = re.compile(url_pattern, re.DEBUG)

AT AT_BOUNDARY
MAX_REPEAT 0 1
  SUBPATTERN 1 0 0
    LITERAL 104
    LITERAL 116
    LITERAL 116
    LITERAL 112
    MAX_REPEAT 0 1
      LITERAL 115
    LITERAL 58
    LITERAL 47
    LITERAL 47
SUBPATTERN 2 0 0
  MAX_REPEAT 1 MAXREPEAT
    IN
      CATEGORY CATEGORY_DIGIT
      RANGE (97, 122)
      LITERAL 46
      LITERAL 45
LITERAL 46
SUBPATTERN 3 0 0
  MAX_REPEAT 2 6
    IN
      RANGE (97, 122)
      LITERAL 46
MAX_REPEAT 0 MAXREPEAT
  SUBPATTERN 4 0 0
    MAX_REPEAT 0 MAXREPEAT
      IN
        LITERAL 47
        CATEGORY CATEGORY_WORD
        LITERAL 46
        LITERAL 45
MAX_REPEAT 0 1
  LITERAL 47
AT AT_BOUNDARY


## Groups

In [13]:
p = re.compile(r'(\w+)@(\w+\.(\w+))')
match = p.match('andre@example.com')
# group(0) is the whole match; 1-3 are the parenthesised groups, numbered
# by the position of their opening parenthesis.
email, handle, domain, domain_type = match.group(0, 1, 2, 3)
for part in (email, handle, domain, domain_type):
    print(part)

# groups() returns only the captured groups, without the whole match.
print(match.groups())

andre@example.com
andre
example.com
com
('andre', 'example.com', 'com')


In [14]:
p = re.compile(r'(?P<handle>\w+)@(?P<domain>\w+\.(?P<domain_type>\w+))')
match = p.match('andre@example.com')
email = match.group(0)
# (?P<name>...) groups can be looked up by name instead of by position.
named = match.groupdict()
handle = named['handle']
domain = named['domain']
domain_type = named['domain_type']
print(email, handle, domain, domain_type, sep='\n')

andre@example.com
andre
example.com
com


## sub() with Function

In [15]:
import random
def clean_cuss(match):
    """Return random symbols to stand in for the text of ``match``.

    Intended as the replacement callback for ``sub()``: it receives a match
    object and returns a string of the same length as the matched text,
    made only of characters from ``!@#$%^&*``.
    """
    length = len(match.group(0))
    pool = '!@#$%^&*'
    # random.sample() draws without replacement, so keep doubling the pool
    # until it holds at least as many characters as are needed.
    while len(pool) < length:
        pool += pool
    return ''.join(random.sample(pool, length))

# Match any whole word that contains one of the listed roots, in any letter case.
p = re.compile(
    r'\b[a-z]*(stupid|stinky|darn|shucks|crud|slob)[a-z]*\b',
    flags=re.IGNORECASE | re.MULTILINE,
)
s = '''Shucks! What a cruddy day I\'ve had. I spent the whole darn day \
with my slobbiest friend darning his stinky socks.'''
# sub() calls clean_cuss once per match and splices in whatever it returns.
result = p.sub(clean_cuss, s)
result

"!&@*$%! What a #@$!%& day I've had. I spent the whole !^%# day with my &#%*^*&@% friend &@!#^*% his ^#$@*% socks."