In [1]:
import re

re.findall(r'b', 'abcd')

['b']

In [2]:
def show_all_matches(regexes, subject, re_length=6, max_matches=8):
    """Print a small table showing what each regex finds in ``subject``.

    For every pattern, ``re.findall`` is run against ``subject`` and the
    resulting list is printed next to the pattern.

    Parameters
    ----------
    regexes : iterable of str
        Regular expression patterns to try, in display order.
    subject : str
        The text to search.
    re_length : int, optional
        Minimum width of the regex column. The column grows automatically
        when a pattern (or the header word) is longer, so rows always line up.
    max_matches : int, optional
        Lists longer than this are cut to the first ``max_matches`` entries
        followed by a ``'...'`` marker.

    Returns
    -------
    None
        The table is written to standard output.
    """
    regexes = list(regexes)  # iterated twice: once for the width, once for the rows
    header = 'regex'
    # Header, rule and rows all share one width so the '|' separators align.
    width = max([re_length, len(header)] + [len(r) for r in regexes])
    row = ' {:<%d} | {}' % width

    print('Sentence:')
    print()
    print(' {}'.format(subject))
    print()
    print(row.format(header, 'matches'))
    print(row.format('-' * len(header), '-------'))
    for regexp in regexes:
        matches = re.findall(regexp, subject)
        if len(matches) > max_matches:
            matches = matches[:max_matches] + ['...']
        print(row.format(regexp, repr(matches)))

In [3]:
sentence = 'Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.'

In [4]:
show_all_matches([r'a', r'm', r'M', r'Mary', 
                  r'little', r'1', r'10', r'22'], 
                sentence)

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex | matches
 ----- | -------
 a      | ['a', 'a', 'a', 'a', 'a']
 m      | ['m', 'm']
 M      | ['M']
 Mary   | ['Mary']
 little | ['little', 'little']
 1      | ['1', '1', '1']
 10     | ['10']
 22     | ['22']


In [6]:
show_all_matches([r'\w', r'\d', r'\s', r'.', r'\.'], sentence)

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex | matches
 ----- | -------
 \w     | ['M', 'a', 'r', 'y', 'h', 'a', 'd', 'a', '...']
 \d     | ['1', '1', '0', '1', '2', '2', '2']
 \s     | [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '...']
 .      | ['M', 'a', 'r', 'y', ' ', 'h', 'a', 'd', '...']
 \.     | ['.', '.', '.']


In [7]:
show_all_matches([r'l\w\w\w\W', r'\d\d'], sentence)

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex | matches
 ----- | -------
 l\w\w\w\W | ['lamb.', 'lamb.']
 \d\d   | ['10', '12', '22']


In [8]:
show_all_matches([r'\d+'], sentence)

print('\n--\n')

show_all_matches([r'a{2,}', r'a{2}', r'a{3,4}'], 'aabbaaaa')

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex | matches
 ----- | -------
 \d+    | ['1', '10', '12', '22']

--

Sentence:

 aabbaaaa

 regex | matches
 ----- | -------
 a{2,}  | ['aa', 'aaaa']
 a{2}   | ['aa', 'aa', 'aa']
 a{3,4} | ['aaaa']


In [9]:
# Character classes: [...] matches one character from the set, [^...] one character NOT in the set.
show_all_matches([r'[lt]', 
                  r'[lt]+', 
                  r'[^aeiou\s\.]', # any character except a lowercase vowel, whitespace, or a period (digits and commas match too)
                  r'[a-d]'], sentence, re_length=12)

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex       | matches
 -----       | -------
 [lt]         | ['l', 't', 't', 'l', 'l', 'l', 't', 't', '...']
 [lt]+        | ['l', 'ttl', 'l', 'l', 'ttl', 'l', 't', 't', '...']
 [^aeiou\s\.] | ['M', 'r', 'y', 'h', 'd', 'l', 't', 't', '...']
 [a-d]        | ['a', 'a', 'd', 'a', 'a', 'b', 'a', 'b']


In [11]:
# Word-boundary (\b) and anchor (^ for the start, $ for the end) examples.
show_all_matches([r'\bo\w+', # any word that starts with an o (followed by at least one more word character)
                  r'^\s', # starts with a whitespace character
                  r'^M',  # starts with 'M' 
                  r'\.$' # ends with a period
                 ], sentence)

Sentence:

 Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regex | matches
 ----- | -------
 \bo\w+ | ['one']
 ^\s    | []
 ^M     | ['M']
 \.$    | ['.']


In [12]:
sentence = '''
You can find us on the web at https://codeup.com. Our ip address is 123.123.123.123 (maybe).
'''.strip()

In [13]:
# One or more digits, followed by exactly three repetitions of ".digits".
# \d+ does not restrict each part to 0-255, so this matches the dotted-quad shape,
# not only valid addresses.
ip_re = r'\d+(\.\d+){3}'

match = re.search(ip_re, sentence)
# Index 0 is the entire matched text; the repeated capturing group would only hold its last repetition.
match[0]

'123.123.123.123'

In [14]:
match

<_sre.SRE_Match object; span=(68, 83), match='123.123.123.123'>

In [15]:
# Three capturing groups: the protocol ("http" or "https"), the domain name, and the top-level domain.
url_re = r'(https?)://(\w+)\.(\w+)'

# .groups() returns the captured pieces in order, so they can be unpacked directly.
# re.search returns None when nothing matches, in which case this line would raise AttributeError.
protocol, domain, tld = re.search(url_re, sentence).groups()

print(f'''
protocol: {protocol}
domain:   {domain}
tld:      {tld}
''')


protocol: https
domain:   codeup
tld:      com



In [17]:
# (?P<name>...) is a named capturing group; (?:...) groups without capturing,
# so the domain part is matched but does not appear in groups() or groupdict().
url_re = r'(?P<protocol>https?)://(?:\w+)\.(?P<tld>\w+)'
match = re.search(url_re, sentence)

print(f'''
groups: {match.groups()}
referencing a group by name: {match.group('tld')}
group dictionary: {match.groupdict()}
''')


groups: ('https', 'com')
referencing a group by name: com
group dictionary: {'protocol': 'https', 'tld': 'com'}



In [18]:
# remove anything that's not a digit
re.sub(r'\D', '', 'abc 123')

'123'

In [19]:
# remove anything that's not a lowercase letter
re.sub(r'[^a-z]', '', 'abc 123')

'abc'

In [20]:
# match three characters, capture the middle one, and replace all three with it (\1)
re.sub(r'.(.).', r'\1', 'abc')

'b'

In [21]:
# capture three characters separately and write them back in reverse order
re.sub(r'(.)(.)(.)', r'\3\2\1', 'abc')

'cba'

In [22]:
# replace the last two characters of the string with a single 'X'
re.sub(r'.{2}$', 'X', 'abc')

'aX'

In [23]:
# Multi-line form of r'[aeiou][^aeiou]' with inline (?#...) comments.
# The newlines and spaces are part of the string, so this only matches like the
# compact form when it is used with the re.VERBOSE (re.X) flag, which ignores
# whitespace in the pattern.
regexp = r'''
[aeiou] (?# any vowel)
[^aeiou] (?# followed by a non-vowel)
'''

In [24]:
regexp = r'[aeiou][^aeiou]'