In [None]:
import re

Demo 1 - search()

search() takes the pattern and text to scan, and returns a Match object when the pattern is found.
If the pattern is not found, search() returns None.

In [None]:
pattern = 'this'
text = 'Does this text match the pattern?'

re.search(pattern,text)

In [None]:
print (re.search(pattern,text))

In [None]:
if re.search(pattern,text):
    print ('A match was found')
else:
    print ('No match was found')

Demo 2 - search() with several patterns

In [None]:
# Candidate patterns to look for, and the text they are tested against.
patterns = ['this', 'that']
text = 'Does this text match the pattern?'

for pattern in patterns:
    print('Looking for "{}" in "{}" ->'.format(pattern, text))

    # re.search() returns a Match object (truthy) or None (falsy).
    print('found a match!' if re.search(pattern, text) else 'no match')

Demo 3
match.start() / match.end()

The Match object returned by search() holds information about the
nature of the match, including the original input string, the regular
expression used, and the location within the original string where the
pattern occurs.

In [None]:
pattern = 'this'
text = 'Does this text match this pattern?'

# search() stops at the first occurrence of the pattern.
match = re.search(pattern, text)

# span() returns (start, end) in a single call; end is exclusive.
s, e = match.span()

In [None]:
print ('Found "{}" in "{}" from {} to {} ("{}")'.format(pattern, match.string, s, e, text[s:e]))

Demo 4
Multiple Matches
Note that in the last example we had two "this" occurrences. How do we find them all?

In [None]:
pattern = 'this'
text = 'Does this text match this pattern?'

re.findall(pattern, text)

In [None]:
print ('"{}" was found {} times'.format(pattern, len(re.findall(pattern, text))))

In [None]:
for match in re.findall(pattern, text):
    print(match.upper())

Use finditer() to iterate through match objects in a given string

In [None]:
# finditer() yields one Match object per non-overlapping occurrence, scanning
# left to right. `pattern` and `text` are the values set in the earlier cell.
for match in re.finditer(pattern, text):
    # span() gives the (start, end) offsets of this occurrence; end is exclusive.
    s, e = match.span()
    print ('Found "{}" at {}:{}'.format(text[s:e], s, e))

Demo 5
Patterns

In [None]:
def search_patterns(pattern, text):
    """Print every non-overlapping match of ``pattern`` found in ``text``.

    Each match is reported on its own line as the matched substring followed
    by its ``start:end`` offsets. If the pattern never matches, a single
    "No match was found" line is printed instead.

    Args:
        pattern: Regular expression handed to ``re.finditer``.
        text: The string to scan.

    Returns:
        None. All results are written to standard output.
    """
    found_any = False
    for hit in re.finditer(pattern, text):
        found_any = True
        begin, stop = hit.span()
        print('Found "{}" at {}:{}'.format(text[begin:stop], begin, stop))
    if not found_any:
        print('No match was found')


text = 'abbaaabbbbaaaaa'

The letters "ab"

In [None]:
search_patterns('ab', text)

The letters "zz"

In [None]:
search_patterns('zz', text)

a followed by zero or more b

In [None]:
search_patterns('ab*', text)

a followed by one or more b

In [None]:
search_patterns('ab+', text)

a followed by zero or one b

In [None]:
search_patterns('ab?', text)

a followed by three b

In [None]:
search_patterns('ab{3}', text)

a followed by two to three b

In [None]:
search_patterns('ab{2,3}', text)

Demo 6
Turning off Greedy-Behavior

The normal processing for a repetition instruction is to consume as much of the input
as possible while matching the pattern. This so-called greedy behavior can be turned off by
following the repetition instruction with ?

In [None]:
string = 'abbaaabbbbaaaaa'

search_patterns('ab*?', string) # a followed by zero or more b

search_patterns('ab+?', string) # a followed by one or more b

search_patterns('ab??', string) # a followed by zero or one b

search_patterns('ab{3}?', string) # a followed by three b

search_patterns('ab{2,3}?', string) # a followed by between two to three b

Demo 7
Character Sets

In [None]:
search_patterns('[ab]', string)    # either a or b

search_patterns('a[ab]+', string)  # a followed by one or more a or b

Demo 8
Except

A character set can also be used to exclude specific characters.
The special marker ^ means to look for characters not in the set following.
This pattern finds all of the substrings that do not contain the characters -, ., !, ?, or a space.

In [None]:
string = 'This is some text -- with punctuation. Can we remove it? Yes, we can!'

# A leading ^ inside [...] negates the set; the '-' placed right after it is
# taken literally rather than as a range operator.
# NOTE: ',' is not in the excluded set, so the comma in "Yes," stays attached
# to that match.
search_patterns('[^-.!? ]+', string) # sequences without -,.,!,? or space

Demo 9
Ranges

As character sets grow larger, typing every character that should (or should not)
match becomes tedious. A more compact format using character ranges lets you define
a character set to include all of the contiguous characters between a start and
stop point.

In [None]:
string = 'This is some text -- with punctuation.'

search_patterns('[a-z]+',string)  # sequences of lower case letters

search_patterns('[A-Z]+',string)  # sequences of upper case letters

search_patterns('[a-zA-Z]+',string)  # sequences of lower or upper case letters

search_patterns('[A-Z][a-z]+',string)  # one upper case letter followed by lower case letters

Demo 10
Dot Metacharacter

As a special case of a character set the metacharacter dot, or period (.),
indicates that the pattern should match any single character in that position.

In [None]:
string = 'abbaaabbbbaaaaa'

search_patterns('a.',string)  # a followed by any one character

search_patterns('b.',string)  # b followed by any one character

search_patterns('a.*b',string)  # a followed by anything, ending in b

Demo 11
Escape Codes

Escape Codes
\d  a digit
\D  a non-digit
\s  whitespace (tab, space, newline, etc.)
\S  non-whitespace
\w  alphanumeric or underscore
\W  anything other than alphanumeric or underscore

In [None]:
string = 'This is a prime #1 example!'

search_patterns(r'\d+', string)  # sequence of digits

search_patterns(r'\D+', string)  # sequence of non-digits

search_patterns(r'\s+', string)  # sequence of whitespace

search_patterns(r'\S+', string)  # sequence of non-whitespace

search_patterns(r'\w+', string)  # alphanumeric characters

search_patterns(r'\W+', string)  # non-alphanumeric

Demo 13
Anchoring

Escape Codes
\d  a digit
\D  a non-digit
\s  whitespace (tab, space, newline, etc.)
\S  non-whitespace
\w  alphanumeric or underscore
\W  anything other than alphanumeric or underscore

Anchors
^   start of string, or line
$   end of string, or line
\A  start of string
\Z  end of string
\b  empty string at the beginning or end of a word
\B  empty string not at the beginning or end of a word

In [None]:
string = 'This is some text -- with punctuation.'

search_patterns(r'^\w+',string)  # word at start of string

search_patterns(r'\A\w+',string)  # word at start of string

search_patterns(r'\w+\S*$',string)  # word at end of string, with optional punctuation

search_patterns(r'\w+\S*\Z',string)  # word at end of string, with optional punctuation

search_patterns(r'\w*t\w*',string)  # word containing 't'

search_patterns(r'\bt\w+',string)  # 't' at start of word

search_patterns(r'\w+t\b', string)  # 't' at end of word

Demo 14
Dissecting Matches with Groups

In [None]:
text = 'This is some text -- with punctuation.'

In [None]:
print(text)

In [None]:
# For each pattern, compile it, search `text` (set in an earlier cell) and show
# the substrings captured by the parenthesised groups.
for pattern in [ r'^(\w+)',           # word at start of string
                 r'(\w+)\S*$',        # word at end of string, with optional punctuation
                 r'(\bt\w+)\W+(\w+)', # word starting with 't' then another word
                 r'(\w+t)\b',         # word ending with 't'
                 ]:
    regex = re.compile(pattern)
    match = regex.search(text)
    print ('Matching "{}"'.format(pattern))
    # search() returns None when the pattern does not occur; report that
    # instead of letting match.groups() raise AttributeError.
    if match is None:
        print ('  ', 'no match')
        continue
    # groups() returns a tuple with one entry per capturing group.
    print ('  ', match.groups())
    

Demo 15
Dissecting Matches with Groups

In [None]:
text = 'This is some text -- with punctuation.'

In [None]:
print ('Input text : "{}"'.format(text))

word starting with 't' then another word

In [None]:
regex = re.compile(r'(\bt\w+)\W+(\w+)')

In [None]:
print ('Pattern : "{}"'.format(regex.pattern))

In [None]:
match = regex.search(text)

In [None]:
print ('Entire match          :', match.group(0))

In [None]:
print ('Word starting with "t":', match.group(1))

In [None]:
print ('Word after "t" word   :', match.group(2))

In [None]:
print(match.groups())