# Simple string searching
- methods on string class
- useful but limited functionality

In [1]:
# sample string used by the simple string-method examples
s = "foozapbar"
# index() reports the 0-based offset at which the substring starts
s.index('zap')

3

In [2]:
# substring membership test: the `in` operator yields a bool

'zap' in s

True

In [3]:
# prefix test: does the string begin with 'foo'?
s.startswith('foo')

True

In [4]:
# suffix test: does the string end with 'bar'?
s.endswith('bar')

True

# Regular Expressions
- very powerful, widely used
- syntax a tad cryptic at first glance
- Python has a fairly standard implementation, similar to what other languages provide
- [`re` module documentation](https://docs.python.org/3.5/library/re.html)

In [5]:
# The pattern matches substrings that start with 'x', end with 'y',
# and have one or more digits in between.
# (Written as a raw string, the usual convention for regex patterns.)

import re

s = 'zxcvx9784843845ysdfx234yzX333Ycv234'
pat = r'x[0-9]+y'

In [6]:
# compile the regular expression pattern once, for speed;
# the resulting pattern object `rec` is reused by the cells below

rec = re.compile(pat)
rec

re.compile(r'x[0-9]+y', re.UNICODE)

In [7]:
# find all substrings that match the pattern
# note: matching is case sensitive, so 'X333Y' is not reported

rec.findall(s)

['x9784843845y', 'x234y']

In [8]:
# case-insensitive search via the module-level function,
# passing the pattern string directly instead of a compiled pattern

re.findall(pat, s, re.IGNORECASE)

['x9784843845y', 'x234y', 'X333Y']

In [9]:
# replace every match of the pattern with a fixed string

rec.sub('FOOBAR', s)

'zxcvFOOBARsdfFOOBARzX333Ycv234'

In [10]:
# split the string at every match of the pattern
# (the matched text itself is not kept in the result)

rec.split(s)

['zxcv', 'sdf', 'zX333Ycv234']

# decrypt with RE

In [11]:
# encoded message: each hidden word directly follows a single digit that
# gives the word's length; everything else is filler.
# ('\\' in the literal is one escaped backslash character.)
e = '{SVIu6Python-)dKct@\\JK)2is:y:=;;~6reallyMZ-&Bk`*6great!NB!|Krj##'

In [12]:
# cut the message into chunks: one digit followed by a run of non-digits,
# i.e. each chunk extends up to (but not including) the next digit
words = re.findall('[0-9][^0-9]+', e)
words

['6Python-)dKct@\\JK)', '2is:y:=;;~', '6reallyMZ-&Bk`*', '6great!NB!|Krj##']

In [13]:
# the first character of each chunk is a length prefix; the hidden word is
# the next `ln` characters, and the remainder of the chunk is discarded
for word in words:
    ln = int(word[0])        # single-digit length prefix
    decode = word[1:ln+1]    # the ln characters right after the prefix
    print(decode)

Python
is
really
great!


# RE groups

In [14]:
# the pattern is "run of non-digits, then one digit" repeated four times;
# each parenthesised [0-9] is a capturing group, so the match object
# remembers the four digits separately
mo = re.match('[^0-9]+([0-9])[^0-9]+([0-9])[^0-9]+([0-9])[^0-9]+([0-9])', e)
mo

<_sre.SRE_Match object; span=(0, 49), match='{SVIu6Python-)dKct@\\JK)2is:y:=;;~6reallyMZ-&Bk`*>

In [15]:
# groups() returns the text captured by each parenthesised group, as a tuple
mo.groups()

('6', '2', '6', '6')