# Regular Expressions

In [1]:
import re

In [2]:
# Search terms to look for; each one is handed to re.search as a pattern.
patterns = ['term1','term2']

In [3]:
# Sample string to search: it contains 'term1' but not 'term2'.
text = 'This is a string with term1, but not the other term'

In [8]:
# Try every search term against the sample text and report the outcome.
for pattern in patterns:
    print('Searching for "%s" in: \n"%s"' % (pattern,text))

    # re.search yields a match object (truthy) on success and None otherwise.
    found = re.search(pattern,text)

    # Blank spacer lines are emitted in both cases, so print them once here.
    print('\n')
    print('Match was found. \n' if found else 'No Match was found. \n')

Searching for "term1" in: 
"This is a string with term1, but not the other term"


Match was found. 

Searching for "term2" in: 
"This is a string with term1, but not the other term"


No Match was found. 



In [9]:
# re.search returns None when the pattern occurs nowhere in the string.
print(re.search('h','w'))

None


In [10]:
# Keep the match object for the first search term so it can be inspected.
match = re.search(patterns[0],text)

In [11]:
# A successful re.search returns a match object, not a bool or a string.
type(match)

_sre.SRE_Match

In [12]:
# Index in `text` at which the matched substring begins.
match.start()

22

In [13]:
# Index just past the last matched character (exclusive end), so
# text[match.start():match.end()] is the matched substring.
match.end()

27

In [20]:
# Pattern to split on.
split_term = '@'

# Sample sentence containing an email address.
phrase = 'What is your email, is it hello@gmail.com?'

In [21]:
# Split `phrase` at each occurrence of the pattern; the separator is dropped.
re.split(split_term,phrase)

['What is your email, is it hello', 'gmail.com?']

In [17]:
# For comparison: str.split with no argument splits on runs of whitespace.
'hello world'.split()

['hello', 'world']

In [41]:
# re.findall returns every non-overlapping match as a list of strings.
re.findall('match','Here is one match, here is another match')

['match', 'match']

## Repetition Syntax

In [35]:
def multi_re_find(patterns,phrase):
    '''
    Takes in a list of regex patterns
    Prints a list of all matches
    '''
    # NOTE: the docstring is kept verbatim because a later cell displays
    # multi_re_find.__doc__ and its recorded output has to stay in sync.
    banner = 'Searching the phrase using the re check: %r'

    for regex in patterns:
        # Announce the pattern before searching, so the pattern is already
        # on screen if re.findall rejects it.
        print(banner % regex)
        hits = re.findall(regex,phrase)
        print(hits)
        # '\n' plus print's own newline leaves two blank lines between runs.
        print('\n')

In [36]:
# Display the raw docstring attached to multi_re_find.
multi_re_find.__doc__

'\n    Takes in a list of regex patterns\n    Prints a list of all matches\n    '

In [39]:
# Each quantifier below applies to the single 'd' that precedes it.
testphrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd...sdd'
testpatterns = [
    'sd*',      # an s, then any number of d's (including none)
    'sd+',      # an s, then at least one d
    'sd?',      # an s, then an optional single d
    'sd{3}',    # an s, then exactly three d's
    'sd{2,3}',  # an s, then two or three d's
]
multi_re_find(testpatterns,testphrase)

Searching the phrase using the re check: 'sd*'
['sd', 'sd', 's', 's', 'sddd', 'sddd', 'sddd', 'sd', 's', 's', 's', 's', 's', 's', 'sdddd', 'sdd']


Searching the phrase using the re check: 'sd+'
['sd', 'sd', 'sddd', 'sddd', 'sddd', 'sd', 'sdddd', 'sdd']


Searching the phrase using the re check: 'sd?'
['sd', 'sd', 's', 's', 'sd', 'sd', 'sd', 'sd', 's', 's', 's', 's', 's', 's', 'sd', 'sd']


Searching the phrase using the re check: 'sd{3}'
['sddd', 'sddd', 'sddd', 'sddd']


Searching the phrase using the re check: 'sd{2,3}'
['sddd', 'sddd', 'sddd', 'sddd', 'sdd']




## Character Sets

In [45]:
testphrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd'

# Square brackets define a set: any one of the listed characters matches.
testpatterns = [
    '[sd]',    # a single character that is either s or d
    's[sd]+',  # an s, then one or more characters that are each s or d
]

multi_re_find(testpatterns,testphrase)

Searching the phrase using the re check: '[sd]'
['s', 'd', 's', 'd', 's', 's', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 'd', 's', 'd', 's', 'd', 's', 's', 's', 's', 's', 's', 'd', 'd', 'd', 'd']


Searching the phrase using the re check: 's[sd]+'
['sdsd', 'sssddd', 'sdddsddd', 'sds', 'sssss', 'sdddd']




## Exclusion

In [46]:
# Sentence with mixed punctuation to strip out.
# NOTE(review): 'punctutation' is misspelled in the sample text; the recorded
# output below echoes that spelling, so the literal is left as-is.
testphrase = 'This is a string! But it has punctutation. How can we remove it?'

In [47]:
# A leading ^ inside [] negates the set: match runs of characters that are
# NOT '!', '.', '?' or a space, which leaves just the words.
re.findall('[^!.? ]+',testphrase)

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctutation',
 'How',
 'can',
 'we',
 'remove',
 'it']

## Character Ranges

In [48]:
testphrase = 'This is an example sentence. Lets see if we can find some letters.'

# A hyphen inside a set denotes a range of characters.
testpatterns = [
    '[a-z]+',       # runs of lowercase letters
    '[A-Z]+',       # runs of uppercase letters
    '[a-zA-Z]+',    # runs of letters in either case
    '[A-Z][a-z]+',  # one uppercase letter, then one or more lowercase letters
]

multi_re_find(testpatterns,testphrase)

Searching the phrase using the re check: '[a-z]+'
['his', 'is', 'an', 'example', 'sentence', 'ets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: '[A-Z]+'
['T', 'L']


Searching the phrase using the re check: '[a-zA-Z]+'
['This', 'is', 'an', 'example', 'sentence', 'Lets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: '[A-Z][a-z]+'
['This', 'Lets']




## Escape Codes

In [49]:
testphrase = 'This is a string with some numbers 1233 and a symbol #hashtag'

# Raw strings keep the backslashes intact so they reach the regex engine.
testpatterns = [
    r'\d+',  # one or more digits
    r'\D+',  # one or more non-digit characters
    r'\s+',  # one or more whitespace characters
    r'\S+',  # one or more non-whitespace characters
    r'\w+',  # one or more word characters (letters, digits, underscore)
    r'\W+',  # one or more non-word characters
]

multi_re_find(testpatterns,testphrase)

Searching the phrase using the re check: '\\d+'
['1233']


Searching the phrase using the re check: '\\D+'
['This is a string with some numbers ', ' and a symbol #hashtag']


Searching the phrase using the re check: '\\s+'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']


Searching the phrase using the re check: '\\S+'
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', '#hashtag']


Searching the phrase using the re check: '\\w+'
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', 'hashtag']


Searching the phrase using the re check: '\\W+'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' #']


