# More Pattern Matching with Regular Expressions


## Special RegEx characters 
- `.  ^  $  *  +  ?  {  }  [  ]  \  |  (  )`
- they must be escaped with a backslash when they are meant to match literally (the same backslash used for escapes such as `\n` in ordinary strings). E.g.:
    - `\.`
    - `\^`

In [1]:
# .  ^  $  *  +  ?  {  }  [  ]  \  |  (  )


In [16]:
import re
phoneNumberRegex = re.compile (r'\d\d\d-\d\d\d-\d\d\d\d')
phoneNumberRegex.search('123-234-3456 dummy text').group()


'123-234-3456'

In [19]:
import re
phoneNumberRegex = re.compile (r'\(\d\d\d\)-\d\d\d-\d\d\d\d')
phoneNumberRegex.search('(123)-234-3456 dummy text').group()


'(123)-234-3456'

## Matching multiple groups with the Pipe
- `|` - the pipe character

The pattern `A|B` matches either `A` or `B`; when both occur in the searched text, `search()` returns whichever appears first.


In [20]:
import re
heroRegex = re.compile (r'Batman|Tina Fey')

In [21]:
mo1 = heroRegex.search('Batman and Tina Fey')
mo1

<re.Match object; span=(0, 6), match='Batman'>

In [22]:
mo1.group()

'Batman'

In [23]:
mo1 = heroRegex.search('Batmen and Tina Fey')
mo1.group()

'Tina Fey'

In [5]:
heroRegex.findall('Batman and Tina Fey')

['Batman', 'Tina Fey']

## use pipe to match one of several patterns

In [24]:
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
batRegex.search('Batmobile lost a wheel').group()

'Batmobile'

In [25]:
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
batRegex.findall('Batmobile lost a wheel when Batman was driving.')

['mobile', 'man']

## making the regex pattern optional
- the group immediately before `?` is optional: it may appear zero times or one time in the match


In [26]:
batRegex = re.compile(r'Bat(wo)?man')
batRegex.search('The Adventures of Batman').group()

'Batman'

In [9]:
batRegex.search('The Adventures of Batwoman').group()

'Batwoman'

## Matching Zero or More with the Star
- `*` after a group matches zero or more repetitions of that group

In [27]:
batRegex = re.compile(r'Bat(wo)*man')
batRegex.search('The Adventures of Batman').group()

'Batman'

In [28]:
batRegex.search('The Adventures of Batwowowowoman').group()

'Batwowowowoman'

## Matching One or More with the Plus


In [32]:
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventures of Batwoman')
mo1.group()

'Batwoman'

In [13]:
batRegex.search('The Adventures of Batwowowowoman').group()

'Batwowowowoman'

In [33]:
batRegex.search('The Adventures of Batman').group()

AttributeError: 'NoneType' object has no attribute 'group'

## Matching Specific Repetitions with Braces
- sometimes we want exactly n repetitions of a sequence/group: write `{n}` after the group

In [37]:
haRegex = re.compile(r'(Ha){1}')
haRegex.search('HaHaHa').group()

'Ha'

In [19]:
mo2 = haRegex.search('Ha')
mo2.group()

'Ha'

# The findall() Method


In [39]:
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)') # has groups
phoneNumRegex.search('Cell: 415-555-9999 Work: 212-555-0000').group()


'415-555-9999'

In [41]:
phoneNumRegex = re.compile(r'([0-9]\d\d)-(\d\d\d)-(\d\d\d\d)') # has groups
phoneNumRegex.search('Cell: 415-555-9999 Work: 212-555-0000').group()


'415-555-9999'

In [22]:
res = phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')
res[0]

('415', '555', '9999')

# Character Classes
![](2022-11-15-11-47-29.png)

In [30]:
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)') # has groups
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')

[('415', '555', '9999'), ('212', '555', '0000')]

In [42]:
phoneNumRegex = re.compile(r'\D\D\D\D:') # no groups, so findall() returns the whole matches as strings
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')

['Cell:', 'Work:']

In [44]:
xmasRegex = re.compile(r'[0123456789]+\s\w+')
xmasRegex.findall('12 drummers, 11 pipers_ 10 lords, 9 ladies, 8 maids, \
    7 swans, 6 geese, 5 rings, 4 birds, 3 hens, 2 doves, 1 partridge')

['12 drummers',
 '11 pipers_',
 '10 lords',
 '9 ladies',
 '8 maids',
 '7 swans',
 '6 geese',
 '5 rings',
 '4 birds',
 '3 hens',
 '2 doves',
 '1 partridge']

## Making your own Character classes

In [45]:
vowelRegex = re.compile(r'[aeiouAEIOU]')
vowelRegex.findall('RoboCop eats baby food. BABY FOOD.')

['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']

In [None]:
s 

## The Caret and Dollar Sign Characters


In [46]:
beginsWithHello = re.compile(r'^Hello')
beginsWithHello.search('Hello, world!')

<re.Match object; span=(0, 5), match='Hello'>

In [47]:
'Hello, world!'[0:5]

'Hello'

In [48]:
beginsWithHello.search('This Hello, world!')==None

True

In [50]:
res = beginsWithHello.search('This Hello, world!')
print(res)

None


In [51]:
endsWithNumber = re.compile(r'\d$')
endsWithNumber.search('Your number is 42')

<re.Match object; span=(16, 17), match='2'>

In [54]:
endsWithNumber.search('Your number is 42.')==None

True

## The Wildcard Character

In [55]:
atRegex = re.compile(r'.at')

In [56]:
atRegex.findall('The cat in the hat sat on the flat mat.')

['cat', 'hat', 'sat', 'lat', 'mat']

### Matching Everything with Dot-Star

In [58]:
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
nameRegex.search('First Name: Al some redundant text Last Name: Sweigart').group()

'First Name: Al some redundant text Last Name: Sweigart'

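# Substituting Strings with the sub() Method

- note: in both patterns below the greedy `(.*)` runs to the end of the string, so everything from the first `Agent` onward is replaced by a single `CENSORED`; a pattern such as `Agent \w+` would replace only `Agent Alice` and `Agent Bob`.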


In [66]:
namesRegex = re.compile(r'Agent (.*)')
namesRegex.sub('CENSORED', 'Agent Alice gave the secret documents to Agent Bob.')

'CENSORED'

In [58]:
namesRegex = re.compile(r'Agent \w(.*)')
namesRegex.sub('CENSORED', 'Agent Alice gave the secret documents to Agent Bob.')

'CENSORED'

# Summary 
![](2022-11-15-12-10-41.png)

In [69]:
ourRegEx = re.compile(r'[A-Za-z]{2}\d{3}')
ourRegEx.findall("Awesome free AI classes on youtube are CS229 and cs231")

['CS229', 'cs231']