# 8.12 Introduction to Regular Expressions

#### *re* Module and Function *fullmatch*

In [1]:
import re

#### Matching Literal Characters

In [2]:
pattern = '02215'

In [3]:
'Match' if re.fullmatch(pattern, '02215') else 'No Match'

'Match'

In [4]:
'Match' if re.fullmatch(pattern, '51220') else 'No Match'

'No Match'

#### Metacharacters, Character Classes and Quantifiers

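 - \ begins a predefined character class (such as \d)
 - {} quantifier: an exact count or a range of repetitions
 - [] custom character class
 - () capture substrings in a match
 - \* zero or more occurrences
 - \+ at least one occurrence
 - ^ inside [], any character that is not specified; elsewhere, only the beginning of a string
 - $ only the end of a string
 - ? zero or one occurrence
 - . any one character (except a newline, by default)
 - | alternation: either the expression on its left or the one on its right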

In [7]:
# Raw string so \d reaches the regex engine unchanged (a plain '\d' is an
# invalid string escape); \d{5} requires exactly five digits.
'Valid' if re.fullmatch(r'\d{5}', '76589') else 'Invalid'

'Valid'

In [8]:
# Raw string so \d reaches the regex engine unchanged (a plain '\d' is an
# invalid string escape); four digits cannot satisfy \d{5}.
'Valid' if re.fullmatch(r'\d{5}', '9506') else 'Invalid'

'Invalid'

#### Other Predefined Character Classes

- \d Any digit (0-9).
- \D Any character that is *not* a digit.
- \s Any whitespace character (such as spaces, tabs and newlines).
- \S Any character that is *not* a whitespace character.
- \w Any word character (letters, digits and the underscore).
- \W Any character that is *not* a word character.

#### Custom Character Classes

In [9]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'Wally') else 'Invalid'

'Valid'

In [10]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'eva') else 'Invalid'

'Invalid'

In [11]:
'Valid' if re.fullmatch('[^a-z]', 'A') else 'Invalid'

'Valid'

In [13]:
'Valid' if re.fullmatch('[^a-z]', 'a') else 'Invalid'

'Invalid'

In [14]:
'Valid' if re.fullmatch('[*+$]', '$') else 'Invalid'

'Valid'

In [15]:
'Valid' if re.fullmatch('[*+$]', '-') else 'Invalid'

'Invalid'

#### * vs. + Quantifier

In [16]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'Wally') else 'Invalid'

'Valid'

In [17]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'E') else 'Invalid'

'Invalid'

#### Other Quantifiers

In [18]:
'Match' if re.fullmatch('labell?ed', 'labelled') else 'No Match'

'Match'

In [19]:
'Match' if re.fullmatch('labell?ed', 'labeled') else 'No Match'

'Match'

In [20]:
'Match' if re.fullmatch('labell?ed', 'labellled') else 'No Match'

'No Match'

In [21]:
# Raw string avoids the invalid '\d' string escape; {3,} means at least
# three digits, so exactly three is accepted.
'Match' if re.fullmatch(r'\d{3,}', '123') else 'No Match'

'Match'

In [22]:
# Raw string avoids the invalid '\d' string escape; {3,} has no upper
# bound, so seven digits are accepted.
'Match' if re.fullmatch(r'\d{3,}', '1235234') else 'No Match'

'Match'

In [23]:
# Raw string avoids the invalid '\d' string escape; two digits fall below
# the {3,} minimum.
'Match' if re.fullmatch(r'\d{3,}', '12') else 'No Match'

'No Match'

In [24]:
# Raw string avoids the invalid '\d' string escape; {3,6} accepts three to
# six digits, and three is the lower bound.
'Match' if re.fullmatch(r'\d{3,6}', '123') else 'No Match'

'Match'

In [25]:
# Raw string avoids the invalid '\d' string escape; {3,6} accepts three to
# six digits, and six is the upper bound.
'Match' if re.fullmatch(r'\d{3,6}', '123456') else 'No Match'

'Match'

In [26]:
# Raw string avoids the invalid '\d' string escape; seven digits exceed
# the {3,6} upper bound.
'Match' if re.fullmatch(r'\d{3,6}', '1234567') else 'No Match'

'No Match'

In [27]:
# Raw string avoids the invalid '\d' string escape; two digits fall below
# the {3,6} lower bound.
'Match' if re.fullmatch(r'\d{3,6}', '12') else 'No Match'

'No Match'

### Replacing Substrings and Splitting Strings

#### Function *sub* Replacing Strings

In [28]:
re.sub(r'\t', ', ', '1\t2\t3\t4')

'1, 2, 3, 4'

In [29]:
re.sub(r'\t', ', ', '1\t2\t3\t4', count=2)

'1, 2, 3\t4'

#### Function *split*

In [30]:
re.split(r',\s*', '1, 2, 3,4,   5,6,7,8')

['1', '2', '3', '4', '5', '6', '7', '8']

In [31]:
re.split(r',\s*', '1, 2, 3,4,   5,6,7,8', maxsplit=3)

['1', '2', '3', '4,   5,6,7,8']

### Other Search Functions: Accessing Matches

#### Function *search* Finding the First Match Anywhere in a String

In [37]:
result = re.search('Python', 'Python is fun')

In [38]:
result.group() if result else 'not found'

'Python'

In [39]:
result2 = re.search('fun!', 'Python is fun')

In [41]:
result2.group() if result2 else 'not found'

'not found'

#### Ignoring Case with Optional *flags* Keyword Argument

In [42]:
result3 = re.search('Sam', 'SAM WHITE', flags=re.IGNORECASE)

In [43]:
result3.group() if result3 else 'not found'

'SAM'

#### Metacharacters That Restrict Matches to the Beginning or End of a String

In [44]:
result = re.search('^Python', 'Python is fun')

In [45]:
result.group() if result else 'not found'

'Python'

In [46]:
result = re.search('^fun', 'Python is fun')

In [47]:
result.group() if result else 'not found'

'not found'

In [48]:
result = re.search('Python$', 'Python is fun')

In [49]:
result.group() if result else 'not found'

'not found'

In [50]:
result = re.search('fun$', 'Python is fun')

In [51]:
result.group() if result else 'not found'

'fun'

#### Functions *findall* and *finditer* Finding All Matches in a String

In [52]:
contact = 'Wally White, Home: 555-555-1234, Work: 555-555-4321'

In [53]:
re.findall(r'\d{3}-\d{3}-\d{4}', contact)

['555-555-1234', '555-555-4321']

In [54]:
# Iterate over the match objects that re.finditer yields for each
# ddd-ddd-dddd substring of `contact`, printing the text each one matched.
for phone in re.finditer(r'\d{3}-\d{3}-\d{4}', contact):
    print(phone.group())

555-555-1234
555-555-4321


#### Capturing Substrings in a Match ()

In [55]:
text = ' Charlie Cyan, e-mail: demol@deitel.com'

In [56]:
pattern = r'([A-Z][a-z]+ [A-Z][a-z]+), e-mail: (\w+@\w+\.\w{3})'

In [57]:
result = re.search(pattern, text)

In [59]:
result.groups()

('Charlie Cyan', 'demol@deitel.com')

In [60]:
result.group()

'Charlie Cyan, e-mail: demol@deitel.com'

In [62]:
result.group(1)

'Charlie Cyan'

In [63]:
result.group(2)

'demol@deitel.com'