#### How to Write and Match Regular Expressions (Regex)

In [1]:
import re

In [2]:
text_to_search = '''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890

Ha HaHa

MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )

coreyms.com

321-555-4321
123.555.1234
123*555*1234
800-555-1234
900-555-1234

Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
'''

sentence = 'Start a sentence and then bring it to an end'

In [3]:
pattern = re.compile(r"abc")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(1, 4), match='abc'>


In [4]:
# if we use (.) with out excape \
pattern = re.compile(r".")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match) #it returns weird all characters

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.M

In [5]:
#so we need to use escape \ with all the MetaCharacter (Need to be excaped) 
# . ^ $ * + ? {} [] \ | ()
pattern = re.compile(r"\.")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(113, 114), match='.'>
<re.Match object; span=(149, 150), match='.'>
<re.Match object; span=(171, 172), match='.'>
<re.Match object; span=(175, 176), match='.'>
<re.Match object; span=(223, 224), match='.'>
<re.Match object; span=(254, 255), match='.'>
<re.Match object; span=(267, 268), match='.'>


In [6]:
pattern = re.compile(r"coreyms\.com")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(142, 153), match='coreyms.com'>


In [7]:
"""
.       - Any Character Except New Line
\d      - Digit (0-9)
\D      - Not a Digit (0-9)
\w      - Word Character (a-z, A-Z, 0-9, _)
\W      - Not a Word Character
\s      - Whitespace (space, tab, newline)
\S      - Not Whitespace (space, tab, newline)

\b      - Word Boundary
\B      - Not a Word Boundary
^       - Beginning of a String
$       - End of a String

[]      - Matches Characters in brackets
[^ ]    - Matches Characters NOT in brackets
|       - Either Or
( )     - Group

Quantifiers:
*       - 0 or More
+       - 1 or More
?       - 0 or One
{3}     - Exact Number
{3,4}   - Range of Numbers (Minimum, Maximum)"""

'\n.       - Any Character Except New Line\n\\d      - Digit (0-9)\n\\D      - Not a Digit (0-9)\n\\w      - Word Character (a-z, A-Z, 0-9, _)\n\\W      - Not a Word Character\n\\s      - Whitespace (space, tab, newline)\n\\S      - Not Whitespace (space, tab, newline)\n\n\x08      - Word Boundary\n\\B      - Not a Word Boundary\n^       - Beginning of a String\n$       - End of a String\n\n[]      - Matches Characters in brackets\n[^ ]    - Matches Characters NOT in brackets\n|       - Either Or\n( )     - Group\n\nQuantifiers:\n*       - 0 or More\n+       - 1 or More\n?       - 0 or One\n{3}     - Exact Number\n{3,4}   - Range of Numbers (Minimum, Maximum)'

In [8]:

# . - Any Character Except New Line
pattern = re.compile(r".")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.M

In [9]:
# \d      - Digit (0-9)
pattern = re.compile(r"\d")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(55, 56), match='1'>
<re.Match object; span=(56, 57), match='2'>
<re.Match object; span=(57, 58), match='3'>
<re.Match object; span=(58, 59), match='4'>
<re.Match object; span=(59, 60), match='5'>
<re.Match object; span=(60, 61), match='6'>
<re.Match object; span=(61, 62), match='7'>
<re.Match object; span=(62, 63), match='8'>
<re.Match object; span=(63, 64), match='9'>
<re.Match object; span=(64, 65), match='0'>
<re.Match object; span=(155, 156), match='3'>
<re.Match object; span=(156, 157), match='2'>
<re.Match object; span=(157, 158), match='1'>
<re.Match object; span=(159, 160), match='5'>
<re.Match object; span=(160, 161), match='5'>
<re.Match object; span=(161, 162), match='5'>
<re.Match object; span=(163, 164), match='4'>
<re.Match object; span=(164, 165), match='3'>
<re.Match object; span=(165, 166), match='2'>
<re.Match object; span=(166, 167), match='1'>
<re.Match object; span=(168, 169), match='1'>
<re.Match object; span=(169, 170), match='2'>
<re.Matc

In [10]:
# \D    -Not a digit (0-9)
pattern = re.compile(r"\D")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Ma

In [11]:
# \w   -Word Character (a-z, A-Z, 0-9, _)
pattern = re.compile(r"\w")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.M

In [12]:
data=[]
for i in range(1, 51):
    name = f"Faraz Gill {i}"
    data.append([name])
    
for i in data:
    i[0] = re.sub(r"\d", "", i[0])
    
print(data)

[['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill '], ['Faraz Gill ']]


In [13]:
for item in data:
    print(item[0])

Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 
Faraz Gill 


In [15]:
# \W    - Not a Word Character
pattern = re.compile(r"\W")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(27, 28), match='\n'>
<re.Match object; span=(54, 55), match='\n'>
<re.Match object; span=(65, 66), match='\n'>
<re.Match object; span=(66, 67), match='\n'>
<re.Match object; span=(69, 70), match=' '>
<re.Match object; span=(74, 75), match='\n'>
<re.Match object; span=(75, 76), match='\n'>
<re.Match object; span=(90, 91), match=' '>
<re.Match object; span=(91, 92), match='('>
<re.Match object; span=(96, 97), match=' '>
<re.Match object; span=(99, 100), match=' '>
<re.Match object; span=(102, 103), match=' '>
<re.Match object; span=(110, 111), match=')'>
<re.Match object; span=(111, 112), match=':'>
<re.Match object; span=(112, 113), match='\n'>
<re.Match object; span=(113, 114), match='.'>
<re.Match object; span=(114, 115), match=' '>
<re.Match object; span=(115, 116), match='^'>
<re.Match object; span=(116, 117), match=' '>
<re.Match object; span=(117, 118), match='$'>
<re.Match object; span=(118, 119), match=' '>
<re.M

In [17]:
# \s    - Whitespace (space, tab, newline)
pattern = re.compile(r"\s")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(27, 28), match='\n'>
<re.Match object; span=(54, 55), match='\n'>
<re.Match object; span=(65, 66), match='\n'>
<re.Match object; span=(66, 67), match='\n'>
<re.Match object; span=(69, 70), match=' '>
<re.Match object; span=(74, 75), match='\n'>
<re.Match object; span=(75, 76), match='\n'>
<re.Match object; span=(90, 91), match=' '>
<re.Match object; span=(96, 97), match=' '>
<re.Match object; span=(99, 100), match=' '>
<re.Match object; span=(102, 103), match=' '>
<re.Match object; span=(112, 113), match='\n'>
<re.Match object; span=(114, 115), match=' '>
<re.Match object; span=(116, 117), match=' '>
<re.Match object; span=(118, 119), match=' '>
<re.Match object; span=(120, 121), match=' '>
<re.Match object; span=(122, 123), match=' '>
<re.Match object; span=(124, 125), match=' '>
<re.Match object; span=(126, 127), match=' '>
<re.Match object; span=(128, 129), match=' '>
<re.Match object; span=(130, 131), match=' '>
<re

In [18]:
# \S   - Not a Whitespace (space, tab, newline)
pattern = re.compile(r"\S")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.M

#### Anchors

In [22]:

# \b   - Word Boundary
pattern = re.compile(r"\bHa")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(67, 69), match='Ha'>
<re.Match object; span=(70, 72), match='Ha'>


In [24]:
# \B   - Not a Word Boundary
pattern = re.compile(r"\BHa")
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(72, 74), match='Ha'>


In [26]:
# ^   - Beggining of a String
pattern = re.compile(r"^Start")
matches = pattern.finditer(sentence)

for match in matches:
    print(match)

<re.Match object; span=(0, 5), match='Start'>


In [33]:
# $   - End of a String
pattern = re.compile(r"end$")
matches = pattern.finditer(sentence)

for match in matches:
    print(match)

<re.Match object; span=(41, 44), match='end'>
