In [None]:
import re

data = "hello this is python programming"

# using match -> finds match only at the beginning of the string
match = re.match(r"hello", data)
if match:
    print("Match found:", match.group())
    print("result.span():", match.span()) # position of matched string
else:
    print("No match found")

Match found: hello
result.span(): (0, 5)


In [7]:
# case insensitive match
match = re.match(r"HELLO", data, re.IGNORECASE)
if match:
    print("Case insensitive match found:", match.group())
else:
    print("No match found")

Case insensitive match found: hello


In [10]:
# empty pattern
match = re.match(r"", data)
if match:
    print("Empty pattern match found:", match.group())
else:
    print("No match found")

Empty pattern match found: 


In [11]:
# search -> finds match anywhere in the string
search = re.search(r"python", data)
if search:
    print("Search found:", search.group())
    print("result.span():", search.span()) # position of matched string
else:
    print("No match found")

Search found: python
result.span(): (14, 20)


In [14]:
# multiple possible matches -> returns only the first match

data = "cat bat rat mat $ cat @ sat cat cat sat"
result = re.search(r"cat", data)
print("result.group():", result.group())

result.group(): cat


In [15]:
# find the Special characters $, @ in the string
result_dollar = re.search(r"\$", data)
print("Dollar found:", result_dollar.group())

Dollar found: $


In [17]:
# re.findall() -> returns all matches as a list
result = re.findall(r"cat", data)
print("All matches of 'cat':", result)

# not match
not_match = re.search(r"dog", data)
print("Not match (should be None):", not_match)

All matches of 'cat': ['cat', 'cat', 'cat', 'cat']
Not match (should be None): None


In [18]:
# with groups (returns tuple)
data = "John: 555-1234, Jane: 555-5678"
result = re.findall(r"(\d+)-(\d+)", data)
print("Phone number groups:", result)

Phone number groups: [('555', '1234'), ('555', '5678')]


In [21]:
# re.sub() -> replace matches
data = "The cat sat on the mat."
replaced_data = re.sub(r"cat", "dog", data)
print("Replaced data:", replaced_data)

Replaced data: The dog sat on the mat.


In [None]:
# counting matches to replace
data = "cat bat cat rat cat mat"
new_string = re.sub(r"cat","dog", data,count=2)
print("String after replacing 2 'cat':", new_string)

String after replacing 2 'cat': dog bat dog rat cat mat


In [25]:
# dot (.) wildcard -> matches any character except newline
data = "cat bat rat mat"
result = re.findall(r".at", data)
print("Matches for .at:", result)

Matches for .at: ['cat', 'bat', 'rat', 'mat']


In [27]:
# ^ start and $ end anchors
data = "hello world"
result = re.match(r"^hello", data)
print(result.group() if result else "No match for ^hello")

hello


In [28]:
# \d ,\D , \w \W , \s , \s examples
data = "User_123 logged in at 10:30 AM on 2023-11-25!"

digits = re.findall(r"\d", data)
non_digits = re.findall(r"\D", data)
words = re.findall(r"\w", data)
non_words = re.findall(r"\W", data)
spaces = re.findall(r"\s", data)
non_spaces = re.findall(r"\S", data)
print("Digits:", digits)
print("Non-Digits:", non_digits)
print("Words:", words)
print("Non-Words:", non_words)
print("Spaces:", spaces)
print("Non-Spaces:", non_spaces)

Digits: ['1', '2', '3', '1', '0', '3', '0', '2', '0', '2', '3', '1', '1', '2', '5']
Non-Digits: ['U', 's', 'e', 'r', '_', ' ', 'l', 'o', 'g', 'g', 'e', 'd', ' ', 'i', 'n', ' ', 'a', 't', ' ', ':', ' ', 'A', 'M', ' ', 'o', 'n', ' ', '-', '-', '!']
Words: ['U', 's', 'e', 'r', '_', '1', '2', '3', 'l', 'o', 'g', 'g', 'e', 'd', 'i', 'n', 'a', 't', '1', '0', '3', '0', 'A', 'M', 'o', 'n', '2', '0', '2', '3', '1', '1', '2', '5']
Non-Words: [' ', ' ', ' ', ' ', ':', ' ', ' ', ' ', '-', '-', '!']
Spaces: [' ', ' ', ' ', ' ', ' ', ' ', ' ']
Non-Spaces: ['U', 's', 'e', 'r', '_', '1', '2', '3', 'l', 'o', 'g', 'g', 'e', 'd', 'i', 'n', 'a', 't', '1', '0', ':', '3', '0', 'A', 'M', 'o', 'n', '2', '0', '2', '3', '-', '1', '1', '-', '2', '5', '!']


In [31]:
# [] (character classes) example and  | or operator
# Example [] 
data = "Abc123 Xyz456"
result = re.findall(r"[A-Z]", data)
print("Uppercase letters:", result)
result = re.findall(r"[a-z]", data)
print("Uppercase letters:", result)

# find vowels
data = "This is an example string."
vowels = re.findall(r"[aeiouAEIOU]", data)
print("Vowels in the string:", vowels)

Uppercase letters: ['A', 'X']
Uppercase letters: ['b', 'c', 'y', 'z']
Vowels in the string: ['i', 'i', 'a', 'e', 'a', 'e', 'i']


In [35]:
# example | or operator
data = "I have a cat and a dog."
result = re.findall(r"cat|dog", data)
print("Matches for 'cat' or 'dog':", result)

Matches for 'cat' or 'dog': ['cat', 'dog']


In [38]:
# edge case : negation in [] (^inside square brackets)
data = "abcdefg12345!@#"
result = re.findall(r"[^aeiou]", data)
print("Non-vowel characters:", result)

Non-vowel characters: ['b', 'c', 'd', 'f', 'g', '1', '2', '3', '4', '5', '!', '@', '#']


In [41]:
# Quantifiers *, +, ?, {n}, {n,}, {n,m} examples -> capture groups of characters

# example *** 
data = "aaab aaaa aaaa aaaa"
result_star = re.findall(r"a*", data)
print("Matches for a* :", result_star)

Matches for a* : ['aaa', '', '', 'aaaa', '', 'aaaa', '', 'aaaa', '']


In [42]:
# example +
data = "aaab aaaa aaaa aaaa"
result_plus = re.findall(r"a+", data)
print("Matches for a+ :", result_plus)

Matches for a+ : ['aaa', 'aaaa', 'aaaa', 'aaaa']


In [43]:
# exmaple ?
data = "color colour colr"
result_question = re.findall(r"colou?r", data)
print("Matches for colou?r :", result_question)

Matches for colou?r : ['color', 'colour']


In [48]:
# example with {n,m}
data = "123456 aaa aa aaaaa aaaaaaa"
result = re.findall(r"a{4,6}", data)
print("Matches for a{2,4} :", result)

result = re.findall(r"\d{2,3}", data)
print("Matches for a{2,4} :", result)

Matches for a{2,4} : ['aaaaa', 'aaaaaa']
Matches for a{2,4} : ['123', '456']


In [50]:
# group ((),(?:))
data = '234-56-7890, 123-45-6789'
result = re.search(r"(\d{3})-(\d{2})-(\d{4})", data)
print("Full match:", result.group(0))
print("Area code:", result.group(1))
print("Central office code:", result.group(2))
print("Line number:", result.group(3))
    

Full match: 234-56-7890
Area code: 234
Central office code: 56
Line number: 7890


In [51]:
data = "Mr. John Doe, Ms. Jane Smith, Dr. Emily Davis"
result = re.search(r"(?:Mr|Ms|Dr)\. (\w+)", data)
print("Title and first name match:", result.group(0))
print("First name captured:", result.group(1))


Title and first name match: Mr. John
First name captured: John


In [59]:
# find all email addresses in a text
# create email list 

pattern = r"^[\w\.-]+@[\w\.-]+\.\w+"
data = ['support@cwpc.in', 'info@cwpc.in']
for email in data:
    print(f"Email: {email}, Match: {bool(re.match(pattern, email))}")


Email: support@cwpc.in, Match: True
Email: info@cwpc.in, Match: True


In [66]:
# replacing sensitive words
data = 'card 12345-6789-0123-4567,expiry 12/25'
masked_data = re.sub(r"-\d{4}-\d{4}-\d{4}", "***** **** **** ****", data)
print("Masked data:", masked_data)

Masked data: card 12345***** **** **** ****,expiry 12/25
