In [1]:
# Use the 're' module. Regex is for pattern matching in text.
import re

In [None]:
text = "1 Hello there! MY email is test.user123@example.com and my backup is test.alt@mail.co.uk. You can call me at 9876543210 or 8765432109. Let's meet at 5:00 PM on 20-04-2025."

#### common methods use

In [5]:
# re.match(pattern, string) - Checks for a match only at the beginning of the string
match = re.match(r'\d+', text)
if match:
    print("found number: ", match.group())

found number:  1


In [7]:
# re.search(pattern, string) - Searches the entire string for the first match
match = re.search(r'[A-Z]+', text)
if match:
    print("upper forund: ", match.group())

upper forund:  H


In [10]:
# re.findall(pattern, string) - Returns a list of all non-overlapping matches
email_pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'
match = re.findall(email_pattern, text)
if match:
    print('emails: ', match)

emails:  ['test.user123@example.com', 'test.alt@mail.co.uk.']


In [12]:
# re.finditer(pattern, string) - Returns an iterator yielding match objects
for match in re.finditer(r'\d{1,2}:\d{2}', text):
    print("Time found:", match.group())

Time found: 5:00


In [None]:
# re.sub(pattern, repl, string) - Replaces matches with a string
masked_text = re.sub(r'\d+', 'X', text)
print(masked_text)

X Hello there! My email is test.userX@example.com and my backup is test.alt@mail.co.uk. You can call me at X or X. Let's meet at X:X PM on X-X-X.


In [None]:
# re.split(pattern, string) - Splits the string by the occurrences of the pattern
parts = re.split(r'[\s,!]+', text)
print(parts)

['1', 'Hello', 'there', 'My', 'email', 'is', 'test.user123@example.com', 'and', 'my', 'backup', 'is', 'test.alt@mail.co.uk.', 'You', 'can', 'call', 'me', 'at', '9876543210', 'or', '8765432109.', "Let's", 'meet', 'at', '5:00', 'PM', 'on', '20-04-2025.']


In [15]:
# re.compile(pattern) - Compiles a regex pattern for reuse
phone_pattern = re.compile(r'\d{10}')
phones = phone_pattern.findall(text)
print("Phone numbers:", phones)

Phone numbers: ['9876543210', '8765432109']


In [None]:
""" 
Pattern - Meaning
. - Any character except newline
^ - Start of string
$ - End of string
* - 0 or more repetitions
+ - 1 or more repetitions
? - 0 or 1 repetition
{m} - Exactly m repetitions
{m,n} - Between m and n repetitions
[] - Matches any single character in brackets
` - `
\d - Digit (0–9)
\D - Non-digit
\w - Word character (a-z, A-Z, 0-9, _)
\W - Non-word character
\s - Whitespace
\S - Non-whitespace 
"""