In [1]:
import datetime
import re

Basic Operations

In [2]:
# re.search scans the whole string and returns the first match, or None.
text = "Hello, World!"
match = re.search(r"World", text)
if match is None:
    print("Not found.")
else:
    print("Found:", match.group())

Found: World


Finding All Matches: re.findall()

In [3]:
# Collect every non-overlapping occurrence of "ain" as a list of strings.
text = "The rain in Spain"
matches = re.compile(r"ain").findall(text)
print(matches)  # ['ain', 'ain']

['ain', 'ain']


Splitting a String: re.split()

In [4]:
# Break the sentence apart at every single whitespace character.
text = "The rain in Spain"
parts = re.compile(r"\s").split(text)
print(parts)  # ['The', 'rain', 'in', 'Spain']


['The', 'rain', 'in', 'Spain']


Replacing Text: re.sub()

In [5]:
# Substitute every occurrence of "Spain" with "France"; the input is untouched.
text = "The rain in Spain"
new_text = re.sub(pattern=r"Spain", repl="France", string=text)
print(new_text)  # The rain in France


The rain in France


Pattern Syntax

In [6]:
# Three parenthesised groups capture the area code, prefix and line number.
text = "My number is 123-456-7890."
match = re.search(r"(\d{3})-(\d{3})-(\d{4})", text)
if match is not None:
    print(match.groups())  # ('123', '456', '7890')
    # Indexing a match object with N is shorthand for match.group(N).
    print("Area code:", match[1])  # Area code: 123

('123', '456', '7890')
Area code: 123


Flags

In [7]:
# re.I is the short alias of re.IGNORECASE: letters match regardless of case.
text = "hello world"
match = re.search(r"WORLD", text, flags=re.I)
if match is not None:
    print("Found:", match.group())  # Found: world


Found: world


Basic Date Pattern Matching

In [8]:
# Sample sentence containing three dates written in different layouts.
text = "Today's date is 14/03/2024. The previous meeting was on 12-02-2024, and the next one will be on 2024/04/16."

# Two alternatives inside a non-capturing group:
# DD-MM-YYYY / DD/MM/YYYY, or YYYY/MM/DD.
pattern = r"\b(?:\d{2}[-/]\d{2}[-/]\d{4}|\d{4}/\d{2}/\d{2})\b"

# The pattern has no capturing groups, so each match's full text is the date.
dates = [found.group() for found in re.finditer(pattern, text)]
print(dates)  # ['14/03/2024', '12-02-2024', '2024/04/16']

['14/03/2024', '12-02-2024', '2024/04/16']


Advanced Date Extraction

In [9]:
# Groups 1-3 belong to the day-first alternative, groups 4-6 to the year-first one.
pattern = r"\b(?:(\d{2})[-/](\d{2})[-/](\d{4})|(\d{4})/(\d{2})/(\d{2}))\b"

matches = re.finditer(pattern, text)
for match in matches:
    # Group 1 is None whenever the year-first alternative was the one that matched.
    if match.group(1) is None:  # YYYY/MM/DD
        year, month, day = match.group(4, 5, 6)
    else:  # DD-MM-YYYY or DD/MM/YYYY
        day, month, year = match.group(1, 2, 3)

    print(f"Day: {day}, Month: {month}, Year: {year}")


Day: 14, Month: 03, Year: 2024
Day: 12, Month: 02, Year: 2024
Day: 16, Month: 04, Year: 2024


Handling Different Date Formats with Named Groups

In [10]:
# Adjacent raw-string literals are concatenated into one pattern. Group names
# must be unique within a pattern, hence the "2" suffix on the second alternative.
pattern = (
    r"\b(?:"
    r"(?P<day>\d{2})[-/](?P<month>\d{2})[-/](?P<year>\d{4})"  # DD-MM-YYYY or DD/MM/YYYY
    r"|"
    r"(?P<year2>\d{4})/(?P<month2>\d{2})/(?P<day2>\d{2})"  # YYYY/MM/DD
    r")\b"
)

matches = re.finditer(pattern, text)
for match in matches:
    # Only one alternative participates per match; the other's groups are None,
    # so `or` falls through to whichever name was actually filled in.
    day = match['day'] or match['day2']
    month = match['month'] or match['month2']
    year = match['year'] or match['year2']

    print(f"Day: {day}, Month: {month}, Year: {year}")


Day: 14, Month: 03, Year: 2024
Day: 12, Month: 02, Year: 2024
Day: 16, Month: 04, Year: 2024


Validating Date Formats

In [11]:
def validate_date(date_str):
    """Return True if ``date_str`` is a real calendar date in a supported layout.

    Accepted layouts are DD-MM-YYYY, DD/MM/YYYY and YYYY/MM/DD. The whole
    string must match one of them, and the day/month/year it names must
    exist on the calendar (so "31-02-2024" is rejected).

    Args:
        date_str: The string to check.

    Returns:
        bool: True for a well-formed, existing date; False otherwise.
    """
    # Matches DD-MM-YYYY, DD/MM/YYYY, YYYY/MM/DD
    pattern = r"(?:(\d{2})[-/](\d{2})[-/](\d{4})|(\d{4})/(\d{2})/(\d{2}))"
    # fullmatch (instead of match + "$") also rejects a trailing newline.
    match = re.fullmatch(pattern, date_str)
    if match is None:
        return False

    if match.group(1) is not None:  # DD-MM-YYYY or DD/MM/YYYY
        day, month, year = match.group(1, 2, 3)
    else:  # YYYY/MM/DD
        year, month, day = match.group(4, 5, 6)

    # The regex only checks the shape. datetime.date raises ValueError for
    # out-of-range values such as month 13 or 31 February.
    try:
        datetime.date(int(year), int(month), int(day))
    except ValueError:
        return False
    return True

print(validate_date("31-02-2024"))  # False - February doesn't have 31 days
print(validate_date("15/03/2024"))  # True


False
True


Common Patterns Examples

In [26]:
# Each pattern is bound to a name so it can be reused outside this cell.

# Emails
# The top-level-domain class is [A-Za-z]. Inside square brackets "|" is a
# literal character, not alternation, so "[A-Z|a-z]" would also accept pipes.
EMAIL_PATTERN = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
print(re.findall(EMAIL_PATTERN, "My email is example@test.com."))

# URLs
URL_PATTERN = r"https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&//=]*)"
print(re.findall(URL_PATTERN, "Visit http://www.example.com"))

# Dates (DD-MM-YYYY)
DATE_PATTERN = r"\b\d{2}[-/]\d{2}[-/]\d{4}\b"
print(re.findall(DATE_PATTERN, "Today is 14-03-2024."))

# Phone Numbers ((123) 456-7890)
PHONE_PATTERN = r"\(\d{3}\) \d{3}-\d{4}"
print(re.findall(PHONE_PATTERN, "Call me at (123) 456-7890."))


['example@test.com']
['http://www.example.com']
['14-03-2024']
['(123) 456-7890']
