# Regex Fluency Drills
Series of short Regex exercises designed to build muscle memory through small reasoning bursts.

In [1]:
import re

## 1: Extract Order IDs

In [1]:
log_transaction = ["Order#A123 confirmed on 2024-06-15",
"Order#B009 failed at payment",
"Ref: X9 pending"
]

In [20]:
valid_order = []
for i in log_transaction:
    valid_order += (re.findall(r'#[A-Z]\d{3}', i))

print(valid_order)

['#A123', '#B009']


# 2: Clean and Standardize Phone Numbers

In [21]:
phone_numbers = ["+61 412-345-678", "0412 345678", "(04)12345678", "04 12 34 56 78"]

In [39]:
clean_numbers = []

for i in phone_numbers:
    num = (re.sub(r'\D', '', i))
    if num.startswith('61'):
        num = '0' + num[2:]
    clean_numbers.append(num)

formated = [f"{n[:4]} {n[4:7]} {n[7:]}" for n in clean_numbers]    
print(set(formated))

{'0412 345 678'}


# 3: Detect Valid Email Addresses

In [40]:
emails = [
    "john.doe@gmail.com",
    "mary@@yahoo.com",
    "info@uni.edu",
    "data_science@site",
    "carol-123@company.org"
]

In [48]:
valid_emails = []
pattern = r'^[\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,4}$'

for i in emails:
    valid_emails += re.findall(pattern, i)
print(valid_emails)

['john.doe@gmail.com', 'info@uni.edu', 'carol-123@company.org']


# 4: Extract Domain Names from Emails

In [49]:
emails = [
    "alex.smith@gmail.com",
    "data_team@research.org",
    "info@uni.edu",
    "contact@company.co.uk"
]

In [59]:
domains = []
pattern = r'@([\w-]+)\.'

for i in emails:
    domains += re.findall(pattern, i)

print(domains)
    

['gmail', 'research', 'uni', 'company']


# 5: Reformat Dates

In [60]:
dates = [
    "2025-11-12",
    "2023-01-05",
    "2024-09-27"
]

In [68]:
formatted_dates = []

for i in dates:
    date = re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3/\2/\1', i)
    formatted_dates.append(date)
print(formatted_dates)

['12/11/2025', '05/01/2023', '27/09/2024']


# 6: Extract and Transform Text Patterns in Sentences
Extract valid order IDs only

    * Valid IDs start with # followed by a capital letter and exactly 3 digits.

    * Use \b to ensure you don’t match “A999” (no #).

Extract dates and reformat to “DD-MM-YYYY”

    * Capture with grouping: (\d{2})/(\d{2})/(\d{4}).

    * Use re.sub() with back-references \1, \2, etc.

Extract customer names

    * Each sentence includes “by <Name>”.

    * Capture the word right after “by ”.

    * Hint: use a positive lookbehind (?<=by\s) or a capturing group (by\s)([A-Z][a-z]+).

Combine results into a list of dictionaries

In [64]:
sentences = [
    "Order ID #A321 was placed by John on 12/11/2025.",
    "Payment failed for #B002 on 05/01/2023.",
    "Shipment for #C087 confirmed by Mary on 27/09/2024.",
    "Note: temporary ID A999 not valid."
]

In [65]:
out = []

for i in sentences:
    add = {}
    add['order_id'] = re.findall(r'#[A-Z]\d{3}', i)
    
    clean_date = re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3/\2/\1', i)
    add['date'] = re.findall(r'\d{2}/\d{2}/\d{4}',clean_date)

    add['name'] = re.findall(r'by\s([A-Z][a-z]+)', i)
    
    out.append(add)

In [66]:
out

[{'order_id': ['#A321'], 'date': ['12/11/2025'], 'name': ['John']},
 {'order_id': ['#B002'], 'date': ['05/01/2023'], 'name': []},
 {'order_id': ['#C087'], 'date': ['27/09/2024'], 'name': ['Mary']},
 {'order_id': [], 'date': [], 'name': []}]