Convert Dates from YYYY-MM-DD to MM/DD/YYYY

In [None]:
import re

# Sample ISO-style date plus a pattern capturing year, month and day.
date_str = '2024-08-26'
date_regex = r'(\d{4})-(\d{2})-(\d{2})'

# Emit the captured groups in month/day/year order to get MM/DD/YYYY.
date_pattern = re.compile(date_regex)
formatted_date = date_pattern.sub(r'\2/\3/\1', date_str)
print(formatted_date)  # Outputs: 08/26/2024

08/26/2024


Extract and Reformat Phone Numbers

In [None]:
import re

# Ten raw digits, captured as area code (3), prefix (3) and line number (4).
phone_str = '1234567890'
phone_regex = r'(\d{3})(\d{3})(\d{4})'

# Wrap the first group in parentheses and hyphenate the remaining two.
phone_pattern = re.compile(phone_regex)
formatted_phone = phone_pattern.sub(r'(\1) \2-\3', phone_str)
print(formatted_phone)  # Outputs: (123) 456-7890

(123) 456-7890


Extract Email Domain from Email Address

In [None]:
import re

email_str = 'user@example.com'
# Capture the run of letters, digits, dots and hyphens that follows "@".
email_regex = r'@([a-zA-Z0-9.-]+)'

# Search anywhere in the string; use an empty domain when nothing matches.
match = re.search(email_regex, email_str)
if match is None:
    domain = ''
else:
    domain = match.group(1)
print(domain)  # Outputs: example.com


example.com


Remove HTML Tags

In [None]:
import re

html_str = '<p>Hello <b>World</b>!</p>'
# A tag is "<", one or more characters that are not ">", then ">".
html_regex = r'<[^>]+>'

# Drop every tag and keep only the text between them.
tag_pattern = re.compile(html_regex)
plain_text = tag_pattern.sub('', html_str)
print(plain_text)  # Outputs: Hello World!


Hello World!


Format Credit Card Numbers

In [None]:
import re

# Sixteen digits captured as four groups of four.
card_str = '1234567890123456'
card_regex = r'(\d{4})(\d{4})(\d{4})(\d{4})'

# Join the four groups with hyphens: 1234-5678-9012-3456.
card_pattern = re.compile(card_regex)
formatted_card = card_pattern.sub(r'\1-\2-\3-\4', card_str)
print(formatted_card)  # Outputs: 1234-5678-9012-3456


1234-5678-9012-3456


Removing Emojis

In [None]:
import re

# Character class covering the common emoji blocks.  Every range is limited
# to the code points it names: a single wide range such as
# U+24C2..U+1F251 would also swallow CJK ideographs, kana and Hangul,
# so ordinary text in those scripts would be reported as "emoji".
emoji_pattern = re.compile(
    "[\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Misc Symbols and Pictographs
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
    "\U0001F700-\U0001F77F"  # Alchemical Symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U00002600-\U000026FF"  # Miscellaneous Symbols (sun, umbrella, ...)
    "\U00002702-\U000027B0"  # Dingbats
    "\U0001F1E6-\U0001F1FF"  # Regional indicator symbols (flags)
    "\U000024C2"             # Circled Latin capital letter M
    "\U0001F200-\U0001F251"  # Enclosed Ideographic Supplement
    "]+", flags=re.UNICODE)
text = "Hello 🌍! How are you? 😊"
# findall returns each maximal run of emoji characters as one string.
emojis = emoji_pattern.findall(text)
print(emojis)  # Outputs: ['🌍', '😊']


['🌍', '😊']


In [None]:
# Delete every emoji run using the compiled pattern from the cell above;
# the spaces that surrounded the emojis are left untouched.
text_with_emojis = "Hello 🌍! How are you? 😊"
text_without_emojis = emoji_pattern.sub('', text_with_emojis)
print(text_without_emojis)  # Outputs: "Hello ! How are you? " (trailing space remains)


Hello ! How are you? 


Removing Punctuation

In [None]:
import re
import string

text = "Hello, world! How's everything?"
# Character class built from string.punctuation; re.escape makes sure
# characters that are special inside a class are taken literally.
punctuation_pattern = re.compile(f"[{re.escape(string.punctuation)}]")
cleaned_text = punctuation_pattern.sub('', text)
print(cleaned_text)  # Outputs: Hello world Hows everything


Hello world Hows everything


Finding Repeated Words

In [None]:
import re

text = "This is a test test to check for errors."

# A word followed by one or more whitespace-separated copies of itself.
# Repeating the "\s+\1\b" group lets a run of any length ("test test test")
# collapse to a single word in one pass; without the repetition only the
# first two copies of a longer run would be merged.
repeated_word_pattern = re.compile(r'\b(\w+)(?:\s+\1\b)+')
corrected_text = repeated_word_pattern.sub(r'\1', text)
print(corrected_text)  # Outputs: This is a test to check for errors.


This is a test to check for errors.


Removing Extra Spaces

In [None]:
import re

text = "This  is  a   test  with  extra spaces."

# Turn each run of whitespace into one space, then trim both ends.
whitespace_run = re.compile(r'\s+')
collapsed = whitespace_run.sub(' ', text)
corrected_text = collapsed.strip()
print(corrected_text)  # Outputs: This is a test with extra spaces.


This is a test with extra spaces.


In [None]:
# Regex-free variant: split() with no arguments breaks on any whitespace run
# and discards empty pieces, so re-joining with single spaces normalises it.
text = "This  is  a   test  with  extra spaces."
words = text.split()
cleaned_text = ' '.join(words)
print(cleaned_text)  # Outputs: This is a test with extra spaces.

This is a test with extra spaces.


Removing Numbers

In [None]:
import re

text = "The price is 100 dollars."
# Remove each run of digits; the spaces on both sides of the number stay,
# which is why the result contains a double space.
digits_pattern = re.compile(r'\d+')
cleaned_text = digits_pattern.sub('', text)
print(cleaned_text)  # Outputs: The price is  dollars.


The price is  dollars.


Email Validation

In [None]:
import re

# Shape check only: something@something.something with no whitespace and
# no additional "@" characters.
email_regex = r'^[^\s@]+@[^\s@]+\.[^\s@]+$'
email = "example@example.com"


def is_valid_email(address):
    """Return True if *address* matches ``email_regex`` in its entirety.

    ``re.fullmatch`` is used rather than ``re.match`` because ``$`` also
    matches just before a trailing newline, so ``re.match`` would accept
    ``"user@example.com\\n"``.
    """
    return re.fullmatch(email_regex, address) is not None


is_valid = is_valid_email(email)
print(is_valid)  # Outputs: True


True


Validating Dates in YYYY-MM-DD Format

In [1]:
import re

# Four digits, hyphen, two digits, hyphen, two digits.
date_regex = r'^\d{4}-\d{2}-\d{2}$'
date = '20-08-26'


def matches_date_format(value):
    """Return True if *value* has the YYYY-MM-DD shape.

    Only the layout is checked, not whether the date exists on the calendar
    (``'2024-13-99'`` passes).  ``re.fullmatch`` is used because ``$`` with
    ``re.match`` would also accept a value ending in a newline.
    """
    return re.fullmatch(date_regex, value) is not None


# Check if the date matches the regex
is_valid = matches_date_format(date)
print(is_valid)  # Outputs: False (the year has only two digits)


False


Removing URLs

In [None]:
import re

text = "Check out this website: https://example.com."

# A URL must start with an explicit scheme ("http://" or "https://") or with
# "www."; requiring these prefixes keeps ordinary words such as "httpd"
# intact.  No flags are needed because the pattern has no anchors.
url_pattern = re.compile(r'(?:https?://|www\.)\S+')
cleaned_text = url_pattern.sub('', text)
# \S+ runs up to the next whitespace, so the sentence's final "." is removed
# together with the URL.
print(cleaned_text)  # Outputs: "Check out this website: " (trailing space remains)


Check out this website: 
