# Python program to demonstrate STRING MANIPULATION and REGULAR EXPRESSIONS

## Regular Expressions

#### Python's built-in re module provides functions and methods to work with regular expressions
### 1. Pattern Matching

In [27]:
import re

# Example text
text = "Please contact us at support@example.com for more details."

# Regular expression pattern to match an email address
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'

# Search for the email in the text
match = re.search(email_pattern, text)

if match:
    print("Found email:", match.group())
else:
    print("No email found.")

Found email: support@example.com


### 2. Matching Functions

In [34]:
# 1. re.search()
#This function searches the entire string for the first match of the pattern.
# Search for the first occurrence of the word "apple"
text = "I have an apple, a banana, and an orange."
match = re.search(r'apple', text)

if match:
    print("Found:", match.group())

Found: apple


In [36]:
# 2. re.match()
#This function checks if the pattern matches the beginning of the string.
# Match the pattern at the beginning of the string
text = "apple is my favorite fruit."
match = re.match(r'apple', text)

if match:
    print("Match found:", match.group())

Match found: apple


In [38]:
# 3. re.findall()
#This function finds all occurrences of the pattern in the string and returns them as a list.
# Find all the fruit names
text = "I have an apple, a banana, and an orange."
matches = re.findall(r'\b\w+\b', text)

print("All words:", matches)

All words: ['I', 'have', 'an', 'apple', 'a', 'banana', 'and', 'an', 'orange']


In [93]:
# 4. re.sub()
#This function in Python is used to substitute or replace occurrences of a pattern within a string with a new string.
#re.sub(pattern, replacement, string, count=0, flags=0)
import re

text = "My number is 9876543210"
result = re.sub(r'\d', 'X', text)
print("Modified text:", result)

text = "Contact us at john.doe@example.com for more info."
result = re.sub(r'\w+@\w+\.\w+', 'REDACTED', text)
print("Modified text:", result)

Modified text: My number is XXXXXXXXXX
Modified text: Contact us at john.REDACTED for more info.


### 3. Regex Patterns

In [51]:
# \d - Matches any digit
text = "My phone number is 123-456-7890."
digits = re.findall(r'\d+', text)
print("Digits found:", digits)

text = "My phone number is 9876543210"
digits = re.findall(r'\d', text)
print("Digits found:", digits)

Digits found: ['123', '456', '7890']
Digits found: ['9', '8', '7', '6', '5', '4', '3', '2', '1', '0']


In [53]:
# \D - Matches any non-digit character
text = "Order ID: ABC1234XYZ"
non_digits = re.findall(r'\D', text)
print("Non-digits found:", non_digits)

Non-digits found: ['O', 'r', 'd', 'e', 'r', ' ', 'I', 'D', ':', ' ', 'A', 'B', 'C', 'X', 'Y', 'Z']


In [57]:
# \w - Matches any word character (letters, digits, or underscores)
text = "The file name is my_file_123.txt"
words = re.findall(r'\w+', text)
print("Words found:", words)

text = "Username: user_123"
alphanumeric = re.findall(r'\w', text)
print("Alphanumeric characters:", alphanumeric)

Words found: ['The', 'file', 'name', 'is', 'my_file_123', 'txt']
Alphanumeric characters: ['U', 's', 'e', 'r', 'n', 'a', 'm', 'e', 'u', 's', 'e', 'r', '_', '1', '2', '3']


In [59]:
# \W - Matches any non-alphanumeric character
text = "Hello, World! @2024"
non_alphanumeric = re.findall(r'\W', text)
print("Non-alphanumeric characters:", non_alphanumeric)

Non-alphanumeric characters: [',', ' ', '!', ' ', '@']


In [61]:
# \s - Matches any whitespace character (space, tab, newline, etc.)
text = "This is\ta test\nstring."
whitespace = re.findall(r'\s', text)
print("Whitespace characters:", whitespace)

Whitespace characters: [' ', '\t', ' ', '\n']


In [63]:
# \S - Matches any non-whitespace character
text = "Whitespace 123!"
non_whitespace = re.findall(r'\S', text)
print("Non-whitespace characters:", non_whitespace)

Non-whitespace characters: ['W', 'h', 'i', 't', 'e', 's', 'p', 'a', 'c', 'e', '1', '2', '3', '!']


In [65]:
# \b - Matches a word boundary
text = "Hello, world!"
words = re.findall(r'\b\w+', text)
print("Words:", words)

Words: ['Hello', 'world']


In [67]:
# \B - Matches a non-word boundary
text = "ABBA"
non_word_boundary = re.findall(r'\B\w', text)
print("Non-word boundary matches:", non_word_boundary)

Non-word boundary matches: ['B', 'B', 'A']


In [69]:
# \A - Matches the start of a string
text = "Start here and continue."
match_start = re.findall(r'\AStart', text)
print("Start of the string match:", match_start)

Start of the string match: ['Start']


In [71]:
# \Z - Matches the end of a string
text = "Continue until the end."
match_end = re.findall(r'end\Z', text)
print("End of the string match:", match_end)

End of the string match: []


In [73]:
# . - Matches any character (except a newline)
text = "Hello! How are you?"
any_characters = re.findall(r'H..', text)
print("Matching sequences:", any_characters)

text = "abc123"
any_character = re.findall(r'.', text)
print("Any characters found:", any_character)

Matching sequences: ['Hel', 'How']
Any characters found: ['a', 'b', 'c', '1', '2', '3']


In [75]:
# [] - Matches any character within the brackets
text = "find vowels aeiou"
vowels = re.findall(r'[aeiou]', text)
print("Vowels found:", vowels)

Vowels found: ['i', 'o', 'e', 'a', 'e', 'i', 'o', 'u']


In [77]:
# [^] - Matches any character not within the brackets
text = "abcdef"
non_vowels = re.findall(r'[^aeiou]', text)
print("Non-vowel characters:", non_vowels)

Non-vowel characters: ['b', 'c', 'd', 'f']


In [79]:
# * - Matches zero or more occurrences of the preceding pattern
text = "aabbbbc"
pattern = re.findall(r'b*', text)
print("Pattern with '*':", pattern)

Pattern with '*': ['', '', 'bbbb', '', '']


In [81]:
# + - Matches one or more occurrences of the preceding pattern
text = "aabbbbc"
pattern = re.findall(r'b+', text)
print("Pattern with '+':", pattern)

Pattern with '+': ['bbbb']


In [83]:
# ? - Matches zero or one occurrence of the preceding pattern
text = "color or colour?"
pattern = re.findall(r'colou?r', text)
print("Pattern with '?':", pattern)

Pattern with '?': ['color', 'colour']


In [85]:
# {n} - Matches exactly n occurrences of the preceding pattern
text = "123 4567 89"
exact_match = re.findall(r'\d{3}', text)
print("Exact 3 digits match:", exact_match)

Exact 3 digits match: ['123', '456']


In [87]:
# {n,} - Matches n or more occurrences of the preceding pattern
text = "123 4567 89"
n_or_more = re.findall(r'\d{3,}', text)
print("Match 3 or more digits:", n_or_more)

Match 3 or more digits: ['123', '4567']


In [89]:
# {n,m} - Matches between n and m occurrences of the preceding pattern
text = "123 4567 89"
between_n_m = re.findall(r'\d{2,4}', text)
print("Match between 2 to 4 digits:", between_n_m)

Match between 2 to 4 digits: ['123', '4567', '89']


In [95]:
#Example: Finding and Replacing an Email Address in a String
#1. Pattern Matching:
#In this example, we will search for an email address within a string using re.search().
#2. String Manipulation:
#Once we find the email, we will replace it with "REDACTED" using re.sub().

# Sample text with an email address
text = "Please contact us at support@example.com for further assistance."

# Regular expression to match an email address pattern
email_pattern = r'\w+@\w+\.\w+'

# Searching for the email address using re.search()
match = re.search(email_pattern, text)
if match:
    print(f"Matched email address: {match.group(0)}")

# Using re.sub() to replace the email address with 'REDACTED'
redacted_text = re.sub(email_pattern, 'REDACTED', text)
print("Text after redaction:", redacted_text)

Matched email address: support@example.com
Text after redaction: Please contact us at REDACTED for further assistance.
