In [1]:
# Step 1: Importing the 're' module
# Regular expressions in Python are supported by the 're' module.
import re

In [3]:
# Step 2: Basic pattern matching
# A plain word is already a valid regular expression: it simply matches itself.
# re.search() scans the string from left to right and returns a match object
# for the first place where the pattern matches, or None if it matches nowhere.

text = "Welcome to the world of Python programming!"
pattern = "Python"

match = re.search(pattern, string=text)
if match is None:
    print(f"Pattern '{pattern}' not found.")
else:
    print(f"Pattern '{pattern}' found at position {match.start()} to {match.end()}")

# Explanation:
# - A match object is always truthy, so checking for None is equivalent to 'if match:'.
# - 'match.start()' is the index of the first matched character and 'match.end()'
#   is the index just past the last one, so text[match.start():match.end()] is the match.

Pattern 'Python' found at position 24 to 30


In [4]:
# Step 3: Using special characters
# Some characters carry a special meaning inside a regular expression: '.', '*', '+', etc.
# Here we use '.', which stands for any single character (except a newline).

text = "cat, cot, cut"
# 'c.t' matches a three-character sequence: a 'c', then any one character, then a 't'.
pattern = "c.t"

matches = re.findall(pattern, string=text)
print(f"Matches for pattern '{pattern}': {matches}")

# Explanation:
# 're.findall()' scans the whole text and returns every non-overlapping match.
# Because this pattern has no groups, the result is a list of the matched strings.

Matches for pattern 'c.t': ['cat', 'cot', 'cut']


In [5]:
# Step 4: Working with character sets and ranges
# Square brackets [] define a set: exactly one character out of those listed.
# For example, '[aeiou]' matches any single lowercase vowel, and a range such as
# '[a-z]' is shorthand for listing every character between the two endpoints.

text = "bat, bet, bit, bot, but"
# A 'b', then one vowel taken from the set, then a 't'.
pattern = "b[aeiou]t"

matches = re.findall(pattern, string=text)
print(f"Matches for pattern '{pattern}': {matches}")

Matches for pattern 'b[aeiou]t': ['bat', 'bet', 'bit', 'bot', 'but']


In [6]:
# Step 5: Quantifiers
# A quantifier says how many times the preceding character or group may repeat:
#   '*'  zero or more times
#   '+'  one or more times
#   '?'  zero or one time

text = "gooooal!"
# The '*' applies only to the 'o' right before it: 'g', any number of 'o's, then 'al'.
pattern = "go*al"

match = re.search(pattern, string=text)
if match is not None:
    print(f"Pattern '{pattern}' found: {match.group()}")

# Explanation:
# Since '*' also allows zero repetitions, 'go*al' would match 'gal' and 'goal'
# just as well as 'gooooal'. 'match.group()' returns the text that was matched.

Pattern 'go*al' found: gooooal


In [7]:
# Step 6: Grouping and capturing
# You can group patterns using parentheses ().
# This is useful when you want to extract specific parts of a match.

text = "My phone number is 123-456-7890."
# Three capture groups in the format XXX-XXX-XXXX. The '\b' word boundaries at both
# ends stop the pattern from matching inside a longer run of digits; without them,
# '1234-567-89012' would wrongly yield the "phone number" '234-567-8901'.
pattern = r"\b(\d{3})-(\d{3})-(\d{4})\b"

match = re.search(pattern, text)
if match:
    print(f"Phone number found: {match.group()}")
    print(f"Area code: {match.group(1)}")
    print(f"Main number: {match.group(2)}-{match.group(3)}")
else:
    # Report the miss explicitly instead of printing nothing.
    print("No phone number found.")

# Explanation:
# '(\d{3})' captures exactly three digits. '\d' matches any digit, and '{3}' specifies that we want exactly three occurrences.
# '\b' matches the empty position between a word character (letter, digit, underscore) and a non-word character.
# The full phone number is returned by match.group(), and individual groups are accessed using match.group(1), match.group(2), etc.

Phone number found: 123-456-7890
Area code: 123
Main number: 456-7890


In [8]:
# Step 7: Escaping special characters
# Characters such as '.', '*', '+' or '$' normally act as regex operators.
# To match them literally, put a backslash '\' in front of them.

text = "The price is $5.99."
# A literal dollar sign, one or more digits, a literal dot, then exactly two digits.
# The r"..." raw string keeps Python from interpreting the backslashes itself.
pattern = r"\$\d+\.\d{2}"

match = re.search(pattern, string=text)
if match is not None:
    print(f"Price found: {match.group()}")

# Explanation:
#   '\$'     a literal dollar sign
#   '\d+'    one or more digits
#   '\.'     a literal dot
#   '\d{2}'  exactly two digits

Price found: $5.99


In [9]:
# Step 8: Using flags for case-insensitive matching
# Flags change how a pattern is interpreted. re.IGNORECASE, for instance,
# makes letters in the pattern match both their upper- and lowercase forms.

text = "Python is fun. PYTHON is powerful."
pattern = "python"

# Flags are passed by keyword here. That is a good habit: in re.sub() and re.split()
# the positional slot after the string is 'count' / 'maxsplit', not 'flags'.
matches = re.findall(pattern, text, flags=re.IGNORECASE)
print(f"Case-insensitive matches for '{pattern}': {matches}")

Case-insensitive matches for 'python': ['Python', 'PYTHON']


In [10]:
# Step 9: Substituting text with re.sub()
# re.sub() builds a new string in which the parts matching a pattern
# have been swapped for a replacement. The original string is left untouched.

text = "The sky is blue."
pattern = "blue"
replacement = "green"

new_text = re.sub(pattern, repl=replacement, string=text)
print(f"Updated text: {new_text}")

# Explanation:
# By default 're.sub()' replaces every non-overlapping occurrence of the pattern;
# if the pattern is not found, the text is returned unchanged.

Updated text: The sky is green.


In [11]:
# Step 10: Splitting a string with re.split()
# re.split() cuts a string wherever a pattern matches. It works like str.split(),
# except that the separator is a regex and can therefore describe several delimiters at once.

text = "apple, orange; banana: grape"
# A character set: a single comma, semicolon or colon.
pattern = r"[,;:]"

fruits = re.split(pattern, string=text)
print(f"Fruits list: {fruits}")

# Explanation:
# 're.split()' cuts the string at each comma, semicolon, or colon.
# Only the delimiter itself is removed, so the space that follows it stays at the
# front of the next item. A pattern such as r"[,;:]\s*" would consume that whitespace too.

Fruits list: ['apple', ' orange', ' banana', ' grape']
