In [1]:
# Regular expression in python Using re module 

In [2]:
# 1 regular expression  
import re  # Import the re module to work with regular expressions

# Define the regular expression pattern
pattern = r"hello"  # The pattern we are looking for. 'r' makes it a raw string, so backslashes are treated literally.

# Define the text to search within
text = "hello world"  # The text where we want to search for the pattern

# Use re.search() to look for the pattern in the text
# It returns a match object if the pattern is found; otherwise, it returns None
match = re.search(pattern, text)

# Print the result
# If a match is found, use match.group() to get the matched substring
# If no match is found, print "No match"
print("Match found:", match.group() if match else "No match")

Match found: hello


In [3]:
# Simple character matches 
import re  # Import the regular expressions module

# Define the regular expression pattern
pattern = r"cat"  # The pattern we are looking for, which matches the substring "cat"

# Define the text to search within
text = "The cat sat on the mat."  # The text where we want to search for the pattern

# Use re.search() to look for the pattern in the text
# It returns a match object if the pattern is found; otherwise, it returns None
match = re.search(pattern, text)

# Print the result
# If a match is found, use match.group() to get the matched substring
# If no match is found, print "No match"
print("Match found:", match.group() if match else "No match")

Match found: cat


In [4]:
# 3 Special Characters
import re 

# Define the regular expression pattern
pattern = r"c.t"  # The pattern matches any substring starting with 'c', ending with 't', and having any character in between

# Define the text to search within
text = "The cat sat."  # The text where we want to search for the pattern

# Use re.search() to look for the pattern in the text
# It returns a match object if the pattern is found; otherwise, it returns None
match = re.search(pattern, text)

# Print the result
# If a match is found, use match.group() to get the matched substring
# If no match is found, print "No match"
print("Match found:", match.group() if match else "No match")

Match found: cat


In [5]:
# 4 charcter classes 
import re  

pattern = r"[a-z]+"  # Matches one or more consecutive lowercase letters (a-z)

# Define the text to search within
text = "Hello World 123"  # The text where we want to search for the pattern

# Use re.findall() to find all matches of the pattern in the text
# It returns a list of all non-overlapping matches
matches = re.findall(pattern, text)

# Print the matches found
print("Matches found:", matches)

Matches found: ['ello', 'orld']


In [6]:
# 5 Quantifiers 
import re  

pattern = r"\d{3}"  # Matches exactly three consecutive digits (0-9)

# Define the text to search within
text = "My number is 1234567890"  # The text where we want to search for the pattern

# Use re.findall() to find all matches of the pattern in the text
# It returns a list of all non-overlapping matches
matches = re.findall(pattern, text)

# Print the matches found
print("Matches found:", matches)

Matches found: ['123', '456', '789']


In [7]:
# 6 dot character (.)
pattern = r"h.llo"
text = "hello"
match = re.search(pattern, text)
print("Match found:", match.group() if match else "No match")

Match found: hello


In [8]:
# 7  Greedy Matches 
import re  # Import the regular expressions module

# Define a greedy pattern
pattern = r"<.*>"  # Matches any string starting with '<' and ending with '>', consuming as much as possible (greedy).

# Define the text to search within
text = "content"  # This is the input text to match against.

# Use re.search() with the greedy pattern
match = re.search(pattern, text)

# Print the result of the greedy match
print("Greedy match:", match.group() if match else "No match")

# Define a lazy pattern
pattern_lazy = r"<.*?>"  # Matches any string starting with '<' and ending with '>', consuming as little as possible (lazy).

# Use re.search() with the lazy pattern
match_lazy = re.search(pattern_lazy, text)

# Print the result of the lazy match
print("Lazy match:", match_lazy.group() if match_lazy else "No match")


Greedy match: No match
Lazy match: No match


In [9]:
# 8 Grouping 
import re  # Import the regular expressions module

#  regular expression pattern
pattern = r"(\d{3})-(\d{2})"  
# (\d{3}): Matches exactly three consecutive digits (area code), captured as the first group.
# -: Matches the literal dash character.
# (\d{2}): Matches exactly two consecutive digits (local code), captured as the second group.

# Define the text to search within
text = "Phone number: 123-45"  # The input text containing the area and local codes.

# Use re.search() to search for the pattern in the text
# It returns a match object if the pattern is found; otherwise, it returns None
match = re.search(pattern, text)

# If a match is found, extract and print the groups
if match:
    print("Area code:", match.group(1))  # Group 1 corresponds to the first set of parentheses (\d{3}).
    print("Local code:", match.group(2))  # Group 2 corresponds to the second set of parentheses (\d{2}).
else:
    print("No match found")  # If the pattern is not found, print this message.


Area code: 123
Local code: 45


In [10]:
#  9 Matching at Begnning or End
import re  # Importing  regular expressions module

# Define a pattern to match "Hello" at the start of the string
pattern = r"^Hello"  # The caret (^) asserts that "Hello" must occur at the start of the text

# Define the text to search within
text = "Hello world"  # The text to test for matches

# Use re.search() to check for a match at the start
match = re.search(pattern, text)

# Print the result for the start match
print("Match at start:", match.group() if match else "No match")

# Define a pattern to match "world" at the end of the string
pattern = r"world$"  # The dollar sign ($) asserts that "world" must occur at the end of the text

# Use re.search() to check for a match at the end
match = re.search(pattern, text)

# Print the result for the end match
print("Match at end:", match.group() if match else "No match")

Match at start: Hello
Match at end: world


In [11]:
# 10 Match objects 
import re  # Import the regular expressions module

# Define the pattern to match the word "world"
pattern = r"world"  # This matches the literal string "world"

# Define the text to search within
text = "Hello world"  # The text in which we're looking for the pattern

# Use re.search() to find the first occurrence of the pattern
match = re.search(pattern, text)

# If a match is found, extract the matched text and its position
if match:
    print("Matched text:", match.group())  # Returns the matched substring (i.e., "world")
    print("Start position:", match.start())  # Returns the starting index of the match
    print("End position:", match.end())  # Returns the ending index (exclusive) of the match
else:
    print("No match found")


Matched text: world
Start position: 6
End position: 11


In [12]:
# 11 substituting
# using re.sub()

import re  # Import the regular expressions module

# Define the pattern to match the word "cat"
pattern = r"cat"  # Matches the literal string "cat"

# Define the text to search within
text = "The cat sat on the mat."  # The text where we want to replace "cat" with "dog"

# Use re.sub() to substitute the matched pattern with the string "dog"
result = re.sub(pattern, "dog", text)

# Print the modified text after substitution
print("After substitution:", result)

After substitution: The dog sat on the mat.


In [13]:
# 12 splitting a String 
import re  # Import the regular expressions module

# Define the pattern to match one or more spaces
pattern = r"\s+"  # \s matches any whitespace character (space, tab, newline), and '+' means "one or more"

# Define the text to split
text = "Split this sentence by spaces"  # The input string we want to split

# Use re.split() to split the text by the pattern (one or more spaces)
result = re.split(pattern, text)

# Print the result after splitting
print("Split result:", result)

Split result: ['Split', 'this', 'sentence', 'by', 'spaces']


In [14]:
# 13 Compilling regular expression
# Using re.compile() and findall()

import re  # Import the regular expressions module

# Compile the regular expression pattern to match one or more digits
pattern = re.compile(r"\d+")  # \d matches any digit, and + means "one or more" digits in a row

# Define the text to search within
text = "123 apples, 456 bananas"  # The input string containing numbers

# Using  findall() to find all occurrences of the pattern in the text
matches = pattern.findall(text)

# Print the result: the list of matched numbers
print("Matches:", matches)


Matches: ['123', '456']


In [15]:
# 14 Flags 
import re  # Import the regular expressions module

# Define the pattern to match the word "hello"
pattern = r"hello"  # Matches the literal string "hello"

# Define the text to search within
text = "Hello"  # The text we want to search for the pattern

# Use re.search() to search for the pattern with case-insensitive matching
# re.IGNORECASE allows the pattern to match regardless of case (e.g., "hello" or "Hello")
match = re.search(pattern, text, re.IGNORECASE)

# Print the result: the matched text if found, or a message saying no match
print("Case-insensitive match:", match.group() if match else "No match")

Case-insensitive match: Hello
