In [1]:
# Import the built-in re module:
import re


In [2]:
# re.match only succeeds when the pattern matches at the very beginning of the
# string; it returns a Match object on success and None otherwise.
# (`re` is already imported in the first cell, so it is not re-imported here.)
text = "Hello World"
result = re.match("Hello", text)

print(result)  # Match found at beginning


<re.Match object; span=(0, 5), match='Hello'>


In [3]:
# re.search scans the entire string and reports the first place the pattern occurs.
text = "Say Hello to the world"
greeting = re.compile("Hello")
result = greeting.search(text)

print(result)  # Match found inside


<re.Match object; span=(4, 9), match='Hello'>


In [4]:
# Collect every non-overlapping occurrence of the pattern, scanning left to right.
text = "Cat and bat and rat"
result = [hit.group() for hit in re.finditer("at", text)]

print(result)  # ['at', 'at', 'at']


['at', 'at', 'at']


In [6]:
# Break the string apart at every position where the regex pattern matches.
text = "apple,banana,grape"
comma = re.compile(",")
result = comma.split(text)

print(result)  # ['apple', 'banana', 'grape']


['apple', 'banana', 'grape']


In [8]:
# Substitute every occurrence of the pattern with a replacement string.
text = "Today is Sunday"
weekday = re.compile("Sunday")
result = weekday.sub("Monday", text)

print(result)  # "Today is Monday"


Today is Monday


| Symbol | Meaning                                                | Example   | Matches                  |
| ------ | ------------------------------------------------------ | --------- | ------------------------ |
| `.`    | Any character except `\n`                              | `a.b`     | `acb`, `a9b`, `a*b`      |
| `^`    | Start of string                                        | `^Hi`     | "Hi there" (only start)  |
| `$`    | End of string (or just before a trailing `\n`)         | `bye$`    | "say bye"                |
| `*`    | 0 or more of the preceding item                        | `ab*`     | `a`, `ab`, `abb`, `abbb` |
| `+`    | 1 or more of the preceding item                        | `ab+`     | `ab`, `abb`, not `a`     |
| `?`    | 0 or 1 of the preceding item                           | `ab?`     | `a`, `ab`                |
| `{n}`  | Exactly n of the preceding item                        | `a{3}`    | `aaa`                    |
| `[]`   | Character class (any one listed character)             | `[aeiou]` | any vowel                |
| `\d`   | Digit (0-9; any Unicode digit unless `re.ASCII`)       | `\d+`     | `123`                    |
| `\w`   | Word character (a-z, A-Z, 0-9, \_; Unicode by default) | `\w+`     | `word123`                |
| `\s`   | Whitespace                                             | `\s+`     | space, tab, newline      |


In [10]:
# 1. Validate Email
# The address must consist of: a local part, "@", a domain, a dot, and a
# top-level domain of at least two letters.
#
# Two fixes compared with the naive version of this check:
#   * The TLD class is [A-Za-z]. Writing [A-Z|a-z] does NOT mean "A-Z or a-z":
#     inside a character class "|" is a literal pipe, so "user@site.c|m"
#     would have been accepted.
#   * re.fullmatch requires the WHOLE string to match. re.match only anchors
#     the start, so trailing junk ("test@example.com!!!") would have passed.
email = "test@example.com"
pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"

if re.fullmatch(pattern, email):
    print("Valid Email")
else:
    print("Invalid Email")


Valid Email


In [12]:
# 2. Extract all numbers: each run of consecutive digits becomes one list entry
# (the entries are strings, not ints).
text = "There are 3 cats and 4 dogs."
numbers = [run.group() for run in re.finditer(r"\d+", text)]
print(numbers)  # ['3', '4']


['3', '4']


In [13]:
# Collapse every run of whitespace into a single space.
text = "Python     is   awesome"
whitespace_run = re.compile(r"\s+")
result = whitespace_run.sub(" ", text)
print(result)  # "Python is awesome"


Python is awesome


In [14]:
# Extract phone numbers written either as 10 plain digits or as
# 3-3-4 digit groups separated by hyphens.
text = "Call me at 9876543210 or 123-456-7890"
pattern = r"\b\d{10}\b|\b\d{3}-\d{3}-\d{4}\b"
# The pattern has no capture groups, so each match's full text is the phone number.
result = [found.group() for found in re.finditer(pattern, text)]
print(result)  # ['9876543210', '123-456-7890']


['9876543210', '123-456-7890']


In [15]:
# Pull hashtags (#word) and mentions (@word) out of a social-media post.
text = "Loving the #sunset and #beach! Follow me @gowtham123"
hashtag_re = re.compile(r"#\w+")
mention_re = re.compile(r"@\w+")
hashtags = hashtag_re.findall(text)
mentions = mention_re.findall(text)
# Prints ['#sunset', '#beach'] and then ['@gowtham123'].
for found in (hashtags, mentions):
    print(found)


['#sunset', '#beach']
['@gowtham123']


In [16]:
# Extract dates written as DD-MM-YYYY. The pattern only checks the digit layout
# (2-2-4 separated by hyphens); it does not verify that the day or month is real.
text = "The event is on 28-06-2025 and ends on 30-06-2025."
date_re = re.compile(r"\b\d{2}-\d{2}-\d{4}\b")
dates = date_re.findall(text)
print(dates)  # ['28-06-2025', '30-06-2025']


['28-06-2025', '30-06-2025']


In [17]:
# Extract whole words made of one uppercase ASCII letter followed by zero or
# more lowercase ASCII letters.
text = "My name is Gowtham and I live in Mumbai."
capital_words = [word.group() for word in re.finditer(r"\b[A-Z][a-z]*\b", text)]
print(capital_words)  # ['My', 'Gowtham', 'I', 'Mumbai']


['My', 'Gowtham', 'I', 'Mumbai']


In [18]:
# Password Validation (minimum 8 chars, 1 number, 1 uppercase, 1 special char)
#
# How the pattern works:
#   (?=.*[A-Z])        lookahead: at least one uppercase letter
#   (?=.*\d)           lookahead: at least one digit
#   (?=.*[@$!%*?&])    lookahead: at least one of the special characters
#   [A-Za-z\d@$!%*?&]{8,}   8+ characters, all drawn from this allowed set
#   \Z                 absolute end of the string
#
# The end anchor is \Z rather than $ because $ also matches just before a
# trailing newline, which would let "My@Pass123\n" pass as a strong password.
password = "My@Pass123"
pattern = r"^(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}\Z"

if re.match(pattern, password):
    print("Strong Password")
else:
    print("Weak Password")


Strong Password


In [20]:
# Parentheses in a pattern create capture groups, which let you pull out
# specific pieces of the matched text.
text = "Name: John, Age: 28"
match = re.search(r"Name:\s(\w+), Age:\s(\d+)", text)
# groups() returns all captured groups as a tuple, in pattern order.
name, age = match.groups()
print(name)  # John
print(age)  # 28


John
28


In [21]:
# Non-greedy matching: adding ? after * makes it match as little as possible,
# so each <p>...</p> pair is returned separately instead of one long match.
text = "<p>Hello</p><p>World</p>"
paragraph = re.compile(r"<p>.*?</p>")
result = paragraph.findall(text)
print(result)  # ['<p>Hello</p>', '<p>World</p>']


['<p>Hello</p>', '<p>World</p>']


### Assignment 1: Extract all email addresses

emails = "Contact us at support@example.com, admin@site.org"


### Assignment 2: Replace all digits with `*`

text = "My OTP is 9876"


### Assignment 3: Check if a string is a valid PAN number (e.g., ABCDE1234F)

PAN = 5 uppercase letters + 4 digits + 1 uppercase letter

text = "ABCDE1234F"


### Assignment 4: Split a paragraph into sentences

text = "Hello there! How are you doing? Let's meet tomorrow."
