# Simple literal match
pattern = "hello"  # Matches exactly "hello"

# Any single character
"."  # Matches any single character except a newline
pattern = "h.t"  # Matches "hot", "hat", "h@t", etc.

# Character classes
"[abc]"   # Matches any one character listed in the brackets
"[^abc]"  # Matches any one character NOT listed in the brackets
"[a-z]"   # Matches any lowercase letter
"[A-Z]"   # Matches any uppercase letter
"[0-9]"   # Matches any digit

# Predefined character classes
# (written as raw strings so Python passes the backslash through to the regex engine)
r"\d"  # Any digit [0-9]
r"\D"  # Any non-digit [^0-9]
r"\w"  # Any word character [a-zA-Z0-9_]
r"\W"  # Any non-word character
r"\s"  # Any whitespace (space, tab, newline)
r"\S"  # Any non-whitespace

# Basic quantifiers
"*"      # 0 or more occurrences
"+"      # 1 or more occurrences
"?"      # 0 or 1 occurrence
"{n}"    # Exactly n occurrences
"{n,}"   # n or more occurrences
"{n,m}"  # Between n and m occurrences

import re

# Examples
pattern = r"ca*t"    # Matches "ct", "cat", "caat", etc.
pattern = r"ca+t"    # Matches "cat", "caat", but not "ct"
pattern = r"colou?r" # Matches "color" or "colour"
pattern = r"\d{3}"   # Matches exactly 3 digits
pattern = r"\d{2,4}" # Matches 2 to 4 digits

# Anchors and boundaries
"^"      # Start of string (or of each line with re.MULTILINE)
"$"      # End of string (or of each line with re.MULTILINE)
r"\b"    # Word boundary (raw string: a plain "\b" is a backspace character)
r"\B"    # Not a word boundary

# Examples
pattern = r"^Start"  # String must start with "Start"
pattern = r"end$"    # String must end with "end"
pattern = r"\bcat\b" # Matches "cat" but not "category"

# Grouping with ()
r"(...)"          # Capturing group
r"(?:...)"        # Non-capturing group
r"(?P<name>...)"  # Named group

import re

# Example of groups
# Each (...) is a capturing group; findall returns one tuple per match,
# with one item per group.
text = "John Doe, Jane Doe"
pattern = r"(\w+) (\w+)"
matches = re.findall(pattern, text)
print(matches)  # [('John', 'Doe'), ('Jane', 'Doe')]

# Named groups
# (?P<name>...) lets a captured group be retrieved by name instead of index.
pattern = r"(?P<first>\w+) (?P<last>\w+)"
match = re.search(pattern, "John Doe")
print(match.group('first'))  # John

In [2]:
import re

# Capturing groups: every (...) in the pattern contributes one item to
# each tuple that findall returns.
pattern = r"(\w+) (\w+)"
text = "John Doe, Jane Doe"
matches = re.findall(pattern, text)
print(matches)  # -> [('John', 'Doe'), ('Jane', 'Doe')]

# Named groups: (?P<name>...) makes a group addressable by name.
pattern = r"(?P<first>\w+) (?P<last>\w+)"
match = re.search(pattern, "John Doe")
print(match.group("first"))  # -> John

[('John', 'Doe'), ('Jane', 'Doe')]
John


In [1]:
import re

def validate_email(email: str) -> bool:
    """Return True if *email* has the shape ``user@domain.tld``.

    The user and domain parts may contain word characters, periods and
    dashes; the final label after the last period must be one or more
    word characters. This is a simple shape check, not full RFC
    validation.

    Args:
        email: The candidate address to check.

    Returns:
        True when the whole string matches the pattern, False otherwise.
    """
    pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"
    # Use fullmatch rather than match: "$" also matches just before a
    # trailing newline, so match() would accept an address followed by a
    # newline character. fullmatch requires the entire string to match.
    return re.fullmatch(pattern, email) is not None

# Addresses the pattern accepts: each one prints True
for address in (
    "john@gmail.com",
    "john.doe@my-site.com",
    "user123@sub.domain.org",
):
    print(validate_email(address))

# Addresses the pattern rejects: each one prints False
for address in (
    "john@.com",       # missing domain
    "@gmail.com",      # missing username
    "john@gmail",      # missing top-level domain
    "john@gm@il.com",  # multiple @
):
    print(validate_email(address))

True
True
True
False
False
False
False




```
pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"

```



1. ^ : Marks the start of the string
    - Ensures pattern matching begins at the start of the string

2. [\w\.-]+  --> First part: Username
    - \w : matches any word character (letters, digits, underscore)
    - \. is a literal period
    - - is a dash
    - + : one or more of the characters above

3. @: Means @

4. [\w\.-]+  --> Second part: Domain name
    - Same character set as the username part
    - Matches domains like: "gmail", "yahoo", etc.

5. \. : a literal period (the dot before the top-level domain)

6. \w+: matches one or more word characters

7. $: End of string

In [5]:
import re

# Email validation: prints the Match object for a well-formed address.
email_pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"
print(re.match(email_pattern, "user@example.com"))

# Phone number format: optional "+", optional "1", then 9 to 15 digits.
phone_pattern = r"^\+?1?\d{9,15}$"
print(re.match(phone_pattern, "+123456789"))

# Extract URLs: "http://" or "https://" followed by a run of word
# characters, dashes, periods, or %XX escapes. findall returns the full
# matches because the pattern has only non-capturing groups.
text = "https://www.kaggle.com/"
url_pattern = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"
urls = re.findall(url_pattern, text)

# Parse log files (left commented out; log_text is not defined in this cell).
# Groups: date, time, [LEVEL], message.
# log_pattern = r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) \[(\w+)\] (.*)'
# matches = re.finditer(log_pattern, log_text)

<re.Match object; span=(0, 16), match='user@example.com'>
<re.Match object; span=(0, 10), match='+123456789'>
