# In [None]:  (Jupyter cell marker left over from the notebook export)
import re

# 1. What is the name of the feature responsible for generating Regex objects?
# re.compile() takes a pattern string and hands back a compiled Regex (Pattern) object.
pattern = re.compile(r'\d+')

# 2. Why do raw strings often appear in Regex objects?
# An r'' literal leaves backslashes untouched, so regex escapes need no doubling:
# r'\d+' spells the same pattern as the ordinary string '\\d+'.

# 3. What is the return value of the search() method?
# search() gives back a Match object for the first hit, or None when nothing matches.
search_result = re.compile(r'\d+').search('123 abc')
print(search_result[0])  # prints 123

# 4. From a Match item, how do you get the actual strings that match the pattern?
# Call group() on the Match (or index it): 0 is the whole match, 1..n are the groups.
match = re.compile(r'(\d+)').search('abc123')
print(match[1])  # prints 123

# 5. In the regex r'(\d\d\d)-(\d\d\d-\d\d\d\d)', what does group zero cover? Group 2? Group 1?
# Group 0 is the entire match; groups 1 and 2 are the first and second
# parenthesised parts, counted by their opening parenthesis.
regex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
match = regex.search('123-456-7890')
# One value per line: 123-456-7890 (whole match), 123 (group 1), 456-7890 (group 2).
print(match[0], match[1], match[2], sep='\n')

# 6. How can you tell a regex that you want it to fit real parentheses and periods?
# Put a backslash in front of them: \( \) and \. stand for the literal characters.
regex = re.compile(r'\(\d{3}\)\.\d{3}-\d{4}')
match = re.search(regex, '(123).456-7890')
print(match[0])  # prints (123).456-7890

# 7. The findall() method returns a string list or a list of string tuples. What causes it to return one of the two options?
# The number of capturing groups decides: with no group (or exactly one) the result
# is a list of strings; with two or more groups it is a list of tuples, one per match.
regex = re.compile(r'(\d{3})-(\d{3})')
# Two groups here, so each match contributes a tuple: [('123', '456'), ('789', '012')]
print([found.groups() for found in regex.finditer('123-456 and 789-012')])

# 8. In regular expressions, what does the | character mean?
# '|' is alternation: the pattern on its left OR the pattern on its right may match.
regex = re.compile(r'cat|dog')
print([hit[0] for hit in regex.finditer('I have a cat and a dog.')])  # prints ['cat', 'dog']

# 9. In regular expressions, what does the . character stand for?
# '.' stands for any single character other than a newline.
regex = re.compile(r'a.b')
print(re.match(regex, 'aab'))  # prints <re.Match object; span=(0, 3), match='aab'>

# 10. What is the difference between the + and * characters?
# '+' needs at least one repetition of the preceding character or group;
# '*' is also satisfied by zero repetitions.
regex_plus, regex_star = re.compile(r'a+b'), re.compile(r'a*b')
print(re.match(regex_plus, 'aaab'))  # prints <re.Match object; span=(0, 4), match='aaab'>
print(re.match(regex_star, 'b'))  # prints <re.Match object; span=(0, 1), match='b'>

# 11. Difference between {4} and {4,5} in regular expression?
# {4} demands exactly four repetitions of the preceding character or group;
# {4,5} accepts four or five.
regex_4, regex_4_5 = re.compile(r'a{4}'), re.compile(r'a{4,5}')
print(re.match(regex_4, 'aaaa'))  # prints <re.Match object; span=(0, 4), match='aaaa'>
print(re.match(regex_4_5, 'aaaaa'))  # prints <re.Match object; span=(0, 5), match='aaaaa'>

# 12. What do \d, \w, and \s signify in regular expressions?
# \d: a single digit (0-9)
# \w: a single word character (letters, digits, and the underscore)
# \s: a single whitespace character (space, tab, newline, ...)
regex_digit, regex_word, regex_space = (
    re.compile(r'\d'),
    re.compile(r'\w'),
    re.compile(r'\s'),
)
# Each sample is matched by its class, so three Match objects are printed, one per line.
print(
    regex_digit.match('3'),  # a digit
    regex_word.match('a'),   # a word character
    regex_space.match(' '),  # a whitespace character
    sep='\n',
)

# 13. What do \D, \W, and \S signify in regular expressions?
# The uppercase forms are the complements of the lowercase ones:
# \D: any character that is not a digit
# \W: any character that is not a word character
# \S: any character that is not whitespace
regex_non_digit, regex_non_word, regex_non_space = (
    re.compile(r'\D'),
    re.compile(r'\W'),
    re.compile(r'\S'),
)
print(
    regex_non_digit.match('a'),  # a non-digit
    regex_non_word.match('@'),   # a non-word character
    regex_non_space.match('a'),  # a non-space character
    sep='\n',
)

# 14. Difference between .* and .*?
# .* is greedy and grabs the longest text that still lets the pattern match;
# .*? is non-greedy and settles for the shortest such text.
text = '<tag>some content</tag>'
regex_greedy = re.compile(r'<.*>')
regex_non_greedy = re.compile(r'<.*?>')
print(regex_greedy.search(text)[0])  # prints <tag>some content</tag>
print(regex_non_greedy.search(text)[0])  # prints <tag>

# 15. Syntax for matching both numbers and lowercase letters with a character class?
# List both ranges inside one pair of brackets: [0-9a-z].
regex = re.compile(r'[0-9a-z]')
print([char[0] for char in regex.finditer('123abc')])  # prints ['1', '2', '3', 'a', 'b', 'c']

# 16. Procedure for making a regular expression case insensitive?
# Hand re.IGNORECASE (short form: re.I) to re.compile() as its second argument.
regex = re.compile(r'hello', flags=re.I)
print(re.match(regex, 'HELLO'))  # prints <re.Match object; span=(0, 5), match='HELLO'>

# 17. What does the . character normally match? What does it match if re.DOTALL is passed as 2nd argument in re.compile()?
# By default '.' matches every character except a newline.
# With re.DOTALL (short form: re.S) the newline is matched as well.
regex_normal = re.compile(r'.')
regex_dotall = re.compile(r'.', flags=re.S)
print(re.match(regex_normal, 'a'))  # Match object for 'a'
print(re.match(regex_dotall, '\n'))  # Match object for the newline

# 18. If numReg = re.compile(r'\d+'), what will numReg.sub('X', '11 drummers, 10 pipers, five rings, 4 hen') return?
# Every run of digits is replaced by 'X'; the spelled-out 'five' is left alone.
numReg = re.compile(r'\d+')
print(re.sub(numReg, 'X', '11 drummers, 10 pipers, five rings, 4 hen'))  # prints X drummers, X pipers, five rings, X hen

# 19. What does passing re.VERBOSE as the 2nd argument to re.compile() allow you to do?
# In verbose mode (short form: re.X) whitespace inside the pattern is ignored and
# '#' starts a comment, so a pattern can be laid out over several annotated lines.
regex_verbose = re.compile(r"""
    \d{3}    # Area code
    -        # Dash separator
    \d{3}    # First 3 digits
    -        # Dash separator
    \d{4}    # Last 4 digits
""", flags=re.X)
print(re.match(regex_verbose, '123-456-7890'))  # prints <re.Match object; span=(0, 12), match='123-456-7890'>

# 20. Regex to match numbers with commas for every three digits:
# One to three leading digits, then any number of ",ddd" groups; the ^ and $
# anchors force the whole string to fit that shape.
regex_number = re.compile(r'^\d{1,3}(?:,\d{3})*$')
# Prints one result per line: a Match object on success, None otherwise.
print(
    *(
        regex_number.match(candidate)
        for candidate in (
            '1,234',      # Matches
            '42',         # Matches
            '6,368,745',  # Matches
            '12,34,567',  # Doesn't match: groups are not three digits wide
            '1234',       # Doesn't match: four digits with no comma
        )
    ),
    sep='\n',
)

# 21. Regex to match a full name with the last name Watanabe:
# The first name is a single word that starts with a capital letter. The letters
# after it may be upper- or lowercase, so an internally capitalised name such as
# 'RoboCop' is accepted (a lowercase-only [a-z]+ tail stops at the 'C' and
# rejects it). The ^ and $ anchors keep inputs like 'Mr. Watanabe' or a bare
# 'Watanabe' from matching.
regex_name = re.compile(r'^[A-Z][A-Za-z]+ Watanabe$')
print(regex_name.match('Haruto Watanabe'))  # Matches
print(regex_name.match('Alice Watanabe'))   # Matches
print(regex_name.match('RoboCop Watanabe'))  # Matches
print(regex_name.match('haruto Watanabe'))  # Doesn't match (first name not capitalised)

# 22. Regex to match a sentence with specific word conditions:
# Subject, verb and object each come from a fixed list of three words, the
# sentence ends with a literal period, and letter case is ignored throughout.
regex_sentence = re.compile(
    r'^(Alice|Bob|Carol) (eats|pets|throws) (apples|cats|baseballs)\.$',
    flags=re.I,
)
# All three samples match, so three Match objects are printed, one per line.
print(
    *map(
        regex_sentence.match,
        (
            'Alice eats apples.',
            'BOB EATS CATS.',
            'Carol throws baseballs.',
        ),
    ),
    sep='\n',
)
