In [1]:
import re 

# Two capture groups: group 1 is the run of letters (the month),
# group 2 is the run of digits (the day).
regex = r"([a-zA-Z]+) (\d+)"

# Search once and reuse the result: re.search() returns a match object on
# success and None otherwise, so the stored value doubles as the condition.
match = re.search(regex, "June 24")
if match:
    # start()/end() delimit the whole match within the input string.
    print("Match at index %s, %s" % (match.start(), match.end()))
    # group(0) is the entire match; group(1) and group(2) are the capture groups.
    print("Full match: %s" % (match.group(0)))
    print("Month: %s" % (match.group(1)))
    print("Day: %s" % (match.group(2)))
else:
    print("The regex pattern does not match.:(")

Match at index 0, 7
Full match: June 24
Month: June
Day: 24


## Findall 

```python
matchList = re.findall(pattern, input_str, flags=0)
matchList = re.finditer(pattern, input_str, flags=0)
```

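re.findall() returns a list of all non-overlapping matches in the input string (the full match text when the pattern has no capture groups, or the captured group text when it has one). If you need additional context for each match, such as its position, you can use re.finditer(), which instead returns an iterator of match objects (re.Match) to walk through. Both methods take the same parameters.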

In [3]:
import re
# Let's use a regular expression to pull several date strings out of one input.
# The pattern has no capture groups, so findall() returns each full match as a string.
regex = r"[a-zA-Z]+ \d+"
matches = re.findall(regex, "June 24, August 9, Dec 12")
for match in matches:
    print("Full match: %s" % (match,))



Full match: June 24
Full match: August 9
Full match: Dec 12


In [4]:
# Wrapping the month part of the pattern in a capture group changes what
# findall() returns: each item is the captured month, not the whole date.
regex = r"([a-zA-Z]+) \d+"
matches = re.findall(regex, "June 24, August 9, Dec 12")
for match in matches:
    print("Match month: %s" % (match,))


Match month: June
Match month: August
Match month: Dec


In [5]:
# When the exact position of each match is needed, finditer() yields match
# objects instead of plain strings.
regex = r"([a-zA-Z]+) \d+"
matches = re.finditer(regex, "June 24, August 9, Dec 12")
for match in matches:
    # span() is the (start, end) pair, which fills both %s placeholders.
    print("Match at index: %s, %s" % match.span())

Match at index: 0, 7
Match at index: 9, 17
Match at index: 19, 25


## Find and replace strings


`replacedString = re.sub(pattern, replacement_pattern, input_str, count=0, flags=0)`


In [6]:
# Group 1 captures the month and group 2 the day; the replacement string
# refers back to them as \1 and \2 to swap their order.
regex = r"([a-zA-Z]+) (\d+)"
print(
    re.sub(
        regex,
        repl=r"\2 of \1",
        string="June 24, August 9, Dec 12",
    )
)

24 of June, 9 of August, 12 of Dec


## Flags

`re.IGNORECASE` makes the pattern case insensitive so that it matches strings of different capitalizations

`re.MULTILINE` is useful when your input string contains newline characters (\n): this flag allows the start and end metacharacters (^ and $ respectively) to match at the beginning and end of each line instead of only at the beginning and end of the whole input string

`re.DOTALL` allows the dot (.) metacharacter to match all characters, including the newline character (\n)


## Compiling for performance

In Python, building a regular expression from its pattern string has a cost, so if you need to test or extract information from many input strings using the same expression, it is recommended that you compile it once and reuse the result. `re.compile()` returns a compiled pattern object (`re.Pattern`).

`regexObject = re.compile(pattern, flags=0)`

The returned object has exactly the same methods as above, except that they take the input string and no longer require the pattern or flags for each call.



In [7]:
import re

# Compile the pattern once; the compiled object exposes search(), findall()
# and sub() directly, taking only the input string.
regex = re.compile(r"(\w+) World")

result = regex.search("Hello World is the easiest")
if result is not None:
    # Unpacking span() prints the start and end indices separated by a space.
    print(*result.span())

# The pattern has a single capture group, so findall() returns the captured words.
for result in regex.findall("Hello World, Bonjour World"):
    print(result)

# \1 in the replacement re-inserts the captured word before " Earth".
print(regex.sub(r"\1 Earth", "Hello World"))

0 11
Hello
Bonjour
Hello Earth
