# Introduction to Regular Expressions

In [1]:
import re

In [90]:
def check(true_match_list, false_match_list=None, pattern=None, verbose=False):
    """Print whether a regex matches exactly the strings in ``true_match_list``.

    Every candidate string is searched on its own with ``re.findall``, so
    anchors (``^`` / ``$``) apply to the individual string and a match can
    never contain the quotes, commas or brackets of a list's printed form.

    As with ``re.findall`` itself, a pattern that contains capturing groups
    yields the group contents instead of the whole match; use non-capturing
    groups ``(?:...)`` when the full text should be compared.

    Args:
        true_match_list: List of strings the pattern is expected to match,
            in order.
        false_match_list: List of strings the pattern is expected not to
            match. ``None`` (the default) means there are none.
        pattern: Regular expression to test. When ``None``, the user is
            prompted for one via ``input``.
        verbose: When true, also print the list of matches that were found.

    Returns:
        None. The verdict is printed to standard output.
    """
    # A fresh list per call avoids sharing a mutable default between calls.
    if false_match_list is None:
        false_match_list = []
    if pattern is None:
        pattern = input("Enter a regex here: ")
    print("The regex you entered is: " + pattern)

    regex = re.compile(pattern)
    matches = []
    # Search string by string rather than in str(list): the list repr would
    # make ^/$ anchor to "[" / "]" and expose ", " separators to the pattern.
    for candidate in list(true_match_list) + list(false_match_list):
        matches.extend(regex.findall(candidate))

    if verbose:
        print("The matches are: " + str(matches))
    if matches == list(true_match_list):
        print(f"Matching with {str(true_match_list)}.")
    else:
        print(f"Not matching with {str(true_match_list)}.")

## 2. Literals

In [60]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. A literal matches that exact character sequence.
check(["bark"], ["baa", "bellow", "boom"], pattern=r"bark")

The regex you entered is: bark
Matching with ['bark'].


## 3. Alternation

In [61]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. `|` matches either the text on its left or right.
check(["cat", "dog"], pattern=r"cat|dog")

The regex you entered is: cat|dog
Matching with ['cat', 'dog'].


## 4. Character Sets

In [62]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. `[chr]` matches exactly one of c, h or r.
check(["cat", "hat", "rat"], ["eat", "mat", "sat"], pattern=r"[chr]at")

The regex you entered is: [chr]at
Matching with ['cat', 'hat', 'rat'].


## 5. Wildcards

In [64]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. Each `.` matches any one character; `\.` is a
# literal period. The raw string keeps the backslash intact for the regex engine.
check(["bear.", "lion.", "orca."], ["mouse", "koala", "snail"], pattern=r"....\.")

The regex you entered is: ....\.
Matching with ['bear.', 'lion.', 'orca.'].


## 6. Ranges

In [65]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. `[c-e]` is a range matching one of c, d or e.
check(["cub", "dog", "elk"], ["ape", "cow", "ewe"], pattern=r"[c-e][uol][bgk]")

The regex you entered is: [c-e][uol][bgk]
Matching with ['cub', 'dog', 'elk'].


## 7. Shorthand Character Classes

In [66]:
# The pattern is passed in code rather than typed at the input() prompt, so
# the cell re-runs unattended. `\d` is a digit, `\s` is whitespace and `\w` is
# a word character. The raw string keeps the backslashes intact for the regex engine.
check(
    ["5 sloths", "8 llamas", "7 hyenas"],
    ["one bird", "two owls"],
    pattern=r"\d\s\w\w\w\w\w\w",
)

The regex you entered is: \d\s\w\w\w\w\w\w
Matching with ['5 sloths', '8 llamas', '7 hyenas'].


## 8. Grouping

In [93]:
# Sentences the pattern is expected to match.
true_match_list = ["puppies are my favorite!", "kitty cats are my favorite!"]
# Sentences the pattern is expected to leave unmatched.
false_match_list = [
    "deer are my favorite!",
    "otters are my favorite!",
    "hedgehogs are my favorite!",
]

# The alternation is wrapped in (?: ...), a non-capturing group: it limits
# the scope of `|` without creating a capture group.
pattern = r"(?:puppies|kitty cats) are my favorite!"

# Why non-capturing: when a pattern contains a capturing group such as
# (puppies|kitty cats), re.findall() returns the text of the groups rather
# than the entire matched strings.
check(true_match_list, false_match_list, pattern=pattern, verbose=True)

The regex you entered is: (?:puppies|kitty cats) are my favorite!
The matches are: ['puppies are my favorite!', 'kitty cats are my favorite!']
Matching with ['puppies are my favorite!', 'kitty cats are my favorite!'].


## 9. Quantifiers - Fixed

In [94]:
# Expected matches: "sque", then three to five "a"s, then "k".
true_match_list = ["squeaaak", "squeaaaak", "squeaaaaak"]
# Expected non-matches: fewer than three or more than five "a"s.
false_match_list = ["squeak", "squeaak", "squeaaaaaak"]

# {3,5} repeats the preceding "a" between 3 and 5 times, inclusive.
pattern = r"squea{3,5}k"

check(true_match_list, false_match_list, pattern=pattern)

The regex you entered is: squea{3,5}k
Matching with ['squeaaak', 'squeaaaak', 'squeaaaaak'].


## 10. Quantifiers - Optional

In [98]:
true_match_list = [
    "1 duck for adoption?", 
    "5 ducks for adoption?",
    "7 ducks for adoption?"
]

# Raw string: `\d` and `\?` are not valid Python string escapes, so a plain
# string literal triggers an invalid-escape warning. The value is unchanged.
# `s?` makes the "s" optional; `\?` matches a literal question mark.
pattern = r"\d ducks? for adoption\?"

check(true_match_list, pattern=pattern)

The regex you entered is: \d ducks? for adoption\?
Matching with ['1 duck for adoption?', '5 ducks for adoption?', '7 ducks for adoption?'].


## 11. Quantifiers - 0 or More, 1 or More

In [99]:
# Expected matches: "h", at least two "o"s, then "t".
true_match_list = ["hoot", "hoooooot", "hooooooooooot"]
# Expected non-matches: too few "o"s, an interrupting letter, or no final "t".
false_match_list = ["hot", "hoat", "hoo"]

# `o+` means one or more "o", so "hoo+t" requires two or more in total.
pattern = r"hoo+t"

check(true_match_list, false_match_list, pattern=pattern)

The regex you entered is: hoo+t
Matching with ['hoot', 'hoooooot', 'hooooooooooot'].


## 12. Anchors

In [100]:
# The one sentence expected to match.
true_match_list = ["penguins are cooler than regular expressions"]
# Same sentence with extra text before or after it; expected not to match.
false_match_list = [
    "king penguins are cooler than regular expressions",
    "penguins are cooler than regular expressions!",
]

# `^` anchors the pattern to the start of the searched text and `$` to its end.
pattern = r"^penguins are cooler than regular expressions$"

check(true_match_list, false_match_list, pattern=pattern)

The regex you entered is: ^penguins are cooler than regular expressions$
Matching with ['penguins are cooler than regular expressions'].


## 13. Review

In [101]:
# Phone numbers in several common formats; all are expected to match.
true_match_list = [
    "718-555-3810",
    "9175552849",
    "1 212 555 3821",
    "(917)5551298",
    "212.555.8731"
]
# Plain words with no digits; none are expected to match.
false_match_list = [
    "wildebeest", 
    "hippopotamus",
    "woolly mammoth"
]

# Raw string: `\d` and `\s` are not valid Python string escapes, so a plain
# string literal triggers an invalid-escape warning. The value is unchanged.
# The pattern goes position by position: each character class lists the
# characters (digit, separator or parenthesis) allowed at that position, and
# the trailing `\d*` absorbs any remaining digits.
pattern = r"[\d(]{1}[\s\d]{1}\d[-.\d]{1}[)\d]{1}[\s\d]{1}\d[-.\d]{1}\d[\s\d]{1}\d*"

check(true_match_list, false_match_list, pattern)

The regex you entered is: [\d(]{1}[\s\d]{1}\d[-.\d]{1}[)\d]{1}[\s\d]{1}\d[-.\d]{1}\d[\s\d]{1}\d*
Matching with ['718-555-3810', '9175552849', '1 212 555 3821', '(917)5551298', '212.555.8731'].
