# Regular Expressions

Regular Expressions can be hard. Luckily, there are some great online tools to help you build and test them:

https://regex101.com/

---

In [1]:
import re

In [2]:
# Sample phone number in ###-###-#### form, used to try out the first pattern.
phone1 = "314-598-5494"

In [3]:
# Three capture groups of 3, 3 and 4 digits separated by dashes.
# re.match only anchors at the start of the string, not the end.
matches = re.match(r"(\d{3})-(\d{3})-(\d{4})", phone1)

In [4]:
# Group 0 is the entire text matched by the pattern.
matches.group(0)

'314-598-5494'

In [5]:
# Phone numbers written three different ways, to see which ones a pattern accepts.
phones = [
    '314-598-5494',      # dashes only
    '(555) 321-1234',    # parenthesised first three digits followed by a space
    '+1 555-123-4567'    # leading "+1 " prefix
]

In [6]:
# Try every sample number against a pattern that allows optional parentheses
# around the first three digits and either a dash or a space after them.
# re.match anchors at the start of the string, so a leading prefix prevents a match.
for phone in phones:
    matches = re.match(r"(\(?\d{3}\)?)[-\ ](\d{3})-(\d{4})", phone)
    # Show the matched text, or report the input that was rejected.
    print(f"Doesn't match: {phone}" if matches is None else matches.group(0))

314-598-5494
(555) 321-1234
Doesn't match: +1 555-123-4567


# Name Parsing

In [7]:
# Names are formatted as "First MI. Last":
# first name, middle initial followed by a period, then last name.
name="Paul E. Boal"

In [8]:
# Groups: 1 = first name, 2 = single uppercase middle initial (the period
# is matched but not captured), 3 = last name.
matches = re.match(
    r"^([\w]+) ([A-Z])\. ([\w]+)",
    name,
)

In [9]:
# The whole match (group 0 is everything the pattern matched)
matches.group(0)

'Paul E. Boal'

In [10]:
# The first subgroup (the first name)
matches.group(1)

'Paul'

In [11]:
# The second subgroup (the middle initial, without its period)
matches.group(2)

'E'

In [12]:
# The third subgroup (the last name)
matches.group(3)

'Boal'

# Add on Credentials!

In [13]:
# Names in "First MI. Last" form, each followed by one or more
# comma-separated credentials.
names = [
    "Steven G. Sanders, MD",
    "Alice D. Edwards, PhD",
    "Joseph B. Lester, MD, PhD"
]

In [14]:
# Split each name into first / middle / last plus a list of credentials.
for name in names:
    # Groups: 1 = first name, 2 = middle initial, 3 = last name,
    # 4 = everything after the ", " that follows the last name.
    matches = re.match(r"^([\w]+) ([A-Z])\. ([\w]+), ([\w\.\, ]+)", name)
    if matches is None:
        # re.match returns None when the pattern does not apply (for example,
        # a name with no credentials). Report the input instead of failing
        # with AttributeError on .group().
        print(f"Doesn't match: {name}")
        continue
    first, middle, last = matches.group(1, 2, 3)
    # Multiple credentials are separated by ", " inside the captured text.
    creds = matches.group(4).split(', ')
    print([first, middle, last, creds])

['Steven', 'G', 'Sanders', ['MD']]
['Alice', 'D', 'Edwards', ['PhD']]
['Joseph', 'B', 'Lester', ['MD', 'PhD']]


# Date of Birth

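Dates of birth are written as `Month ##, ####`: a month name, a one- or two-digit day, a comma, and a four-digit year (for example, `March 25, 2011`).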

In [15]:
# Sample date of birth: month name, day, comma, four-digit year.
birthday="March 25, 2011"

In [16]:
# Groups: 1 = month name (ASCII letters only), 2 = one- or two-digit day,
# 3 = four-digit year.
matches = re.match(
    r"([a-zA-Z]+) ([0-9]{1,2}), ([0-9]{4})",
    birthday,
)

In [17]:
# The whole match
matches.group(0)

'March 25, 2011'

In [18]:
# Group 1: the month name
matches.group(1)

'March'

In [19]:
# Group 2: the day (returned as a string, not an int)
matches.group(2)

'25'

In [20]:
# Group 3: the year (returned as a string, not an int)
matches.group(3)

'2011'