    Ben Christensen
    Math 403
    9/6/18

Use regular expressions to format data to a standard form.

In [1]:
import re

In [2]:
# Problem 1
def prob1():
    """Build the compiled pattern for the literal text "python".

    Returns:
        a compiled regular expression pattern object whose pattern string
        is exactly "python".
    """
    literal = "python"
    compiled = re.compile(literal)
    return compiled

# Problem 2
def prob2():
    """Build a compiled pattern that matches the literal text
    "^{@}(?)[%]{.}(*)[_]{&}$".

    Every regex metacharacter in the target text is backslash-escaped so
    that it is matched as an ordinary character.

    Returns:
        a compiled regular expression pattern object.
    """
    escaped_literal = r"\^\{@\}\(\?\)\[%\]\{\.\}\(\*\)\[_\]\{&\}\$"
    return re.compile(escaped_literal)


# Problem 3
def prob3():
    """Build a compiled pattern for the six shop descriptions below.

    The pattern is anchored at both ends and accepts one of three product
    words, a single space, and then either "store" or "supplier":

        Book store          Mattress store          Grocery store
        Book supplier       Mattress supplier       Grocery supplier

    Returns:
        a compiled regular expression pattern object.
    """
    shop_pattern = r"^(Book|Mattress|Grocery) (store|supplier)$"
    return re.compile(shop_pattern)

# Problem 4
def prob4():
    """Build a compiled pattern for identifier-style strings.

    The pattern accepts either of two anchored forms:

    * a bare name: a letter or underscore followed by any number of word
      characters;
    * a name, optional spaces, "=", optional spaces, and then a value that
      is a number (digits with an optional decimal part), a single-quoted
      string containing no single quotes, or another name.

    Returns:
        a compiled regular expression pattern object.
    """
    identifier_pattern = (
        r"(^[a-zA-Z_][\w_]*$)|(^[a-zA-Z_][\w_]* *= *((\d+\.?\d*$)|('[^']*'$)|([a-zA-Z_][\w_]*$)))"
    )
    return re.compile(identifier_pattern)
# Problem 5
def prob5(code):
    """Use regular expressions to place colons in the appropriate spots of the
    input string, representing Python code. You may assume that every possible
    colon is missing in the input string.

    A colon is appended to the end of every line whose first token (after
    optional indentation) is one of the block-opening keywords: if, elif,
    else, for, while, try, except, finally, with, def, class. Keywords that
    appear later in a line (such as the inline ``a if c else b``) and names
    that merely start with a keyword (``format``, ``define``) are left
    untouched. All other text, including the original line endings, is
    returned unchanged; no trailing newline is added.

    Parameters:
        code (str): a string of Python code without any colons.

    Returns:
        (str): code, but with the colons inserted in the right places.
    """
    key_words = ("if", "elif", "else", "for", "while", "try", "except",
                 "finally", "with", "def", "class")
    # ^ with MULTILINE anchors the keyword to the start of a line, [ \t]*
    # allows any indentation (including none), and \b rejects identifiers
    # that only begin with a keyword. [^\r\n]* stops before the line ending
    # so the colon lands ahead of "\n" or "\r\n".
    block_opener = re.compile(
        r"^([ \t]*(?:" + "|".join(key_words) + r")\b[^\r\n]*)",
        re.MULTILINE,
    )
    return block_opener.sub(r"\1:", code)


# Problem 6
def prob6(filname="fake_contacts.txt"):
    """Use regular expressions to parse the data in the given file and format
    it uniformly, writing birthdays as mm/dd/yyyy and phone numbers as
    (xxx)xxx-xxxx. Construct a dictionary where the key is the name of an
    individual and the value is another dictionary containing their
    information. Each of these inner dictionaries should have the keys
    "birthday", "email", and "phone". In the case of missing data, map the key
    to None.

    Each line of the file is treated as one contact. The email, birthday and
    phone number are located and removed from the line in that order, and
    whatever alphabetic text remains is taken as the name. Lines in which no
    name can be found (for example blank lines) are skipped. Two-digit years
    are written with a "20" prefix.

    Parameters:
        filname (str): path of the contacts file to read.

    Returns:
        (dict): a dictionary mapping names to a dictionary of personal info.
    """
    email_re = re.compile(r"\w+(\.\w+)*@\w+(\.\w+)*")
    # Groups: month, day, year (four digits tried before two).
    birthday_re = re.compile(r"(\d{1,2})/(\d{1,2})/(\d{4}|\d{2})")
    # Optional "1"/"-" prefix, then groups: area code, exchange, number.
    phone_re = re.compile(r"(?:1?-?)\(?(\d{3})\)?-?(\d{3})-?(\d{4})")
    name_re = re.compile(r"[A-Za-z]+\.?( [A-Za-z]+\.?)*")

    contact_data = {}
    with open(filname, 'r') as infile:
        for line in infile:
            # Strip each field once it is found so that later, looser
            # patterns (and finally the name pattern) cannot match inside it.
            email_match = email_re.search(line)
            email = email_match.group() if email_match else None
            line = email_re.sub("", line)

            birthday_match = birthday_re.search(line)
            line = birthday_re.sub("", line)

            phone_match = phone_re.search(line)
            line = phone_re.sub("", line)

            name_match = name_re.search(line)
            if name_match is None:
                # Nothing to key the record on (e.g. a blank line).
                continue
            name = name_match.group()

            # Rebuild the fields from their captured parts rather than by
            # slicing, so every accepted input format normalizes correctly.
            birthday = None
            if birthday_match is not None:
                month, day, year = birthday_match.groups()
                if len(year) == 2:
                    year = "20" + year
                birthday = "{}/{}/{}".format(month.zfill(2), day.zfill(2), year)

            phone = None
            if phone_match is not None:
                phone = "({}){}-{}".format(*phone_match.groups())

            contact_data[name] = {"birthday": birthday, "email": email, "phone": phone}

    return contact_data


In [3]:
# Example usage: parse a contacts file with prob6 and look up one entry.
# NOTE(review): this is an absolute path on the author's machine; point it at
# a local copy of the contacts file before running elsewhere.
filename = "/Users/benchristensen/Desktop/ACME Python Labs/Volume3/RegularExpressions/fake_contacts.txt"
contact_data = prob6(filename)
# Bare expression: in a notebook cell this displays the record for this name.
contact_data["Uriah Workman"]

{'birthday': '12/10/2049',
 'email': 'Aliquam.auctor@vel.com',
 'phone': '(873)996-5622'}