In [1]:
# Imports
import re
import pandas as p

1. Write a function named `is_vowel`. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicitly mentioned in the lesson, you can treat the result of `re.search` as a boolean value that indicates whether or not the regular expression matches the given string.

In [2]:
def is_vowel(s):
    """
    Determine if the passed string is exactly one vowel character.

    Parameters:
    s (str): The string to check

    Returns:
    bool: True if the string is a single vowel (a, e, i, o, u in either
          case), False otherwise
    """
    # re.fullmatch requires the whole string to be the vowel. The previous
    # '^...$' search also accepted 'a\n', because '$' matches just before a
    # trailing newline.
    return re.fullmatch(r'[aeiouAEIOU]', s) is not None

In [3]:
# A lowercase vowel is accepted
is_vowel('a')

True

In [4]:
# Another lowercase vowel is accepted
is_vowel('i')

True

In [5]:
# The empty string is not a vowel
is_vowel('')

False

In [6]:
# 'y' is not in the vowel character class, so it is rejected
is_vowel('y')

False

In [7]:
# An uppercase vowel is accepted as well
is_vowel('U')

True

2. Write a function named `is_valid_username` that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character. It should also be no longer than 32 characters. The function should return either `True` or `False` depending on whether the passed string is a valid username.
>>> is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
False
>>> is_valid_username('codeup')
True
>>> is_valid_username('Codeup')
False
>>> is_valid_username('codeup123')
True
>>> is_valid_username('1codeup')
False

In [8]:
def is_valid_username(username):
    """
    Check if the given username is valid.

    A valid username starts with a lowercase letter, and only consists of
    lowercase letters, numbers, or the underscore character. It should also
    be no longer than 32 characters.

    Parameters:
    username (str): The username to validate

    Returns:
    bool: True if the username is valid, False otherwise
    """
    # One leading lowercase letter plus at most 31 further allowed characters
    # gives the 32-character limit. re.fullmatch is used instead of '^...$'
    # because '$' also matches just before a trailing newline, which let
    # 'codeup\n' through.
    return re.fullmatch(r'[a-z][a-z0-9_]{0,31}', username) is not None


In [9]:
# Valid: lowercase letters joined by an underscore
is_valid_username('izel_segura')

True

In [10]:
# Invalid: contains the capital letter 'S'
is_valid_username('izel_Segura')

False

In [11]:
# Valid: lowercase letters, an underscore, and digits
is_valid_username('izel_segura1980')

True

In [12]:
# Invalid: contains a space
is_valid_username('izel segura')

False

In [13]:
# Invalid: '∂' is not a lowercase letter a-z, a digit, or an underscore
is_valid_username('izel_segur∂')

False

In [14]:
# Valid: digits preceded by a leading lowercase letter
is_valid_username('a1234567890')

True

In [15]:
# Invalid: starts with a digit instead of a lowercase letter
is_valid_username('1234567890')

False

In [16]:
# Valid: exactly 32 characters, the maximum allowed length
is_valid_username('aaaaaaaaa1aaaaaaaaa2aaaaaaaaa3aa')

True

In [17]:
# Invalid: 33 characters, one more than the 32-character limit
is_valid_username('aaaaaaaaa1aaaaaaaaa2aaaaaaaaa3aaa')

False

3. Write a regular expression to capture phone numbers. It should match all of the following:

- (210) 867 5309
- +1 210.867.5309
- 867-5309
- 210-867-5309

In [18]:
# Function to test if a string is a phone number
def is_phone_number(string):
    """
    Search a string for something shaped like a phone number.

    The pattern accepts, in order: an optional country code (digits with an
    optional leading '+'), an optional three-digit area code (optionally
    wrapped in parentheses), a three-digit group, and a four-digit group.
    Each group may be separated from the next by at most one character of
    any kind.

    Parameters:
    string (str): The text to search

    Returns:
    re.Match or None: The first match found anywhere in the string, or None
    when nothing matches. The result can be used directly as a truth value.
    """
    # Raw string: in a plain string literal '\+', '\d' and '\(' are invalid
    # escape sequences that trigger a warning on newer Python versions. The
    # pattern text itself is unchanged.
    phone_number_re = r"(\+?\d+)?.?(\(?\d{3}\)?)?.?\d{3}.?\d{4}"

    return re.search(phone_number_re, string)

In [19]:
# Example usage: the four formats from the exercise plus a bare ten-digit string
test_strings = ["(210) 867 5309", "+1 210.867.5309", "867-5309", "210-867-5309", "1234567890"]

for string in test_strings:
    # The f-string shows whatever is_phone_number returns for this candidate
    print(f"'{string}' is a phone number: {is_phone_number(string)}")

'(210) 867 5309' is a phone number: <re.Match object; span=(0, 14), match='(210) 867 5309'>
'+1 210.867.5309' is a phone number: <re.Match object; span=(0, 15), match='+1 210.867.5309'>
'867-5309' is a phone number: <re.Match object; span=(0, 8), match='867-5309'>
'210-867-5309' is a phone number: <re.Match object; span=(0, 12), match='210-867-5309'>
'1234567890' is a phone number: <re.Match object; span=(0, 10), match='1234567890'>


4. Use regular expressions to convert the dates below to the standardized year-month-day format.

- 02/04/19
- 02/05/19
- 02/06/19
- 02/07/19
- 02/08/19
- 02/09/19
- 02/10/19

In [20]:
def convert_dates(dates, pivot_year=25):
    """
    Convert a list of date strings from MM/DD/YY to YYYY-MM-DD format, taking into
    account 20th and 21st century dates.

    Parameters:
    dates (list of str): List of date strings to convert. Leading and trailing
        whitespace around a date is ignored.
    pivot_year (int): Two-digit years strictly below this value are placed in
        the 2000s; all others are placed in the 1900s. Defaults to 25.

    Returns:
    dict: Dictionary with original date strings as keys and converted date strings as values.

    Raises:
    ValueError: If a date string is not exactly in MM/DD/YY form.
    """
    converted_dates = {}

    for date in dates:
        # Match the whole (stripped) string. An unanchored search would take
        # the first 'dd/dd/dd' it finds, silently turning '01/02/2019' into
        # 2020-01-02.
        match = re.fullmatch(r'(\d{2})/(\d{2})/(\d{2})', date.strip())
        if match is None:
            # Name the offending value instead of failing on None.groups()
            raise ValueError(f"Date {date!r} is not in MM/DD/YY format")
        month, day, year = match.groups()

        # Expand the two-digit year to four digits using the pivot
        century = "20" if int(year) < pivot_year else "19"

        # Key by the original string, store the year-month-day form
        converted_dates[date] = f"{century}{year}-{month}-{day}"

    return converted_dates

In [21]:
# Example usage: the seven dates from the exercise plus three extra cases
date_strings = [
    "02/04/19",
    "02/05/19",
    "02/06/19",
    "02/07/19",
    "02/08/19",
    "02/09/19",
    "02/10/19",
    "04/03/89",  # Additional cases with two-digit years of 25 or more
    "03/01/52",
    "12/25/25"
]

# Convert the dates and show each original string next to its converted form
converted_date_strings = convert_dates(date_strings)
for original, converted in converted_date_strings.items():
    print(f"{original} -> {converted}")

02/04/19 -> 2019-02-04
02/05/19 -> 2019-02-05
02/06/19 -> 2019-02-06
02/07/19 -> 2019-02-07
02/08/19 -> 2019-02-08
02/09/19 -> 2019-02-09
02/10/19 -> 2019-02-10
04/03/89 -> 1989-04-03
03/01/52 -> 1952-03-01
12/25/25 -> 1925-12-25


5. Write a regex to extract the various parts of these logfile lines:

- GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58
- POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58
- GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58

In [22]:
# Pattern for a single logfile line. It is written as adjacent raw-string
# pieces, one per captured field, which Python joins into one regex string.
logfile_regex = (
    r'(\w+)\s+'                 # group 1: method
    r'(\S+)\s+'                 # group 2: path
    r'\[(.*?)\]\s+'             # group 3: timestamp, without the square brackets
    r'(\S+)\s+'                 # group 4: protocol
    r'\{(\d+)\}\s+'             # group 5: status code, without the braces
    r'(\d+)\s+'                 # group 6: size
    r'"(.*?)"\s+'               # group 7: user agent, without the double quotes
    r'(\d+\.\d+\.\d+\.\d+)'     # group 8: IP address as four dot-separated numbers
)

# The sample logfile lines from the exercise. Single-quoted literals keep the
# embedded double quotes free of backslash escapes.
logfile_lines = [
    'GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58',
    'POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58',
    'GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58',
]

# Function to extract and print the information from the log lines
def extract_log_info(log_lines, regex):
    """
    Print the fields captured from each log line that matches a pattern.

    Every line is matched from its start against `regex`. Lines that do not
    match are skipped without any output. For a matching line the eight
    captured groups are printed one per row with a label, followed by a
    blank separator.

    Parameters:
    log_lines (iterable of str): The log lines to examine
    regex (str): A pattern with exactly eight capture groups, in the order
        method, path, timestamp, protocol, status code, size, user agent,
        IP address

    Returns:
    None
    """
    for entry in log_lines:
        parsed = re.match(regex, entry)
        if parsed is None:
            continue  # this line does not fit the pattern; print nothing

        # Unpacking into eight names requires exactly eight capture groups
        verb, target, stamp, proto, status, nbytes, agent, address = parsed.groups()

        report = [
            f"Method: {verb}",
            f"Path: {target}",
            f"Timestamp: {stamp}",
            f"Protocol: {proto}",
            f"Status Code: {status}",
            f"Size: {nbytes}",
            f"User-Agent: {agent}",
            f"IP Address: {address}",
        ]
        print("\n".join(report))
        print("\n")  # Extra blank space to separate this entry from the next

In [23]:
# Run the extraction over the sample lines and print the fields of each match
extract_log_info(logfile_lines, logfile_regex)


Method: GET
Path: /api/v1/sales?page=86
Timestamp: 16/Apr/2019:193452+0000
Protocol: HTTP/1.1
Status Code: 200
Size: 510348
User-Agent: python-requests/2.21.0
IP Address: 97.105.19.58


Method: POST
Path: /users_accounts/file-upload
Timestamp: 16/Apr/2019:193452+0000
Protocol: HTTP/1.1
Status Code: 201
Size: 42
User-Agent: User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36
IP Address: 97.105.19.58


Method: GET
Path: /api/v1/items?page=3
Timestamp: 16/Apr/2019:193453+0000
Protocol: HTTP/1.1
Status Code: 429
Size: 3561
User-Agent: python-requests/2.21.0
IP Address: 97.105.19.58


