# Regex Exercises

In [3]:
# imports
import pandas as pd
import numpy as np

import re


### 1. Write a function named is_vowel. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicitly mentioned in the lesson, you can treat the result of re.search as a boolean value that indicates whether or not the regular expression matches the given string.


In [6]:
def is_vowel(str):
    """Return every lowercase vowel found in the given text.

    First-draft helper: it scans the whole input with ``re.findall`` and
    returns the matches as a list, so the result is truthy/falsy rather
    than a strict boolean.

    Parameters
    ----------
    str : str
        Text to scan. The name shadows the built-in ``str`` inside this
        function; it is kept so keyword callers keep working.

    Returns
    -------
    list of str
        The lowercase vowels in order of appearance; empty when none occur.
    """
    # Commas are literal inside a character class, so the earlier
    # '[a,e,i,o,u]' also reported ',' as a vowel.
    res = re.findall(r'[aeiou]', str)
    return res
    

In [7]:
# Quick check of the draft: it returns the list of vowels it found, not True/False.
is_vowel('anything')

['a', 'i']

In [9]:
# The anchors leave room for exactly one character from the class, so a
# four-character string does not match and re.search returns None (no output).
re.search(r'^[aeiouAEIOU]$', 'aaaa')

In [10]:
# A one-character class matched against the whole string replaces the manual
# "is it a string, is it one character long, is it in 'aeiouAEIOU'" checks.
def is_vowel(some_string):
    """Return True when the input is exactly one vowel, in either case.

    Parameters
    ----------
    some_string : object
        Value to test. Anything that is not a ``str`` yields False.

    Returns
    -------
    bool
        True only for a one-character string drawn from ``aeiouAEIOU``.
    """
    if not isinstance(some_string, str):
        return False
    # fullmatch instead of '^...$': `$` also matches just before a trailing
    # newline, which let a vowel followed by a newline pass as a vowel.
    return re.fullmatch(r'[aeiouAEIOU]', some_string) is not None
    

### 2. Write a function named is_valid_username that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character. It should also be no longer than 32 characters. The function should return either True or False depending on whether the passed string is a valid username.

In [13]:
def is_valid_username(some_username):
    """Check a username against the exercise's rules.

    A valid username starts with a lowercase ASCII letter, continues with
    lowercase ASCII letters, ASCII digits or underscores only, and is at
    most 32 characters long.

    Parameters
    ----------
    some_username : object
        Candidate username. Anything that is not a ``str`` yields False.

    Returns
    -------
    bool
        True when the whole string satisfies the rules above.
    """
    if not isinstance(some_username, str):
        return False
    # fullmatch rather than '^...$': `$` tolerates one trailing newline.
    # [0-9] rather than \d: on str patterns \d also accepts non-ASCII digits.
    # One leading letter plus up to 31 more characters gives the 32 limit.
    return re.fullmatch(r'[a-z][a-z0-9_]{0,31}', some_username) is not None

In [14]:
# Longer than the 32-character limit, so it must be rejected.
assert not is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')


In [15]:

# All lowercase letters: accepted.
assert is_valid_username('codeup')


In [17]:

# An uppercase first letter is rejected.
assert not is_valid_username('Codeup')


In [18]:

# Digits are allowed after the first character.
assert is_valid_username('codeup123')


In [19]:

# A leading digit is rejected.
assert not is_valid_username('1codeup')



### 3. Write a regular expression to capture phone numbers. It should match all of the following:

In [20]:
# Sample phone numbers, one per formatting variant the pattern has to handle.
phone_nums = [
    '(210) 867 5309',
    '+1 210.867.5309',
    '867-5309',
    '210-867-5309',
]


In [21]:
# Phone-number pattern, meant to be compiled with re.VERBOSE (the newlines
# between the pieces are ignored in that mode). Named groups: an optional
# one-digit region code, an optional three-digit area code, then the
# three-digit and four-digit parts of the local number.
# Raw string: in a plain literal, sequences such as \d and \( are invalid
# escapes and raise a warning on recent Python versions.
regex = r'''
\+?(?P<region_code>\d)?
\D?
\(?(?P<area_code>\d{3})?\)?
\D?
(?P<first_three>\d{3})
.?
(?P<last_four>\d{4})$
'''

In [22]:
# One row per phone number, one column per named group in the pattern.
pd.Series(phone_nums).str.extract(regex, flags=re.VERBOSE)

Unnamed: 0,region_code,area_code,first_three,last_four
0,,210.0,867,5309
1,1.0,210.0,867,5309
2,,,867,5309
3,,210.0,867,5309


### 4. Use regular expressions to convert the dates below to the standardized year-month-day format.


In [23]:
# Seven consecutive dates as two-digit fields separated by slashes.
dates = [
    '02/04/19',
    '02/05/19',
    '02/06/19',
    '02/07/19',
    '02/08/19',
    '02/09/19',
    '02/10/19',
]


In [25]:
# Three two-digit groups separated by slashes; the replacement puts the third
# group first, prefixed with "20", to produce year-month-day.
pattern = r'(\d{2})/(\d{2})/(\d{2})'
substitution = r'20\3-\1-\2'
# Try the rewrite on the first date only.
re.sub(pattern, repl=substitution, string=dates[0])

'2019-02-04'

In [26]:
# Apply the same regex substitution to every date at once.
pd.Series(dates).str.replace(pat=pattern, repl=substitution, regex=True)

0    2019-02-04
1    2019-02-05
2    2019-02-06
3    2019-02-07
4    2019-02-08
5    2019-02-09
6    2019-02-10
dtype: object

### 5. Write a regex to extract the various parts of these logfile lines:


In [27]:
# Three sample log lines in one multi-line string. Each line holds, in order:
# method, endpoint, [timestamp], HTTP version, {status}, byte count,
# "user agent" and IP address. The string ends with a newline.
log_files = '''GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58
POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58
GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58
'''

In [28]:
# One Series element per line of the log text.
# NOTE(review): the text ends with a newline, so the split also yields a final
# empty string, which becomes an extra empty row in the Series.
logs = pd.Series(log_files.split(sep='\n'))

In [29]:
# Display the Series to check how the log text was split.
logs

0    GET /api/v1/sales?page=86 [16/Apr/2019:193452+...
1    POST /users_accounts/file-upload [16/Apr/2019:...
2    GET /api/v1/items?page=3 [16/Apr/2019:193453+0...
3                                                     
dtype: object

In [30]:
# Verbose-mode pattern for one log line. Each named group captures one field,
# and the fields are separated by runs of whitespace. The literal [ ], { } and
# " " delimiters pin the timestamp, status code and user agent; the remaining
# greedy `.*` groups depend on backtracking to those delimiters.
# NOTE(review): this rebinds `pattern`, which the date-conversion cells above
# also use, so re-running those cells after this one would pick up this regex.
pattern = r'''
^(?P<method>GET|POST)
\s+
(?P<endpoint>.*)
\s+
\[(?P<timestamp>.*)\]
\s+
(?P<http_ver>.*)
\s+
\{(?P<response>\d+)\}
\s+
(?P<bytes>\d+)
\s+
"(?P<user_agent>.*)"
\s+
(?P<ip>.*)$
'''

In [31]:
# One column per named group; elements that do not match come back as all-NaN rows.
logs.str.extract(pattern, flags=re.VERBOSE)

Unnamed: 0,method,endpoint,timestamp,http_ver,response,bytes,user_agent,ip
0,GET,/api/v1/sales?page=86,16/Apr/2019:193452+0000,HTTP/1.1,200.0,510348.0,python-requests/2.21.0,97.105.19.58
1,POST,/users_accounts/file-upload,16/Apr/2019:193452+0000,HTTP/1.1,201.0,42.0,User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; ...,97.105.19.58
2,GET,/api/v1/items?page=3,16/Apr/2019:193453+0000,HTTP/1.1,429.0,3561.0,python-requests/2.21.0,97.105.19.58
3,,,,,,,,
