In [1]:
import re
import pandas as pd
import doctest

In [2]:
def is_vowel(s):
    return bool(re.search(r'^[aeiou]$', s.lower()))

In [3]:
def is_valid_username(s):
    username_re = r'^[a-z][a-zA-Z0-9_]{,31}$'
    return bool(re.search(username_re, s))

In [4]:
phone_re = r'''
(?P<country_code>\+\d+)?
\D*
(?P<area_code>\d{3})?
\D*
(?P<exchange_code>\d{3})
\D*
(?P<last_four>\d{4})$
'''

numbers = pd.Series([
    '(210) 867 5309',
    '+1 210.867.5309',
    '867-5309',
    '210-867-5309',
], name='original')

pd.concat([numbers, numbers.str.extract(phone_re, re.VERBOSE)], axis=1)

Unnamed: 0,original,country_code,area_code,exchange_code,last_four
0,(210) 867 5309,,210.0,867,5309
1,+1 210.867.5309,1.0,210.0,867,5309
2,867-5309,,,867,5309
3,210-867-5309,,210.0,867,5309


In [5]:
dates = pd.Series([
    '02/04/19',
    '02/05/19',
    '02/06/19',
    '02/07/19',
    '02/08/19',
    '02/09/19',
    '02/10/19',
])
dates.str.replace(r'(\d+)/(\d+)/(\d+)', r'20\3-\1-\2', regex=True)

0    2019-02-04
1    2019-02-05
2    2019-02-06
3    2019-02-07
4    2019-02-08
5    2019-02-09
6    2019-02-10
dtype: object

In [6]:
logfile_re = r'''
^(?P<method>GET|POST)
\s+
(?P<path>.*?)
\s+
\[(?P<timestamp>.*?)\]
\s+
(?P<http_version>.*?)
\s+
\{(?P<status>\d+)\}
\s+
(?P<bytes_sent>\d+)
\s+
"(?P<user_agent>.*)$
'''

lines = pd.Series([
    'GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58',
    'POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58',
    'GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58',
])
lines.str.extract(logfile_re, re.VERBOSE)

Unnamed: 0,method,path,timestamp,http_version,status,bytes_sent,user_agent
0,GET,/api/v1/sales?page=86,16/Apr/2019:193452+0000,HTTP/1.1,200,510348,"python-requests/2.21.0"" 97.105.19.58"
1,POST,/users_accounts/file-upload,16/Apr/2019:193452+0000,HTTP/1.1,201,42,User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; ...
2,GET,/api/v1/items?page=3,16/Apr/2019:193453+0000,HTTP/1.1,429,3561,"python-requests/2.21.0"" 97.105.19.58"
