# Regex Exercises

In [1]:
import pandas as pd
import numpy as np
import re

### 1. Write a function named `is_vowel`.
- It should accept a string as input and use a regular expression to determine if the passed string is a vowel.
- While not explicity mentioned in the lesson, you can treat the result of re.search as a boolean value that indicates whether or not the regular expression matches the given string.

In [2]:
def is_vowel(string):
    if re.search('^[aeiouAEIOU]$', string) == None:
        return False
    else:
        return True

In [3]:
is_vowel('a'), is_vowel('A')

(True, True)

In [4]:
is_vowel('aaa')

False

In [5]:
is_vowel('b')

False

### 2. Write a function named `is_valid_username` that accepts a string as input.
- A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character.
- It should also be no longer than 32 characters. The function should return either True or False depending on whether the passed string is a valid username.

```python
>>> is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
False
>>> is_valid_username('codeup')
True
>>> is_valid_username('Codeup')
False
>>> is_valid_username('codeup123')
True
>>> is_valid_username('1codeup')
False
```

In [6]:
def is_valid_username(string):
    valid_characters = re.findall('^[a-z][a-z0-9_]{,31}$', string)
    if valid_characters != []:
        return True
    else:
        return False

In [7]:
print(is_valid_username('aaaaaaaa'))
print(is_valid_username('codeup'))
print(is_valid_username('Codeup'))
print(is_valid_username('codeup123'))
print(is_valid_username('1codeup'))

True
True
False
True
False


### 3. Write a regular expression to capture phone numbers. It should match all of the following:

| Phone Numbers |
| :------------ |
| (210) 867 5309 |
| +1 210.867.5309 |
| 867-5309 |
| 210-867-5309 |

In [8]:
def is_valid_phone_number(phone_number='(210) 867 5309'):
    valid_exp = r'^[\(+]?[1]?\s?\d+[\s)-]\s?\d*[\.\-\s]\d+[-\.]?\d+[-]?\d+$'
    valid = re.findall(valid_exp, phone_number)
    if len(valid) == 0:
        return False
    else:
        return True

In [9]:
print(is_valid_phone_number('(210) 867 5309'))
print(is_valid_phone_number('+1 210.867.5309'))
print(is_valid_phone_number('867-5309'), 'Fix')
print(is_valid_phone_number('210-867-5309'))

True
True
False Fix
True


### 4. Use regular expressions to convert the dates below to the standardized year-month-day format.

| Dates | 
| :---- |
| 02/04/19 |
| 02/05/19 |
| 02/06/19 |
| 02/07/19 |
| 02/08/19 |
| 02/09/19 |
| 02/10/19 |

In [10]:
dates = pd.DataFrame(['02/04/19',
                      '02/05/19',
                      '02/06/19',
                      '02/07/19',
                      '02/08/19',
                      '02/09/19',
                      '02/10/19'])

dates = dates.rename(columns={0:'date'})
dates

Unnamed: 0,date
0,02/04/19
1,02/05/19
2,02/06/19
3,02/07/19
4,02/08/19
5,02/09/19
6,02/10/19


In [11]:
date_exp = '(\d\d)/(\d\d)/(\d\d)'

In [12]:
dates['date'].str.extract(r'(?P<month>\d+)/(?P<day>\d+)/(?P<year>\d+)')

Unnamed: 0,month,day,year
0,2,4,19
1,2,5,19
2,2,6,19
3,2,7,19
4,2,8,19
5,2,9,19
6,2,10,19


In [13]:
dates['date'].str.replace(r'(\d+)/(\d+)/(\d+)', r'20\3-\1-\2')

0    2019-02-04
1    2019-02-05
2    2019-02-06
3    2019-02-07
4    2019-02-08
5    2019-02-09
6    2019-02-10
Name: date, dtype: object

# 5. Write a regex to extract the various parts of these logfile lines:

GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58<br>
POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58<br>
GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58

In [14]:
logs = [
'GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58',
'POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58',
'GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58'
]

In [21]:
logsfile = pd.DataFrame(logs).rename(columns={0:'logs'})

In [41]:
logsfile.logs.str.extract(r'(GET|POST)\s/([/\w=\?\d]+)')

Unnamed: 0,0,1
0,GET,api/v1/sales?page=86
1,POST,users_accounts/file
2,GET,api/v1/items?page=3


### 6. You can find a list of words on your mac at /usr/share/dict/words. Use this file to answer the following questions:

    How many words have at least 3 vowels?
    How many words have at least 3 vowels in a row?
    How many words have at least 4 consonants in a row?
    How many words start and end with the same letter?
    How many words start and end with a vowel?
    How many words contain the same letter 3 times in a row?
    What other interesting patterns in words can you find?

In [17]:
dictionary = pd.read_csv('/usr/share/dict/words', header=None, squeeze=True)