## Capturing Groups

In [55]:
import re
result=re.search(r"^(\w*), (\w*)$","Lovelace, Ada")
print(result)

<re.Match object; span=(0, 13), match='Lovelace, Ada'>


In [56]:
# groups

print(result.groups())
print(result[0])
print(result[1])
print(result[2])

('Lovelace', 'Ada')
Lovelace, Ada
Lovelace
Ada


In [57]:
def rearrange_name(name):
    result=re.search(r"^([\w .-]*), ([\w .-]*)$",name)
    if result is None:
        return name
    return "{} {}".format(result[2],result[1])

print(rearrange_name("HIna, Khaidm"))

Khaidm HIna


## More on repetition Qualifiers

In [58]:
print(re.search(r"[A-Za-z]{5}","a ghost"))
print(re.findall(r"[A-Za-z]{5}","a ghost scary appeared"))

#  /b which is used to find length at the beginning and end.

print(re.findall(r"\b[A-Za-z]{5}\b","a ghost scary appeared"))
print(re.findall(r"\w{5,10}","a ghost scary appeared"))
print(re.findall(r"\w{5,}","a ghost scary appeared"))
print(re.findall(r"s\w{,10}","a ghost scary appeared"))

<re.Match object; span=(2, 7), match='ghost'>
['ghost', 'scary', 'appea']
['ghost', 'scary']
['ghost', 'scary', 'appeared']
['ghost', 'scary', 'appeared']
['st', 'scary']


## Extracting a PID using REgexes in Python

In [59]:
import re
def extract_pid(log_line):
    regex = r"(\[(\d+)\]): ([A-Z]*)"
    result = re.search(regex, log_line)
    if result is None:
        return None
    return "{} ({})".format(result[1],result[3])

print(extract_pid("July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade")) # 12345 (ERROR)
print(extract_pid("99 elephants in a [cage]")) # None
print(extract_pid("A string that also has numbers [34567] but no uppercase message")) # None
print(extract_pid("July 31 08:08:08 mycomputer new_process[67890]: RUNNING Performing backup")) # 67890 (RUNNING)

[12345] (ERROR)
None
None
[67890] (RUNNING)


## Splitting and Replacing

#### Parenthisis play a vital rule. It is use to group and other example is given below

In [60]:
re.split(r"[.?!]","One sentence. another one? last!")
re.split(r"([.?!])","One sentence. another one? last!")

['One sentence', '.', ' another one', '?', ' last', '!', '']

## Sub 

In [61]:
re.sub(r"[\w.%+-]+@[\w.-]+","[REDACTED]","Recieved email for go_nusts95@gmail.com")

'Recieved email for [REDACTED]'

In [62]:
import re
def convert_phone_number(phone):
    result = re.sub(r"\b(\d{3})-(\d{3})-(\d{4})\b",r"(\1) \2-\3",phone)
    return result

print(convert_phone_number("My number is 212-345-9999.")) # My number is (212) 345-9999.
print(convert_phone_number("Please call 888-555-1234")) # Please call (888) 555-1234
print(convert_phone_number("123-123-12345")) # 123-123-12345
print(convert_phone_number("Phone number of Buckingham Palace is +44 303 123 7300")) # Phone number of Buckingham Pa

My number is (212) 345-9999.
Please call (888) 555-1234
123-123-12345
Phone number of Buckingham Palace is +44 303 123 7300


#### We're working with a CSV file, which contains employee information. Each record has a name field, followed by a phone number field, and a role field. The phone number field contains U.S. phone numbers, and needs to be modified to the international format, with "+1-" in front of the phone number. Fill in the regular expression, using groups, to use the transform_record function to do that.

In [63]:
import re
def transform_record(record):
    new_record = re.sub(r"\,(\d{3})",r",+1-\1",record)
    return new_record

print(transform_record("Sabrina Green,802-867-5309,System Administrator")) 
# Sabrina Green,+1-802-867-5309,System Administrator

print(transform_record("Eli Jones,684-3481127,IT specialist")) 
# Eli Jones,+1-684-3481127,IT specialist

print(transform_record("Melody Daniels,846-687-7436,Programmer")) 
# Melody Daniels,+1-846-687-7436,Programmer

print(transform_record("Charlie Rivera,698-746-3357,Web Developer")) 
# Charlie Rivera,+1-698-746-3357,Web Developer

Sabrina Green,+1-802-867-5309,System Administrator
Eli Jones,+1-684-3481127,IT specialist
Melody Daniels,+1-846-687-7436,Programmer
Charlie Rivera,+1-698-746-3357,Web Developer


#### The multi_vowel_words function returns all words with 3 or more consecutive vowels (a, e, i, o, u). Fill in the regular expression to do that.

In [64]:
import re
def multi_vowel_words(text):
    pattern = r"(\w+[aeiou]{3,}\w+)"
    result = re.findall(pattern, text)
    return result

print(multi_vowel_words("Life is beautiful")) 
# ['beautiful']

print(multi_vowel_words("Obviously, the queen is courageous and gracious.")) 
# ['Obviously', 'queen', 'courageous', 'gracious']

print(multi_vowel_words("The rambunctious children had to sit quietly and await their delicious dinner.")) 
# ['rambunctious', 'quietly', 'delicious']

print(multi_vowel_words("The order of a data queue is First In First Out (FIFO)")) 
# ['queue']

print(multi_vowel_words("Hello world!")) 
# []

['beautiful']
['Obviously', 'queen', 'courageous', 'gracious']
['rambunctious', 'quietly', 'delicious']
['queue']
[]
