# Raw Strings 

In [1]:
# Difference between raw strings and normal strings:
# a normal literal processes backslash escapes, a raw (r"...") literal keeps them exactly as typed.
print("string1","C:\desktop\natalie") # normal string: "\n" is turned into a newline, so the path is split across two lines
print("\n") # blank separator between the two demonstrations
print("string2",r"C:\desktop\natalie") # raw string: "\n" stays as a backslash followed by 'n'

string1 C:\desktop
atalie


string2 C:\desktop\natalie


# re.match() 

In [3]:
import re

# re.match() only looks at the start of the text: it succeeds when the
# pattern is found at position 0 and returns None otherwise.
sentence = r'Data Science is the hottest job of the 21st century.'

result = re.match('Data Science', sentence)  # pattern is at the start -> Match object
result_2 = re.match('hottest', sentence)     # pattern occurs later in the text -> None

print(result)
print(result_2)

<re.Match object; span=(0, 12), match='Data Science'>
None
Data Science


In [4]:
# group() with no arguments returns the full text that the pattern matched
print(result.group())

Data Science


# re.search() 

In [6]:
# re.search() scans the whole text and stops at the first occurrence of the pattern
text = r'Elon Musk founded Tesla. He also founded SpaceX.'
result = re.search('founded', text)
print(result)          # a Match object, just like the one re.match() returns
print(result.group())  # the matched text itself

<re.Match object; span=(10, 17), match='founded'>
founded


# re.findall() 

In [7]:
# re.findall() collects every non-overlapping occurrence into a list of strings
text = r'Elon Musk founded Tesla. He also founded SpaceX.'
result = re.findall('founded', text)
print(result)

['founded', 'founded']


In [4]:
import re
# Phone-number layout: three digits, dash, three digits, dash, four digits
pattern = r'\d\d\d-\d\d\d-\d\d\d\d'
string = 'Cell: 415-555-9999 Work: 212-555-0000'
# Compile first, then ask the compiled pattern for all matches
re.compile(pattern).findall(string)

['415-555-9999', '212-555-0000']

In [5]:
# [abc] is a character class: every match is a single 'a', 'b' or 'c' character
re.findall(r'[abc]','a ab abc de')

['a', 'a', 'b', 'a', 'b', 'c']

# regex Summary 

In [1]:
# Import the regex module
import re
# Step 1: compile the pattern once into a reusable regex object
phoneRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
# Step 2: search() returns a Match object for the first occurrence in the text
contact_text = 'Cell: 415-555-9999 Work: 212-555-0000'
match_object = phoneRegex.search(contact_text)
match_object

<re.Match object; span=(6, 18), match='415-555-9999'>

In [3]:
# Call the match object's group() method to return a string of the matched text.
# With no argument, group() gives the whole match (here the first phone number found).
match_object.group()

'415-555-9999'

In [6]:
# Parentheses create capture groups: group 1 is the first three digits,
# group 2 is the remaining 'ddd-dddd' part of the number.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
sentence = 'My number is 415-555-4242.'
mo = phoneNumRegex.search(sentence)
mo

<re.Match object; span=(13, 25), match='415-555-4242'>

# Square Brackets 

In [7]:
pattern = r'[abc]'      # character class, same as [a-c]
string1 = 'Abc,xyz,de'  # 2 matches: uppercase 'A' is not part of the class
string2 = 'car,aec'     # 4 matches
string3 = 'def'         # no match
# Show the matches for each example in turn
for candidate in (string1, string2, string3):
    print(re.findall(pattern, candidate))

['b', 'c']
['c', 'a', 'a', 'c']
[]


# phone Number program without regex 

In [8]:
# Without using Regular Expressions
def isPhoneNumber(text):
    """Return True when ``text`` has exactly the layout DDD-DDD-DDDD.

    ``D`` stands for any character accepted by ``str.isdecimal()``. The check
    is done without regular expressions, piece by piece from left to right.
    """
    return (
        len(text) == 12                # 3 + 1 + 3 + 1 + 4 characters
        and text[0:3].isdecimal()      # area code
        and text[3] == '-'             # first dash
        and text[4:7].isdecimal()      # first 3 digits
        and text[7] == '-'             # second dash
        and text[8:12].isdecimal()     # last 4 digits
    )

message = 'Call me at 132-222-7218, or at 132-222-1111'
foundNumber = False
# Slide a 12-character window over the message and test every window
for i in range(len(message)):
    chunk = message[i:i + 12]
    if not isPhoneNumber(chunk):
        continue
    foundNumber = True
    print('Phone number found: ' + chunk)
if not foundNumber:
    print('Could not find any phone numbers.')

Phone number found: 132-222-7218
Phone number found: 132-222-1111


# phone Number program Using Regexes 

In [9]:
# Same task with a regular expression: one findall() call replaces the manual scan
a = re.findall(r'\d{3}-\d{3}-\d{4}', 'Call me at 132-222-7218, or at 132-222-1111')
for number in a:
    print('Phone Number found: '+ number)

Phone Number found: 132-222-7218
Phone Number found: 132-222-1111


# Period

In [45]:
pattern = r'.s'         # any single character (except a newline) followed by 's'
string1 = 'zs,as,es'    # 3 matches
string2 = '&s,42sc,:s'  # 3 matches
string3 = '-s'          # 1 match
# Show the matches for each example in turn
for candidate in (string1, string2, string3):
    print(re.findall(pattern, candidate))

['zs', 'as', 'es']
['&s', '2s', ':s']
['-s']


# Caret

In [10]:
pattern = r'^B'         # match any string that starts with an uppercase 'B'
string1 = 'basketball'  # no match: starts with lowercase 'b' (matching is case-sensitive)
string2 = 'boB'         # no match: the 'B' is not at the start
string3 = 'Bat'         # 1 match
# Show the matches for each example in turn
for candidate in (string1, string2, string3):
    print(re.findall(pattern, candidate))

[]
[]
['B']


# Dollar

In [12]:
pattern = r'ing$'    # '$' anchors the match to the end: the string must end with 'ing'
string1 = 'Joking'   # 1 match
string2 = 'CALLING'  # no match: uppercase 'ING' differs from 'ing'
# Show the matches for each example in turn
for candidate in (string1, string2):
    print(re.findall(pattern, candidate))

['ing']
[]


# Question Mark

In [15]:
# '?' makes the preceding group optional: match 'bat' followed by 0 or 1
# instance of 'woman', 'man' or 'girl'.
pattern = r'bat(woman|man|girl)?'
string1 = 'Batman, bat, batgirl, batwoman' # 3 matches ('Batman' is skipped: the pattern is case-sensitive)
# When the pattern has one capture group, findall() returns the text of that
# group for each match, so the plain 'bat' match shows up as an empty string.
print(re.findall(pattern,string1))

['', 'girl', 'woman']


# Braces

In [39]:
pattern = r'(wow){3,5}' # matches 3 to at most 5 consecutive repetitions of 'wow'
wowRegex = re.compile(pattern)

# Examples
string1 = 'wowwowwow' # match (3 repetitions)
string2 = 'wowwowwowwowwow' # match (5 repetitions)
string3 = 'wowwow' # no match (only 2 repetitions)
string4 = 'wow' # no match (only 1 repetition)

mo1 = wowRegex.search(string1)
mo2 = wowRegex.search(string2)
print(mo1.group())
print(mo2.group())
# search() returns None when nothing matches, so calling .group() on that result
# would raise AttributeError; findall() simply returns an empty list instead.
print(re.findall(pattern,string3))

wowwowwow
wowwowwowwowwow
[]


# Star

In [43]:
pattern = r'da*ta'    # '*' allows 0 or more occurrences of the first 'a' in 'data'
string1 = 'data'      # match
string2 = 'daaaaata'  # match
string3 = 'dta'       # match (zero occurrences is allowed)
# Show the matches for each example in turn
for candidate in (string1, string2, string3):
    print(re.findall(pattern, candidate))

['data']
['daaaaata']
['dta']


# Plus

In [44]:
pattern = r'da+ta'    # '+' requires 1 or more occurrences of the first 'a' in 'data'
string1 = 'data'      # match
string2 = 'daaaaata'  # match
string3 = 'dta'       # no match (at least one 'a' is required)
# Show the matches for each example in turn
for candidate in (string1, string2, string3):
    print(re.findall(pattern, candidate))

['data']
['daaaaata']
[]


### Extract Grocery list using Regexes

In [53]:
# Free-form text containing "<quantity> <item>" pairs to extract
message = '''I am going to the grocery tomorrow and need to get the following:
                3 onions,
                1 bread,
                1 Milk,
                10 bananas,
                5 peppers and 
                12 oranges.'''

# With re.VERBOSE, whitespace and '#' comments inside the pattern are ignored,
# so the pattern below is equivalent to r'\d+\s\w+'.
# Note: \s matches a single whitespace character and \w+ matches a run of
# word characters, i.e. one word (the item name).
groceryRegex = re.compile(r'''\d+ # one or more digits
                              \s  # space character
                              \w+ # one or more words
                              ''', re.VERBOSE) # re.VERBOSE can be used to make regexes more readable
# findall() returns each "<quantity> <item>" pair as one string
print(groceryRegex.findall(message))

['3 onions', '1 bread', '1 Milk', '10 bananas', '5 peppers', '12 oranges']


# Re.match 

In [5]:
import re

# re.match() succeeds only when the pattern is found at the very start of the text
sentence = r'Data Science is the hottest job of the 21st century.'

result = re.match('Data Science', sentence)  # pattern is at the start -> Match object
result_2 = re.match('largest', sentence)     # pattern is absent from the start -> None

print(result)
print(result_2)

<re.Match object; span=(0, 12), match='Data Science'>
None


In [6]:
print(result.group())  # group() returns the text matched by the pattern (not a count of matches)

Data Science


In [1]:
import re

# re.match() succeeds only when the pattern is found at the very start of the text
sentence = r'Analytics Vidhya is the largest data science community of India'

result = re.match('Analytics', sentence)  # pattern is at the start -> Match object
result_2 = re.match('largest', sentence)  # pattern occurs later in the text -> None

print(result)
print(result_2)

<re.Match object; span=(0, 9), match='Analytics'>
None


In [5]:
print(result.group())  # group() returns the text matched by the pattern (not a count of matches)

Analytics


# Re.search 

In [7]:
# re.search() scans the whole text and reports only the first occurrence
text = r'Elon Musk founded Tesla. He also founded SpaceX.'
result = re.search('founded', text)
print(result) # Also returns a match object
print(result.group()) # .group() returns the matched text, not the number of matches

<re.Match object; span=(10, 17), match='founded'>
founded


# Re.findall

In [9]:
# re.findall() returns every non-overlapping occurrence as a list of strings
text = r'Elon Musk founded Tesla. He also founded SpaceX.'
result = re.findall('founded', text)
print(result)

['founded', 'founded']
