In [1]:
# Regular expressions allow you to locate and change
# strings in very powerful ways.
# They work in almost exactly the same way in every
# programming language as well.

# Regular expressions (Regex) are used to
# 1. Search for a specific string in a large amount of data
# 2. Verify that a string has the proper format (Email, Phone #, etc)
# 3. Find a string and replace it with another string
# 4. Format data into a proper form (for importing, for example)

# import the Regex module
import re

# ------------ Was a Match found -------------

# re.search() scans the string and hands back a match object for the
# first place the pattern occurs, or None when it occurs nowhere
if re.search("ape", "The ape was at the apex") is not None:
    print("There is an ape")

There is an ape


In [3]:
import re
# ------------ Get All Matches --------------

# findall() returns a list holding every match of the pattern
# . matches any single character, a space included (but not a newline)
allApes = re.findall("ape.", "The ape was at the apex")

# Show each match on a line of its own
print(*allApes, sep="\n")

ape 
apex


In [5]:
# finditer() returns an iterator that yields one match object per hit
# Call span() on a match object to learn where the hit is located

theStr = "The ape was at the apex"

for hit in re.finditer("ape.", theStr):

    # span() returns a (start, end) tuple of indexes into the string
    locTuple = hit.span()

    print(locTuple)

    # Turn the tuple into a slice to cut the matched text out
    print(theStr[slice(*locTuple)])

(4, 8)
ape 
(19, 23)
apex


In [6]:
import re
# ------------- Match 1 of Several Letters ---------------

# Square brackets will match any one of the characters between
# the brackets. Matching is case sensitive, so an uppercase or
# lowercase variant only matches when it is listed as well

animalStr = "Cat rat mat fat pat"

allAnimals = re.findall("[crmfp]at", animalStr)

# One match per line, followed by an empty line
print("\n".join(allAnimals), end="\n\n")

rat
mat
fat
pat



In [7]:
# Ranges of characters are allowed inside the brackets as well
# Remember to list both the uppercase and the lowercase range

animalStr = "Cat rat mat fat pat"
someAnimals = re.compile("[c-mC-M]at").findall(animalStr)
for animal in someAnimals:
    print(animal)

print()


Cat
mat
fat



In [8]:
import re
# ------------ Replace All matches ----------------

# Swap every match in a string for other text

owlFood = "rat cat mat pat"

# Compiling a regex produces a pattern object, which
# offers methods of its own
regex = re.compile("[cr]at")

# sub() swaps every match found in `string` for the `repl` text
# and returns the resulting string
owlFood = regex.sub(repl="owl", string=owlFood)

print(owlFood)

owl owl mat pat


In [9]:
import re
# ------------ Solving backslash Problems ---------------

# Regex use the backslash to designate special characters
# and Python string literals do the same, so the two
# layers of escaping get in each other's way.

# Let's try to get a backslash followed by stuff out of a string

randStr = "Here is \\stuff"

# No match: the regex engine receives \stuff and reads \s as
# "any whitespace character", followed by tuff
print("Find \\stuff :", re.compile("\\stuff").search(randStr))

# Match, but it takes 4 backslashes: Python turns them into 2
# and the regex engine reads those 2 as 1 literal backslash
print("Find \\stuff :", re.compile("\\\\stuff").search(randStr))

# Raw strings don't treat backslashes as special, so only the
# regex-level escaping has to be written
print("Find \\stuff :", re.compile(r"\\stuff").search(randStr))

Find \stuff : None
Find \stuff : <re.Match object; span=(8, 14), match='\\stuff'>
Find \stuff : <re.Match object; span=(8, 14), match='\\stuff'>
