## Regular Expressions

In [1]:
import re

#### re.search()
Returns a Match object for the first place the pattern matches anywhere in the string, or `None` if there is no match

In [4]:
# re.search() gives back a Match object (truthy) on success and None (falsy)
# on failure, so the result can be tested directly.
match = re.search(pattern=r'hello', string="hello world")
print("Yes" if match else "No")

# Matching is case-sensitive by default: "Hello" does not satisfy r'hello'.
match = re.search(pattern=r'hello', string="Hello world")
print("Yes" if match else "No")

Yes
No


#### re.findall()
Returns a list containing all non-overlapping matches, in the order they are found (an empty list if there are none)

In [5]:
# The literal "hello" occurs once in the text, so the list has one entry.
match = re.findall(pattern=r'hello', string="hello world")

print(match)

['hello']


In [6]:
# Every occurrence is reported: one "o" in "hello" and one in "world".
match = re.findall(pattern=r'o', string="hello world")

print(match)

['o', 'o']


In [7]:
# No "a" in the text: findall() returns an empty list rather than None.
match = re.findall(pattern=r'a', string="hello world")

print(match)

[]


### Anchors: start and end of strings

In [8]:
# ^ anchors the pattern to the start of the string.
match = re.findall(pattern=r'^hello', string="hello world")

print(match)

['hello']


In [9]:
# $ anchors the pattern to the end of the string; "hello" is not at the end,
# so nothing matches.
match = re.findall(pattern=r'hello$', string="hello world")

print(match)

[]


In [10]:
# "world" sits right at the end of the string, so the $ anchor is satisfied.
match = re.findall(pattern=r'world$', string="hello world")

print(match)

['world']


### Special characters


In [13]:
# \d matches a single digit; each digit is returned as its own match.
match = re.findall(pattern=r'\d', string="hello world112!")

print(match)

['1', '1', '2']


In [15]:
# \D is the complement of \d: any single character that is not a digit.
match = re.findall(pattern=r'\D', string="hello world112!")

print(match)

['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!']


In [18]:
# \w matches a "word" character: a letter, a digit or the underscore.
# The space and the "!" are therefore skipped.
match = re.findall(pattern=r'\w', string="hello world112_!")

print(match)

['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd', '1', '1', '2', '_']


In [20]:
# \s matches a whitespace character (space, tab, newline, ...).
match = re.findall(pattern=r'\s', string="hello world112_!")

print(match)

[' ']


In [21]:
# An unescaped dot matches any single character except a newline
# (unless the re.DOTALL flag is given).
match = re.findall(pattern=r'.', string="hello world112_!")

print(match)

['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '1', '1', '2', '_', '!']


In [22]:
# Escaping the dot makes it match a literal "." only; the text has none.
match = re.findall(pattern=r'\.', string="hello world112_!")

print(match)

[]


### Quantifiers

In [24]:
# * means "zero or more" of the preceding item: "hello" plus any run of "!".
match = re.findall(pattern=r'hello!*', string="hello!!!!")

print(match)

['hello!!!!']


In [25]:
# + means "one or more" of the preceding item: "hello" plus at least one "!".
match = re.findall(pattern=r'hello!+', string="hello!!!!")

print(match)

['hello!!!!']


In [26]:
# ? means "zero or one" of the preceding item: "hello" plus at most one "!",
# so only the first "!" becomes part of the match.
match = re.findall(pattern=r'hello!?', string="hello!!!!")

print(match)

['hello!']


In [27]:
# {4} means "exactly four" of the preceding item: "hello" plus four "!".
match = re.findall(pattern=r'hello!{4}', string="hello!!!!")

print(match)

['hello!!!!']


### Groups

In [31]:
# (?:...) is a non-capturing group, so + applies to the whole sequence "!2!".
# The text never contains "!2!", hence no match.
match = re.findall(pattern=r'hello(?:!2!)+', string="hello!!!!")

print(match)

[]


In [32]:
# Same pattern as before: "hello" followed by one or more repetitions of the
# sequence "!2!". Here the sequence appears three times in a row.
match = re.findall(pattern=r'hello(?:!2!)+', string="hello!2!!2!!2!")

print(match)

['hello!2!!2!!2!']


In [33]:
# With a capturing group (plain parentheses), findall() returns what the group
# captured instead of the whole match. Because the group is repeated, only its
# last repetition, "!2!", is kept.
match = re.findall(pattern=r'hello(!2!)+', string="hello!2!!2!!2!")

print(match)

['!2!']


### Character classes

In [34]:
# [ab] matches one character that is either "a" or "b".
match = re.findall(pattern=r'[ab]', string="Dear brother")

print(match)

['a', 'b']


In [37]:
# A leading ^ inside the brackets negates the class:
# one character that is neither "a" nor "b".
match = re.findall(pattern=r'[^ab]', string="Dear brother")

print(match)

['D', 'e', 'r', ' ', 'r', 'o', 't', 'h', 'e', 'r']


In [38]:
# [Hh] accepts either capitalisation of the first letter.
match = re.findall(pattern=r'[Hh]ello', string="Hello")

print(match)

['Hello']


In [39]:
# "j" is not in the class [Hh], so "jello" is rejected.
match = re.findall(pattern=r'[Hh]ello', string="jello")

print(match)

[]


In [40]:
# The negated class [^Hh] accepts any first character except "H" or "h".
match = re.findall(pattern=r'[^Hh]ello', string="jello")

print(match)

['jello']


In [35]:
# A hyphen inside brackets defines a range: one digit from 1 through 7.
# The "0" in "5:30" falls outside the range and is skipped.
match = re.findall(pattern=r'[1-7]', string="I leave at 5:30")

print(match)

['5', '3']


In [36]:
# Several ranges can share one class: any lowercase or uppercase ASCII letter.
match = re.findall(pattern=r'[a-zA-Z]', string="I leave at 5:30")

print(match)

['I', 'l', 'e', 'a', 'v', 'e', 'a', 't']


#### re.sub()
Returns a new string in which every match of the pattern is replaced with the given replacement string

In [42]:
# re.sub() returns the resulting string (not a Match object): here the
# single occurrence of "Hello" is swapped for "Goodbye".
match = re.sub(pattern=r'Hello', repl="Goodbye", string="Hello world!")

print(match)

Goodbye world!
