## REGEX (REGULAR EXPRESSIONS)

A regular expression—sometimes called a regex—is a string of characters that specifies a pattern to match against some text.

In [23]:
# Index-based extraction: find the opening bracket, then take a fixed-width slice.
log = "July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade"
index = log.index("[")
# The slice starts at the bracket itself, so the printed text includes the "[".
snippet = log[index:index + 6]
print(snippet)

[12345


In [24]:
import re
# The same extraction with a regular expression: the parentheses capture the
# digits that sit between a literal "[" and a literal "]".
log = "July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade"
regex = r"\[(\d+)\]"
result = re.search(regex, log)
# Group 1 is the captured digit run, without the brackets.
print(result.group(1))

12345


In [25]:
import re
# A literal pattern: search() returns a Match object when the substring is
# present and None when it is not.
for word in ("bazaar", "maze"):
  result = re.search(r"aza", word)
  print(result)

# "^" anchors the pattern to the start of the string.
print(re.search(r"^x", "xenon"))

# "." stands for exactly one arbitrary character between "p" and "ng".
for word in ("penguin", "clapping", "sponge"):
  print(re.search(r"p.ng", word))

# re.IGNORECASE lets the lowercase "p" in the pattern match the capital "P".
print(re.search(r"p.ng", "Pangaea", re.IGNORECASE))

<re.Match object; span=(1, 4), match='aza'>
None
<re.Match object; span=(0, 1), match='x'>
<re.Match object; span=(0, 4), match='peng'>
<re.Match object; span=(4, 8), match='ping'>
<re.Match object; span=(1, 5), match='pong'>
<re.Match object; span=(0, 4), match='Pang'>


In [26]:
import re
def check_aei(text):
  """Tell whether ``text`` contains "a", "e" and "i" with exactly one character between each.

  Args:
    text: The string to scan.

  Returns:
    True if the pattern ``a.e.i`` occurs anywhere in ``text``, otherwise False.
  """
  return re.search(r"a.e.i", text) is not None

# Try check_aei on a few words; the expected result is noted next to each one.
for word in (
  "academia",   # True
  "aerial",     # False
  "paramedic",  # True
):
  print(check_aei(word))

True
False
True


### WILDCARDS AND CHARACTER CLASSES

In [27]:
import re
# A character class: [Pp] accepts either an upper- or a lowercase "p".
python_match = re.search(r"[Pp]ython", "Python")
print(python_match)

<re.Match object; span=(0, 6), match='Python'>


In [28]:
import re
# A range inside a class: one lowercase letter must come directly before "way".
for sentence in ("The end of the highway", "What a way to go"):
  print(re.search(r"[a-z]way", sentence))

# Several ranges in one class: a single letter or digit must follow "cloud".
for word in ("cloudy", "cloud9"):
  print(re.search("cloud[a-zA-Z0-9]", word))

<re.Match object; span=(18, 22), match='hway'>
None
<re.Match object; span=(0, 6), match='cloudy'>
<re.Match object; span=(0, 6), match='cloud9'>


In [29]:
import re
def check_punctuation(text):
  """Tell whether ``text`` contains at least one punctuation mark.

  The marks looked for are comma, period, colon, semicolon, question mark
  and exclamation mark.

  Args:
    text: The string to scan.

  Returns:
    True if any of those characters occurs in ``text``, otherwise False.
  """
  return re.search(r"[,.:;?!]", text) is not None

# Try check_punctuation on sample sentences; the expected result follows each one.
for sentence in (
  "This is a sentence that ends with a period.",    # True
  "This is a sentence fragment without a period",   # False
  "Aren't regular expressions awesome?",            # True
  "Wow! We're really picking up some steam now!",   # True
  "End of the line",                                # False
):
  print(check_punctuation(sentence))

True
False
True
True
False


In [30]:
import re
# A leading "^" inside a class negates it: find the first character that is
# not a letter, then the first that is neither a letter nor a space.
sentence = "This is a sentence with spaces."
print(re.search(r"[^a-zA-Z]", sentence))
print(re.search(r"[^a-zA-Z ]", sentence))

# "|" matches either alternative; search() only reports the first hit it finds.
# The same three searches are run twice in a row.
pet_sentences = ("I like cats.", "I love dogs!", "I like both dogs and cats.")
for text in pet_sentences + pet_sentences:
  print(re.search(r"cat|dog", text))

# findall() returns every match in the text as a list of strings.
print(re.findall(r"cat|dog", "I like both dogs and cats."))

<re.Match object; span=(4, 5), match=' '>
<re.Match object; span=(30, 31), match='.'>
<re.Match object; span=(7, 10), match='cat'>
<re.Match object; span=(7, 10), match='dog'>
<re.Match object; span=(12, 15), match='dog'>
<re.Match object; span=(7, 10), match='cat'>
<re.Match object; span=(7, 10), match='dog'>
<re.Match object; span=(12, 15), match='dog'>
['dog', 'cat']


### REPETITION QUALIFIERS

In [31]:
import re
# ".*" is greedy: it stretches to the last "n" it can reach, even across a space.
for text in ("Pygmalion", "Python Programming"):
  print(re.search(r"Py.*n", text))

# Repeating only lowercase letters stops the match at the first word;
# "*" also allows zero repetitions, so "Pyn" matches too.
for text in ("Python Programming", "Pyn"):
  print(re.search(r"Py[a-z]*n", text))

<re.Match object; span=(0, 9), match='Pygmalion'>
<re.Match object; span=(0, 17), match='Python Programmin'>
<re.Match object; span=(0, 6), match='Python'>
<re.Match object; span=(0, 3), match='Pyn'>


In [32]:
import re
# "+" needs one or more repetitions: a run of "o" immediately followed by a run of "l".
for word in ("goldfish", "woolly", "boil"):
  print(re.search(r"o+l+", word))

<re.Match object; span=(1, 3), match='ol'>
<re.Match object; span=(1, 5), match='ooll'>
None


In [33]:
import re
# "?" makes the preceding character optional: "each" with or without a leading "p".
for phrase in ("To each their own", "I like peaches"):
  print(re.search(r"p?each", phrase))

<re.Match object; span=(3, 7), match='each'>
<re.Match object; span=(7, 12), match='peach'>


### ESCAPING CHARACTERS

In [34]:
import re
# Unescaped, "." is a wildcard, so ".com" matches inside "welcome".
print(re.search(r".com", "welcome"))

# Escaped as "\.", it only matches a literal period.
for text in ("welcome", "mydomain.com"):
  print(re.search(r"\.com", text))

<re.Match object; span=(2, 6), match='lcom'>
None
<re.Match object; span=(8, 12), match='.com'>


In [35]:
import re
# "\w" matches letters, digits and underscores, so the match stops at the
# first space but runs straight through underscores.
for text in ("This is an example", "And_this_is_another"):
  print(re.search(r"\w*", text))

<re.Match object; span=(0, 4), match='This'>
<re.Match object; span=(0, 19), match='And_this_is_another'>


### REGEX IN ACTION

In [36]:
import re
# Without anchors the pattern may match only part of the word
# ("Azerbaijan" matches up to its last "a").
for country in ("Argentina", "Azerbaijan"):
  print(re.search(r"A.*a", country))

# "^" and "$" force the pattern to span the whole string.
print(re.search(r"^A.*a$", "Australia"))

<re.Match object; span=(0, 9), match='Argentina'>
<re.Match object; span=(0, 9), match='Azerbaija'>
<re.Match object; span=(0, 9), match='Australia'>


In [37]:
import re
# Identifier check: the first character is a letter or underscore, the rest are
# letters, digits or underscores, and the anchors make the pattern cover the
# entire string.
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
for candidate in (
  "_this_is_a_valid_variable_name",
  "this isn't a valid variable",
  "my_variable1",
  "2my_variable1",
):
  print(re.search(pattern, candidate))

<re.Match object; span=(0, 30), match='_this_is_a_valid_variable_name'>
None
<re.Match object; span=(0, 12), match='my_variable1'>
None


In [38]:
import re
def check_sentence(text):
  """Tell whether ``text`` looks like a sentence.

  A sentence here starts with an uppercase ASCII letter and ends with a
  period, question mark or exclamation mark.

  Args:
    text: The string to check.

  Returns:
    True if ``text`` starts with A-Z and its last character is one of
    ``.``, ``?`` or ``!``; otherwise False.
  """
  # The trailing "$" matters: without it, any text that merely contains a
  # punctuation mark somewhere (e.g. "Hello. there") would be accepted.
  result = re.search(r"^[A-Z].*[.?!]$", text)
  return result is not None

# Try check_sentence on sample inputs; the expected result follows each one.
for candidate in (
  "Is this is a sentence?",  # True
  "is this is a sentence?",  # False
  "Hello",                   # False
  "1-2-3-GO!",               # False
  "A star is born.",         # True
):
  print(check_sentence(candidate))

True
False
False
False
True


### QUIZ BASIC REGEX

In [39]:
import re
def check_web_address(text):
  """Tell whether ``text`` has the shape of a bare web address.

  The whole string must consist of word characters (letters, digits,
  underscore), periods, plus signs or dashes, followed by a period and a
  top-level domain of at least two ASCII letters.

  Args:
    text: The string to check.

  Returns:
    True if the entire string matches that shape, otherwise False.
  """
  # Anchoring at "^" and restricting the leading part to allowed characters
  # rejects inputs such as "www@google.com", which a check of the trailing
  # ".tld" alone would accept.
  pattern = r"^[\w.+-]+\.[a-zA-Z]{2,}$"
  result = re.search(pattern, text)
  return result is not None

# Try check_web_address on sample inputs; the expected result follows each one.
for address in (
  "gmail.com",                 # True
  "www@google",                # False
  "www.Coursera.org",          # True
  "web-address.com/homepage",  # False
  "My_Favorite-Blog.US",       # True
):
  print(check_web_address(address))


True
False
True
False
True


In [None]:
import re
def check_time(text):
  """Tell whether ``text`` is a 12-hour clock time such as "9:59 AM".

  Accepted shape: an hour from 1 to 12 (a leading zero on single-digit hours
  is tolerated), a colon, minutes from 00 to 59, an optional single space,
  and an am/pm marker written entirely in lower or entirely in upper case.

  Args:
    text: The string to check.

  Returns:
    True if the entire string matches that shape, otherwise False.
  """
  # The hour alternatives are grouped on their own and the pattern is anchored
  # at both ends, so inputs like "1pm" (no minutes) or "25:30pm" (only the
  # tail "5:30pm" is a valid time) are rejected.
  pattern = r"^(?:1[0-2]|0?[1-9]):[0-5][0-9] ?(?:am|pm|AM|PM)$"
  result = re.search(pattern, text)
  return result is not None

# Try check_time on sample inputs; the expected result follows each one.
for candidate in (
  "12:45pm",       # True
  "9:59 AM",       # True
  "6:60am",        # False
  "five o'clock",  # False
  "6:02 am",       # True
  "6:02km",        # False
):
  print(check_time(candidate))

In [None]:
import re
def contains_acronym(text):
  """Tell whether ``text`` contains a parenthesised run of letters and/or digits.

  Examples of what counts: "(IM)", "(4GL)", "(Scuba)".

  Args:
    text: The string to scan.

  Returns:
    True if a "(" followed by one or more ASCII letters or digits and a ")"
    occurs anywhere in ``text``, otherwise False.
  """
  # re.search already scans every position, so no ".*" is needed around the
  # pattern; wrapping it in ".*" only adds backtracking on texts with no "(".
  pattern = r"\([A-Za-z0-9]+\)"
  result = re.search(pattern, text)
  return result is not None
# Try contains_acronym on sample sentences; the expected result follows each one.
for sentence in (
  "Instant messaging (IM) is a set of communication technologies used for text-based communication",  # True
  "American Standard Code for Information Interchange (ASCII) is a character encoding standard for electronic communication",  # True
  "Please do NOT enter without permission!",  # False
  "PostScript is a fourth-generation programming language (4GL)",  # True
  "Have fun using a self-contained underwater breathing apparatus (Scuba)!",  # True
):
  print(contains_acronym(sentence))

In [None]:
import re

def correct_function(text):
  """Tell whether ``text`` contains a US ZIP code preceded by a space.

  A ZIP code is exactly five digits, optionally followed by a dash and four
  more digits (ZIP+4). The space requirement means a code at the very start
  of the text is not counted.

  Args:
    text: The string to scan.

  Returns:
    True if such a ZIP code occurs anywhere in ``text``, otherwise False.
  """
  # "(?:-\d{4})?" is an optional group for the ZIP+4 suffix (square brackets
  # would make it a character class instead). "(?!\d)" rejects longer digit
  # runs such as " 1234567". A dash suffix that is not exactly four digits is
  # ignored rather than rejected.
  result = re.search(r" \d{5}(?:-\d{4})?(?!\d)", text)
  return result is not None

def check_zip_code(text):
  """Check ``text`` for a ZIP code by delegating to ``correct_function``.

  Args:
    text: The string to scan.

  Returns:
    Whatever ``correct_function(text)`` returns.
  """
  found = correct_function(text)
  return found

# Run check_zip_code on the sample sentences; the expected result follows each one.
for sentence in (
  "The zip codes for New York are 10001 thru 11104.",  # True
  "90210 is a TV show",  # False (no space before 90210)
  "Their address is: 123 Main Street, Anytown, AZ 85258-0001.",  # True
  "The Parliament of Canada is at 111 Wellington St, Ottawa, ON K1A0A9.",  # False
):
  print(check_zip_code(sentence))