### Regex

In [1]:
import re

In [9]:
# re.search returns a match object when the pattern is found in the string.
print(re.search("hello", "hello world")) # returns a match object

<_sre.SRE_Match object; span=(0, 5), match='hello'>


In [8]:
# "foo123" does not occur in "bar", so re.search returns None instead of a match object.
print(re.search("foo123", "bar"))

None


In [2]:
# Two literal search terms to look for in the sample text.
patterns = ["term1", "term2"]

In [3]:
# Sample text: contains "term1" but not "term2".
text = "This is a string with term1 but not the other term"

In [5]:
# Report, for every pattern, whether it occurs anywhere in the text.
# A match object is truthy and None is falsy, so the re.search result
# can pick the message directly.
for pattern in patterns:
    print(f"Search for {pattern} in text")
    print("\nMatch found\n" if re.search(pattern, text) else "\nMatch NOT found\n")

Search for term1 in text

Match found

Search for term2 in text

Match NOT found



### The match object

In [10]:
# Keep the match object for the first pattern so its position can be inspected below.
match = re.search(patterns[0], text)

In [13]:
# Index of the first character of the match within the searched string.
match.start()

22

In [14]:
# Index just past the last matched character (exclusive end of the span).
match.end()

27

In [15]:
# Pattern to split on, plus a new sample string.
# NOTE: this rebinds `text`, replacing the sample used by the earlier cells.
split_term = "@"
text = "What's your email? hello@gmail.com?"

In [16]:
# Split the text at every occurrence of the pattern; the "@" itself is not kept in the pieces.
re.split(split_term, text)

["What's your email? hello", 'gmail.com?']

In [19]:
# Collect every occurrence of the pattern as a list of strings.
re.findall("match", "here is one match, and another match...")

['match', 'match']

### Meta Characters

In [25]:
def multi_re_find(patterns, phrase):
    """Print all matches of each regular expression in ``phrase``.

    For every entry of ``patterns`` a header line naming the pattern is
    printed, followed by the list returned by ``re.findall`` and a blank
    spacer. Nothing is returned.

    Args:
        patterns: Iterable of regular-expression strings, tried in order.
        phrase: The text that is searched.
    """
    for regex in patterns:
        header = f"Search phrase using the re check: {regex}"
        print(header)
        found = re.findall(regex, phrase)
        print(found)
        print("\n")

In [26]:
# Sample text: runs of "s" and "d" separated by "...".
test_text = "sdsd...sssddd...sdddsddd...dsds...dssss...sdddd"
# Quantifier examples: each pattern is an "s" followed by a different repetition of "d".
patterns = [
    "sd*", # s followed by zero or more d
    "sd+", # s followed by one or more d
    "sd?", # s followed by zero or one d
    "sd{3}", # s followed by exactly three d
    "sd{2,3}" # s followed by two or three d
]

In [27]:
# Show what each quantifier pattern matches in the sample text.
multi_re_find(patterns, test_text)

Search phrase using the re check: sd*
['sd', 'sd', 's', 's', 'sddd', 'sddd', 'sddd', 'sd', 's', 's', 's', 's', 's', 'sdddd']


Search phrase using the re check: sd+
['sd', 'sd', 'sddd', 'sddd', 'sddd', 'sd', 'sdddd']


Search phrase using the re check: sd?
['sd', 'sd', 's', 's', 'sd', 'sd', 'sd', 'sd', 's', 's', 's', 's', 's', 'sd']


Search phrase using the re check: sd{3}
['sddd', 'sddd', 'sddd', 'sddd']


Search phrase using the re check: sd{2,3}
['sddd', 'sddd', 'sddd', 'sddd']




### Character set

In [28]:
# Square brackets define a character set: any one of the listed characters matches.
test_text = "sdsd...sssddd...sdddsddd...dsds...dssss...sdddd"
patterns = [
    "[sd]", # any single character that is either s or d
    "s[sd]+" # s followed by one or more characters that are s or d
]
multi_re_find(patterns, test_text)

Search phrase using the re check: [sd]
['s', 'd', 's', 'd', 's', 's', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 'd', 's', 'd', 's', 'd', 's', 's', 's', 's', 's', 'd', 'd', 'd', 'd']


Search phrase using the re check: s[sd]+
['sdsd', 'sssddd', 'sdddsddd', 'sds', 'ssss', 'sdddd']




### Exclusion

In [31]:
# A leading ^ inside [] negates the set. Matching runs of characters that are
# not "?", "!", "." or a space drops the punctuation and yields the words.
test_text = "This is a sentence with punctutations. Not good! How can I remove them?"
patterns = [
    "[^?!. ]+" # one or more characters other than ?, !, . and space
]
multi_re_find(patterns, test_text)

Search phrase using the re check: [^?!. ]+
['This', 'is', 'a', 'sentence', 'with', 'punctutations', 'Not', 'good', 'How', 'can', 'I', 'remove', 'them']




### Character Ranges

In [33]:
# Inside [], a hyphen between two characters denotes a range (e.g. a-z).
test_text = "This is a string. Let's see if we can find some letters from it."
patterns = [
    "[a-z]+", # one or more lower case letters
    "[A-Z]+", # one or more upper case letters
    "[a-zA-Z]+", # one or more letters of either case
    "[A-Z][a-z]+" # one upper case letter followed by one or more lower case letters
]
multi_re_find(patterns, test_text)

Search phrase using the re check: [a-z]+
['his', 'is', 'a', 'string', 'et', 's', 'see', 'if', 'we', 'can', 'find', 'some', 'letters', 'from', 'it']


Search phrase using the re check: [A-Z]+
['T', 'L']


Search phrase using the re check: [a-zA-Z]+
['This', 'is', 'a', 'string', 'Let', 's', 'see', 'if', 'we', 'can', 'find', 'some', 'letters', 'from', 'it']


Search phrase using the re check: [A-Z][a-z]+
['This', 'Let']




### Escape Characters

In [34]:
# Raw strings (r"...") keep the backslashes intact for the regex engine.
test_text = "This is a string with some number 123 and a symbol of #hashtag"
patterns = [
    r"\d+", # sequence of digits
    r"\D+", # sequence of non-digits
    r"\s+", # sequence of whitespace characters (space, tab, newline, ...)
    r"\S+", # sequence of non-whitespace characters
    r"\w+", # sequence of word characters: letters, digits and underscore (so '123' matches too)
    r"\W+", # sequence of non-word characters (anything \w does not match)
]
multi_re_find(patterns, test_text)

Search phrase using the re check: \d+
['123']


Search phrase using the re check: \D+
['This is a string with some number ', ' and a symbol of #hashtag']


Search phrase using the re check: \s+
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']


Search phrase using the re check: \S+
['This', 'is', 'a', 'string', 'with', 'some', 'number', '123', 'and', 'a', 'symbol', 'of', '#hashtag']


Search phrase using the re check: \w+
['This', 'is', 'a', 'string', 'with', 'some', 'number', '123', 'and', 'a', 'symbol', 'of', 'hashtag']


Search phrase using the re check: \W+
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' #']


