In [207]:
import re
# .match() Determine if the RE matches at the beginning of the string
# .search() Scan through a string, looking for any location where this RE matches
# .findall() Find all substrings where the RE matches, and returns them as a list
# .finditer() Find all substrings where the RE matches, and returns them as an iterator.

In [208]:
sent = "This book on tennis cost $3.9994 at Walmart where tenia tanto."
p1 = re.compile("This")
m1 = p1.match(sent)
print(m1)

<re.Match object; span=(0, 4), match='This'>


In [209]:
mUpper = re.compile(r"\b[wW]almart\b") #include word boundaries (\b), which ensure that the pattern matches only the word "Walmart" (with either uppercase or lowercase "W") and not a substring of a word. [wW]almart will also work but other matches like walmarts will also appear.
mUpperorLower = re.findall(mUpper, sent)
print(mUpperorLower)

['Walmart']


In [210]:
letters = re.compile("[abc]") 
# We can do exactly the same with digits.
# For all lowecase() letters [a-z]. 
# upperCase() letters [A-Z]. 
# Not upperCase() [^A-Z]...
myletters = re.findall(letters, sent)
print(myletters)

['b', 'c', 'a', 'a', 'a', 'a', 'a']


In [211]:
interrogationOperator = re.compile(r"\btennis?\b") # Searches both: with the preceding character to the interrogation and without the character.
myinterrogationOperator = re.findall(interrogationOperator, sent)
print(myinterrogationOperator)

['tennis']


Match searches at the beginning of the phrase and search searches througout all the sentence!!

In [212]:
p2 = re.compile(".*ten,*") # Kleene star means “zero or more occurrences of the immediately previous character or regular expression”
m2 = p2.match(sent)
print(m2)

<re.Match object; span=(0, 53), match='This book on tennis cost $3.9994 at Walmart where>


In [213]:
m3 = re.search(p1,sent)
print(m3)

<re.Match object; span=(0, 4), match='This'>


In [214]:
m4 = re.search(p2,sent)
print(m4)

<re.Match object; span=(0, 53), match='This book on tennis cost $3.9994 at Walmart where>


In [215]:
pp1 = re.compile("is")
m5 = re.findall(pp1, sent)
print(m5)

['is', 'is']


In [216]:
pp2 = re.compile("\\d") # Digit
m6 = re.search(pp2, sent)
print(m6)

<re.Match object; span=(26, 27), match='3'>


In [217]:
pp3 = re.compile("\\d+")
m7 = re.search(pp3, sent)
print(m7)

<re.Match object; span=(26, 27), match='3'>


In [218]:
pp3 = re.compile("\\$d\\d+\\.\\d\\d+") # will match a string that starts with the dollar sign ($) followed by a single digit (0-9), one or more digits (0-9), a period (.), and two more digits (0-9).
m8 = re.search(pp3, sent) 
print(m7)

<re.Match object; span=(26, 27), match='3'>


In [219]:
ppp1 = re.compile("(\\w+) cost (\\$\\d+\\.\\d+)") # Whatever is before cost and any digit witht he following structure $d+.d+
mm1 = re.search(ppp1, sent)
mm1

<re.Match object; span=(13, 32), match='tennis cost $3.9994'>

In [220]:
mm1.group(0)

'tennis cost $3.9994'

In [221]:
mm1.group(1)

'tennis'

In [230]:
mm1.group(2)

'$3.9994'

In [223]:
line = "Cats are smarter than cats";
searchObj = re.search(r'(.*) are (.*) .*', line, re.M|re.I)

In [224]:
if searchObj:
    print ("searchObj.group() : ", searchObj.group())
    print ("searchObj.group(1) : ", searchObj.group(1))
    print ("searchObj.group(2) : ", searchObj.group(2))
else:
    print("Nothing found!")

searchObj.group() :  Cats are smarter than cats
searchObj.group(1) :  Cats
searchObj.group(1) :  smarter than


In [225]:
target_string = "the price of PINEAPPLE ice cream is 20"

result = re.search(r"(\b[A-Z]+\b).+(\b\d+)", target_string)

print(result.group(0))

print(result.groups())

print(result.group(1))

print(result.group(2))

PINEAPPLE ice cream is 20
('PINEAPPLE', '20')
PINEAPPLE
20


In [226]:
target_string = "The price of ice-creams PINEAPPLE is 20 MANGO is 30 CHOCOLATE is 40"
pattern = re.compile(r"(\b[A-Z]+\b) is (\b\d+\b)")

for match in pattern.finditer(target_string):
    print(match.group(1))
    print(match.group(2))

PINEAPPLE
20
MANGO
30
CHOCOLATE
40


In [227]:
ip = 'They ate 5 apples and 5 trucks'
re.sub(r'5', 'five', ip)

'They ate five apples and five trucks'

In [228]:
stringCheck = "Check this string has characteres from c to h"

In [231]:
def is_allowed_specific_char(string):
    charRe = re.compile(r'[^a-zA-Z0-9]')
    string = charRe.search(string)
    return not bool(string)
print(is_allowed_specific_char("ABCDEFabcdef123450"))
print(is_allowed_specific_char("*&%@#!}{"))

True
False
