In [None]:
# Regex quick-reference table. It is a raw string so that sequences such as
# \n, \t and \b are shown literally instead of being converted to control
# characters, and so that \d, \w, \s ... are not invalid escape sequences.
r'''
[]	A set of characters	"[a-m]"	
\	Signals a special sequence (can also be used to escape special characters)	"\d"	
.	Any character (except newline character)	"he..o"	
^	Starts with	"^hello"	
$	Ends with	"planet$"	
*	Zero or more occurrences	"he.*o"	
+	One or more occurrences	"he.+o"	
?	Zero or one occurrences	"he.?o"	
{}	Exactly the specified number of occurrences	"he.{2}o"	
|	Either or	"falls|stays"	
()	Capture and group

Expressions &	Explanations
\w 
Matches alphanumeric characters, that is a-z, A-Z, 0-9, and underscore(_)
\W
Matches non-alphanumeric characters, that is except a-z, A-Z, 0-9 and _
\d
Matches digits, from 0-9.
\D 
Matches any non-digits.
\s
Matches whitespace characters, which also include the \t, \n, \r, and space characters.
\S
Matches non-whitespace characters.
\A
Matches the expression to its right at the absolute start of a string whether in single or multi-line mode.
\Z 
Matches the expression to its left at the absolute end of a string whether in single or multi-line mode.
\n
Matches a newline character
\t
Matches tab character
\b
Matches the word boundary (or empty string) at the start and end of a word.
\B
Matches where \b does not, that is, non-word boundary
'''

In [3]:
import re
import warnings
# Globally silence every warning category for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides the warning Python emits for
# invalid escape sequences (e.g. "\d" written in a non-raw string literal);
# writing patterns as raw strings (r"\d") avoids relying on this filter.
warnings.filterwarnings("ignore")

In [2]:
# re.search scans the whole string and returns a Match object for the first
# occurrence of the pattern, or None when there is no occurrence at all.
ape_match = re.search("ape", "The ape was at the apex")
if ape_match is not None:
    print("Found a match")

Found a match


In [5]:
# Gather the text of every non-overlapping "ape" occurrence; the word "apex"
# contributes one as well because the pattern is not anchored to word ends.
all_apes = [hit.group() for hit in re.finditer("ape", "The ape was at the apex")]
for ape in all_apes:
    print(ape)

ape
ape


In [6]:
the_str = "The ape was at the apex"
# "ape." = the letters "ape" plus any one following character.
# finditer yields Match objects, which expose where each hit sits in the text.
for hit in re.finditer("ape.", the_str):
    start, end = hit.span()  # (start, end) offsets, end exclusive
    print((start, end))
    print(the_str[start:end])

(4, 8)
ape 
(19, 23)
apex


In [8]:
animal_str = "Cat rat mat fat pat"
# Character set: exactly one of c, r, m, f or p, followed by "at".
# Matching is case-sensitive, so the capitalised "Cat" is not picked up.
animal_pattern = re.compile("[crmfp]at")
all_animals = animal_pattern.findall(animal_str)
for animal in all_animals:
    print(animal)

rat
mat
fat
pat


In [9]:
# Ranges inside a set: any single letter in c-m or C-M, followed by "at".
some_animals = [hit.group() for hit in re.finditer("[c-mC-M]at", animal_str)]
for animal in some_animals:
    print(animal)

Cat
mat
fat


In [10]:
# A leading ^ negates the set: any single character other than "C" or "r",
# followed by "at". The check is case-sensitive, so only an upper-case "C"
# and a lower-case "r" are excluded.
negated_pattern = re.compile("[^Cr]at")
all_animals = negated_pattern.findall(animal_str)
for animal in all_animals:
    print(animal)

mat
fat
pat


In [13]:
# Replace every "cat" or "mat" in the menu with "owl".
regex = re.compile("[cm]at")
owl_food = re.sub(regex, "owl", "rat cat mat pat")
print(owl_food)

rat owl owl pat


In [17]:
rand_str = "Here is \\stuff"

# A literal backslash has to be escaped for the regex engine. In a raw string
# that is r"\\stuff"; the equivalent non-raw literal would need four
# backslashes ("\\\\stuff").
stuff_match = re.search(r"\\stuff", rand_str)
print("Find \\stuff: ", stuff_match)

Find \stuff:  <re.Match object; span=(8, 14), match='\\stuff'>


In [2]:
rnd_str = "F.B.I. I.R.S. CIA"
# An unescaped "." matches any character, while "\." matches a literal period:
# the pattern reads <any>.<any>.<any>
acronym_pattern = re.compile(r".\..\..")
print(acronym_pattern.findall(rnd_str))

['F.B.I', 'I.R.S']


In [8]:
rand_str = '''This is a
long string
of 3 lines'''
print(rand_str)

# Flatten the text onto one line by turning each line break into two spaces.
regex = re.compile("\n")
rand_str = "  ".join(regex.split(rand_str))
print(rand_str)

# Other escapes that can be targeted the same way: \b \f \r \t \v \r\n

This is a
long string
of 3 lines
This is a  long string  of 3 lines


In [11]:
rnd_str = "12345"
# The pattern is a raw string: "\d" in a normal literal is an invalid escape
# sequence that Python warns about (and plans to reject), whereas r"\d" hands
# the backslash to the regex engine untouched.
digits = re.findall(r"\d", rnd_str)  # one entry per digit character
print("Matches :", len(digits))

Matches : 5


In [12]:
rnd_str = "12345"
# Validation must cover the whole string: re.search would also accept longer
# inputs such as "1234567" because they merely contain five digits, so
# fullmatch is used. The pattern is raw so "\d" is not an invalid escape.
is_zip_code = re.fullmatch(r"\d{5}", rnd_str) is not None
if is_zip_code:
    print("It is zip code")

It is zip code


In [15]:
rnd = "123 12345 123456 1234567"
# {5,7} = between five and seven repetitions, so the three-digit run is skipped.
# Raw string so that "\d" is passed to the regex engine without an
# invalid-escape warning.
digit_runs = re.findall(r"\d{5,7}", rnd)
print("Matches: ", len(digit_runs))

Matches:  3


In [13]:
phone_str = "123-555-1234, 456.555.4321, (789)555-9876"

# Two accepted layouts: ddd-ddd-dddd (dash or dot separators) or (ddd)ddd-dddd.
phone_pattern = re.compile(r"(\d{3}[-\.]\d{3}[-\.]\d{4}|\(\d{3}\)\d{3}[-\.]\d{4})")


def is_valid_phone(number):
    """Return True only when the whole of `number` is a single phone number.

    Uses fullmatch rather than match: match anchors only at the start, so a
    string with trailing characters (e.g. "123-555-12345") would be accepted.
    """
    return phone_pattern.fullmatch(number) is not None


# Extraction: pull every phone-shaped substring out of free text.
phone_numbers = phone_pattern.findall(phone_str)

for number in phone_numbers:
    print(f"Valid phone number found: {number}")

# Validation: the candidate must match from its first to its last character.
test_number = "123-555-1234"
if is_valid_phone(test_number):
    print(f"{test_number} is a valid phone number format")

Valid phone number found: 123-555-1234
Valid phone number found: 456.555.4321
Valid phone number found: (789)555-9876
123-555-1234 is a valid phone number format


In [18]:
ph = "123-1234-123"
# Both checks are validations, so the pattern has to cover the entire input:
# with re.search any string merely containing a match passes, which makes the
# upper bound in {2,20} meaningless. Patterns are raw strings so "\w" is not an
# invalid escape sequence.
phone_is_valid = re.fullmatch(r"\w{3}-\w{4}-\w{3}", ph) is not None
if phone_is_valid:
    print("Phone number is valid")

name_is_valid = re.fullmatch(r"\w{2,20}", "0xArchit") is not None
if name_is_valid:
    print("Name is valid")

Phone number is valid
Name is valid


In [22]:
# Four whitespace-separated words with per-word length limits. The pattern is a
# raw string so that "\w" and "\s" are not invalid escape sequences.
four_words = re.search(r"\w{2,20}\s\w{2,20}\s\w{2,10}\s\w{2,5}", "0xarchit AkA Archit Jain")
if four_words:
    print("Match found")

Match found


In [26]:
# "+" means one or more repetitions, so each run of consecutive "a" characters
# counts as a single match.
a_runs = re.findall("a+", "a as has bug")
print("Matches: ", len(a_runs))

Matches:  3


In [10]:
email_list = "db@aol.com m@.com @apple.com db@.com"
# local part @ domain . top-level domain
# The dot before the TLD is escaped: an unescaped "." matches any character,
# which would accept strings such as "db@aolXcom". The pattern is a raw string
# so that "\w" and "\." are not invalid escape sequences.
email_pattern = re.compile(r"[\w._%+-]{1,20}@[\w.-]{2,20}\.[A-Za-z]{2,3}")
email_matches = email_pattern.findall(email_list)
print("Email Matches :", len(email_matches))

Email Matches : 1
