In [None]:
'''
.       - Any Character Except New Line
\d      - Digit (0-9)
\D      - Not a Digit (0-9)
\w      - Word Character (a-z, A-Z, 0-9, _)
\W      - Not a Word Character
\s      - Whitespace (space, tab, newline)
\S      - Not Whitespace (space, tab, newline)

\b      - Word Boundary
\B      - Not a Word Boundary
^       - Beginning of a String
$       - End of a String

[]      - Matches Characters in brackets
[^ ]    - Matches Characters NOT in brackets
|       - Either Or
( )     - Group

Quantifiers:
*       - 0 or More
+       - 1 or More
?       - 0 or One
{3}     - Exact Number
{3,4}   - Range of Numbers (Minimum, Maximum)


#### Sample Regexs ####

[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+

'''

In [4]:
text_to_search = '''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.123
123*555*-1234
123.555.123456
800-555-1234
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''


In [2]:
import re

In [None]:
# extract all the phone number with the pattern nnn.nnn.nnnn

In [9]:
search = re.compile(r'\d\d\d.\d\d\d.\d\d\d\d')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>
<re.Match object; span=(224, 236), match='800-555-1234'>
<re.Match object; span=(237, 249), match='900-555-1234'>


In [10]:
search = re.compile(r'\d\d\d\.\d\d\d\.\d\d\d\d')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>


In [12]:
# {} - Exact Number

search = re.compile(r'\d{3}\.\d{3}\.\d{4}')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>


In [5]:
# {3,6}-range --> min and max

search = re.compile(r'\d{3}\.\d{3}\.\d{3,6}')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(184, 195), match='123.555.123'>
<re.Match object; span=(210, 224), match='123.555.123456'>


In [6]:
# [] - Matches character in brackets

search = re.compile(r'[ab]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(82, 83), match='a'>
<re.Match object; span=(85, 86), match='a'>
<re.Match object; span=(87, 88), match='a'>
<re.Match object; span=(92, 93), match='a'>
<re.Match object; span=(95, 96), match='a'>
<re.Match object; span=(97, 98), match='a'>
<re.Match object; span=(113, 114), match='b'>
<re.Match object; span=(119, 120), match='a'>
<re.Match object; span=(156, 157), match='a'>
<re.Match object; span=(158, 159), match='a'>
<re.Match object; span=(161, 162), match='a'>
<re.Match object; span=(162, 163), match='a'>
<re.Match object; span=(258, 259), match='a'>
<re.Match object; span=(276, 277), match='a'>
<re.Match object; span=(288, 289), match='b'>


In [7]:
search = re.compile(r'ab')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(1, 3), match='ab'>


In [10]:
search = re.compile(r'[cat]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(82, 83), match='a'>
<re.Match object; span=(85, 86), match='a'>
<re.Match object; span=(87, 88), match='a'>
<re.Match object; span=(91, 92), match='t'>
<re.Match object; span=(92, 93), match='a'>
<re.Match object; span=(95, 96), match='a'>
<re.Match object; span=(97, 98), match='a'>
<re.Match object; span=(98, 99), match='c'>
<re.Match object; span=(99, 100), match='t'>
<re.Match object; span=(110, 111), match='t'>
<re.Match object; span=(118, 119), match='c'>
<re.Match object; span=(119, 120), match='a'>
<re.Match object; span=(156, 157), match='a'>
<re.Match object; span=(158, 159), match='a'>
<re.Match object; span=(161, 162), match='a'>
<re.Match object; span=(162, 163), match='a'>
<re.Match object; span=(167, 168), match='c'>
<re.Match object; span=(256, 257), match='c'>
<re.Match object; span=(258, 259), match='a'>
<re.Match objec

In [8]:
# extract all the numbers with the pattern nnn.nnn.nnnn/nnn-nnn-nnnn

In [11]:
search = re.compile(r'\d{3}[.-]\d{3}[.-]\d{4}')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(210, 222), match='123.555.1234'>
<re.Match object; span=(225, 237), match='800-555-1234'>
<re.Match object; span=(238, 250), match='900-555-1234'>


In [None]:
# Significance of - between numbers and alphabet

In [13]:
search = re.compile(r'[1-9 a-e]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(57, 58), match='3'>
<re.Match object; span=(58, 59), match='2'>
<re.Match object; span=(59, 60), match='1'>
<re.Match object; span=(61, 62), match='5'>
<re.Match object; span=(62, 63), match='5'>
<re.Match object; span=(63, 64), match='5'>
<re.Match object; span=(65, 66), match='4'>
<re.Match object; span=(66, 67), match='3'>
<re.Match object; span=(67, 68), match='2'>
<re.Match object; span=(68, 69), match='1'>
<re.Match object; span=(70, 71), match='1'>
<re.Match object; span=(71, 72), match='2'>
<re.Match object; span=(72, 73), match='3'>
<re.Match object; span=(73, 74), match='4'>
<re.Match object; span=(74, 75), match='5'>
<re.Match object; span=(75, 76), match='6'>
<re.Match object; span=(76, 77), match='7'>
<re.Match object; span=(77, 78), match='8'

In [24]:
search = re.compile(r'[19-]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(59, 60), match='1'>
<re.Match object; span=(60, 61), match='-'>
<re.Match object; span=(64, 65), match='-'>
<re.Match object; span=(68, 69), match='1'>
<re.Match object; span=(70, 71), match='1'>
<re.Match object; span=(78, 79), match='9'>
<re.Match object; span=(164, 165), match='1'>
<re.Match object; span=(165, 166), match='1'>
<re.Match object; span=(173, 174), match='1'>
<re.Match object; span=(174, 175), match='-'>
<re.Match object; span=(178, 179), match='-'>
<re.Match object; span=(182, 183), match='1'>
<re.Match object; span=(184, 185), match='1'>
<re.Match object; span=(192, 193), match='1'>
<re.Match object; span=(197, 198), match='1'>
<re.Match object; span=(205, 206), match='-'>
<re.Match object; span=(206, 207), match='1'>
<re.Match object; span=(211, 212), match='1'>
<re.Match object; span=(219, 220), match='1'>
<re.Match object; span=(227, 228), match='-'>
<re.Match object; span=(231, 232), match='-'>
<re.Match object; span=(232, 233), match='1'>


In [26]:
search = re.compile(r'\d\d\d\*-\d\d\d\d')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(201, 210), match='555*-1234'>


In [28]:
search = re.compile(r'\d{3}[*.-]\d{3}[*.-]\d{4}')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>
<re.Match object; span=(224, 236), match='800-555-1234'>
<re.Match object; span=(237, 249), match='900-555-1234'>


In [30]:
search = re.compile(r'[abc]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(2, 3), match='b'>
<re.Match object; span=(3, 4), match='c'>
<re.Match object; span=(82, 83), match='a'>
<re.Match object; span=(85, 86), match='a'>
<re.Match object; span=(87, 88), match='a'>
<re.Match object; span=(92, 93), match='a'>
<re.Match object; span=(95, 96), match='a'>
<re.Match object; span=(97, 98), match='a'>
<re.Match object; span=(98, 99), match='c'>
<re.Match object; span=(113, 114), match='b'>
<re.Match object; span=(118, 119), match='c'>
<re.Match object; span=(119, 120), match='a'>
<re.Match object; span=(156, 157), match='a'>
<re.Match object; span=(158, 159), match='a'>
<re.Match object; span=(161, 162), match='a'>
<re.Match object; span=(162, 163), match='a'>
<re.Match object; span=(167, 168), match='c'>
<re.Match object; span=(255, 256), match='c'>
<re.Match object; span=(257, 258), match='a'>
<re.Match object; span=(275, 276), match='a'>
<re.Match object; span=(287, 288), match='b'>


In [29]:
search = re.compile(r'[^abc]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.Match object; span=(24, 25), match='x'>
<re.Match object; span=(25, 26), match='y'>


In [15]:
data = 'cat mat bat sat dat lat can'
import re

In [16]:
search = re.compile(r'[^b]at')
data = search.finditer(data)

for match in data:
    print(match)

<re.Match object; span=(0, 3), match='cat'>
<re.Match object; span=(4, 7), match='mat'>
<re.Match object; span=(12, 15), match='sat'>
<re.Match object; span=(16, 19), match='dat'>
<re.Match object; span=(20, 23), match='lat'>


In [7]:
text_to_search = '''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.1234
123*555*-1234
123.555.1234
800-555-1234
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''


In [None]:
# Extract all the names

In [8]:
search = re.compile(r'M')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(40, 41), match='M'>
<re.Match object; span=(89, 90), match='M'>
<re.Match object; span=(250, 251), match='M'>
<re.Match object; span=(262, 263), match='M'>
<re.Match object; span=(271, 272), match='M'>
<re.Match object; span=(280, 281), match='M'>
<re.Match object; span=(294, 295), match='M'>
<re.Match object; span=(300, 301), match='M'>


In [9]:
search = re.compile(r'Mr')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 252), match='Mr'>
<re.Match object; span=(262, 264), match='Mr'>
<re.Match object; span=(280, 282), match='Mr'>
<re.Match object; span=(294, 296), match='Mr'>
<re.Match object; span=(300, 302), match='Mr'>


In [11]:
search = re.compile(r'Mr\.')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 253), match='Mr.'>
<re.Match object; span=(294, 297), match='Mr.'>


In [12]:
search = re.compile(r'Mr\. ')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 254), match='Mr. '>
<re.Match object; span=(294, 298), match='Mr. '>


In [14]:
search = re.compile(r'Mr\. [A-Z]')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 255), match='Mr. S'>
<re.Match object; span=(294, 299), match='Mr. T'>


In [19]:
# 1 and more --> +

search = re.compile(r'Mr\. [A-Z][a-z]+')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(251, 262), match='Mr. Schafer'>


In [18]:
# 0 and more --> *

search = re.compile(r'Mr\. [A-Z][a-z]*')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(251, 262), match='Mr. Schafer'>
<re.Match object; span=(295, 300), match='Mr. T'>


In [19]:
search = re.compile(r'Mr\.? [A-Z][a-z]*')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 261), match='Mr. Schafer'>
<re.Match object; span=(262, 270), match='Mr Smith'>
<re.Match object; span=(294, 299), match='Mr. T'>


In [20]:
search = re.compile(r'M[rs]\.? [A-Z][a-z]*')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 261), match='Mr. Schafer'>
<re.Match object; span=(262, 270), match='Mr Smith'>
<re.Match object; span=(271, 279), match='Ms Davis'>
<re.Match object; span=(294, 299), match='Mr. T'>


In [22]:
search = re.compile(r'M(r|rs|s)\.? [A-Z][a-z]*')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 261), match='Mr. Schafer'>
<re.Match object; span=(262, 270), match='Mr Smith'>
<re.Match object; span=(271, 279), match='Ms Davis'>
<re.Match object; span=(280, 293), match='Mrs. Robinson'>
<re.Match object; span=(294, 299), match='Mr. T'>


In [39]:
search = re.compile(r'M(r|rs|s)[\_|.]?[\s]?[A-z][a-z]*')
data = search.finditer(text_to_search)

for match in data:
    print(match)

<re.Match object; span=(250, 261), match='Mr. Schafer'>
<re.Match object; span=(262, 270), match='Mr Smith'>
<re.Match object; span=(271, 279), match='Ms Davis'>
<re.Match object; span=(280, 283), match='Mrs'>
<re.Match object; span=(294, 299), match='Mr. T'>
<re.Match object; span=(300, 308), match='Mr_hello'>


# Numpy

In [21]:
# Numpy

import numpy as np

In [43]:
#1. Creat an array by list/tuple

a = [1,2,3,4,5]
data = np.array(a)
print(data, type(data))

[1 2 3 4 5] <class 'numpy.ndarray'>


In [22]:
# Creat an array

a = 1,2,3,4,5
data = np.array(a)
print(data, type(data))

[1 2 3 4 5] <class 'numpy.ndarray'>


In [24]:
a = [[1,2],[3,4],[5,6]]
data = np.array(a)
print(data, type(data))

[[1 2]
 [3 4]
 [5 6]] <class 'numpy.ndarray'>


In [25]:
print(data.size) # counts the elements 
print(data.shape) # it tells the shape of an array
print(data.ndim) # dimension

6
(3, 2)
2


In [47]:
#2. arange function

np.arange(0,10,3)

array([0, 3, 6, 9])

In [29]:
a = list(range(0,10,3))
print(type(a))
print(a)

<class 'list'>
[0, 3, 6, 9]
