In [None]:
# Regex quick-reference for this notebook.
# It is stored in a RAW string on purpose: in a normal string literal "\b" is
# turned into a backspace character and "\d", "\w", "\s", ... are invalid
# escape sequences that make Python emit a warning. The raw prefix keeps every
# backslash exactly as typed.
REGEX_CHEATSHEET = r'''
.       - Any Character Except New Line
\d      - Digit (0-9)
\D      - Not a Digit (0-9)
\w      - Word Character (a-z, A-Z, 0-9, _)
\W      - Not a Word Character
\s      - Whitespace (space, tab, newline)
\S      - Not Whitespace (space, tab, newline)

\b      - Word Boundary
\B      - Not a Word Boundary
^       - Beginning of a String
$       - End of a String

[]      - Matches Characters in brackets
[^ ]    - Matches Characters NOT in brackets
|       - Either Or
( )     - Group

Quantifiers:
*       - 0 or More
+       - 1 or More
?       - 0 or One
{3}     - Exact Number
{3,4}   - Range of Numbers (Minimum, Maximum)


#### Sample Regexs ####

[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+

'''

# print() renders the table line by line instead of showing the escaped repr.
print(REGEX_CHEATSHEET)

In [None]:
# Sample corpus used by the regex cells below.
# Raw string: the text deliberately contains literal backslashes ("\s" and
# "\ "), which are invalid escape sequences in a normal string literal. The
# resulting value is unchanged, so every match span stays the same.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.1234
123*555*-1234
123.555.1234
800-555-1234
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''

In [None]:
import re

In [None]:
# \B - Not a Word Boundary
#
# \B matches at a position that is NOT a word boundary, i.e. the characters on
# both sides are word characters (or both are non-word characters).
# 's\B' therefore finds every "s" that is immediately followed by another
# word character; an "s" that ends a word is skipped.
# The pattern is a raw string so the backslash reaches the regex engine
# untouched ('\B' in a normal string is an invalid escape sequence).

pattern = re.compile(r's\B')
matches = pattern.finditer('she sells sea shells on seashore')

for match in matches:
    print(match)

<re.Match object; span=(0, 1), match='s'>
<re.Match object; span=(4, 5), match='s'>
<re.Match object; span=(10, 11), match='s'>
<re.Match object; span=(14, 15), match='s'>
<re.Match object; span=(24, 25), match='s'>
<re.Match object; span=(27, 28), match='s'>


In [None]:
# '\Bs\B' requires a non-boundary on BOTH sides of the "s", so only an "s"
# that sits strictly inside a word matches (the second "s" of "seashore").
# Raw string: '\B' in a normal string literal is an invalid escape sequence.
pattern = re.compile(r'\Bs\B')
matches = pattern.finditer('she sells sea shells on seashore')

for match in matches:
    print(match)

<re.Match object; span=(27, 28), match='s'>


In [None]:
# ^ anchors the pattern to the very beginning of the string.
# The sentence starts with "she", so '^sea' finds nothing and the loop
# prints no lines.
pattern = re.compile('^sea')
matches = re.finditer(pattern, 'she sells sea shells on seashore')
for match in matches:
    print(match)

In [None]:
# $ - end of the string
#
# 'ore$' matches "ore" only when it is the last thing in the string, i.e. the
# tail of "seashore". (The pattern previously started with the digit 0
# instead of the letter o and therefore could never match.)

pattern = re.compile(r'ore$')
matches = pattern.finditer('she sells sea shells on seashore')

for match in matches:
    print(match)

In [None]:
# Find every stand-alone 3-digit number: exactly three digits with a word
# boundary (\b) on each side, so longer digit runs are not matched.
pattern = re.compile(r'\b\d\d\d\b')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(57, 60), match='321'>
<re.Match object; span=(61, 64), match='555'>
<re.Match object; span=(171, 174), match='321'>
<re.Match object; span=(175, 178), match='555'>
<re.Match object; span=(184, 187), match='123'>
<re.Match object; span=(188, 191), match='555'>
<re.Match object; span=(197, 200), match='123'>
<re.Match object; span=(201, 204), match='555'>
<re.Match object; span=(211, 214), match='123'>
<re.Match object; span=(215, 218), match='555'>
<re.Match object; span=(224, 227), match='800'>
<re.Match object; span=(228, 231), match='555'>
<re.Match object; span=(237, 240), match='900'>
<re.Match object; span=(241, 244), match='555'>


In [None]:
# Extract phone numbers written as nnn.nnn.nnnn, e.g. 123.567.1234.
# The dots are escaped (\.) so they match a literal "." rather than any character.
pattern = re.compile(r'\d\d\d\.\d\d\d\.\d\d\d\d')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>


In [None]:
print('afsan\\nkhan')

afsan\nkhan


In [None]:
# [] - matches any ONE of the characters listed inside the brackets

# Raw string: the sample text contains literal backslashes ("\s", "\ ") that
# are invalid escape sequences in a normal string literal. The value itself
# is unchanged.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.1234
123*555*-1234
123.555.1234
800-555-1234
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''

# [sa] matches a single lowercase "s" or "a" (character classes are case-sensitive).
pattern = re.compile(r'[sa]')
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(1, 2), match='a'>
<re.Match object; span=(55, 56), match='s'>
<re.Match object; span=(82, 83), match='a'>
<re.Match object; span=(85, 86), match='a'>
<re.Match object; span=(87, 88), match='a'>
<re.Match object; span=(92, 93), match='a'>
<re.Match object; span=(95, 96), match='a'>
<re.Match object; span=(97, 98), match='a'>
<re.Match object; span=(102, 103), match='s'>
<re.Match object; span=(117, 118), match='s'>
<re.Match object; span=(119, 120), match='a'>
<re.Match object; span=(156, 157), match='a'>
<re.Match object; span=(158, 159), match='a'>
<re.Match object; span=(160, 161), match='s'>
<re.Match object; span=(161, 162), match='a'>
<re.Match object; span=(162, 163), match='a'>
<re.Match object; span=(257, 258), match='a'>
<re.Match object; span=(272, 273), match='s'>
<re.Match object; span=(275, 276), match='a'>
<re.Match object; span=(278, 279), match='s'>
<re.Match object; span=(282, 283), match='s'>
<re.Match object; span=(290, 291), match='s'>


In [None]:
# [12345] matches one character at a time: any single digit from 1 to 5.
pattern = re.compile(r'[12345]')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(57, 58), match='3'>
<re.Match object; span=(58, 59), match='2'>
<re.Match object; span=(59, 60), match='1'>
<re.Match object; span=(61, 62), match='5'>
<re.Match object; span=(62, 63), match='5'>
<re.Match object; span=(63, 64), match='5'>
<re.Match object; span=(65, 66), match='4'>
<re.Match object; span=(66, 67), match='3'>
<re.Match object; span=(67, 68), match='2'>
<re.Match object; span=(68, 69), match='1'>
<re.Match object; span=(70, 71), match='1'>
<re.Match object; span=(71, 72), match='2'>
<re.Match object; span=(72, 73), match='3'>
<re.Match object; span=(73, 74), match='4'>
<re.Match object; span=(74, 75), match='5'>
<re.Match object; span=(164, 165), match='1'>
<re.Match object; span=(165, 166), match='1'>
<re.Match object; span=(171, 172), match='3'>
<re.Match object; span=(172, 173), match='2'>
<re.Match object; span=(173, 174), match='1'>
<re.Match object; span=(175, 176), match='5'>
<re.Match object; span=(176, 177), match='5'>
<re.Match object; 

In [None]:
# Extract phone numbers whose groups are separated by "-", "*" or ".":
# nnn.nnn.nnnn / nnn-nnn-nnnn / nnn*nnn*nnnn, e.g. 123.567.1234.
# Inside [] the characters "*" and "." are literals and need no escaping.
pattern = re.compile(r'\d\d\d[-*.]\d\d\d[-*.]\d\d\d\d')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>
<re.Match object; span=(224, 236), match='800-555-1234'>
<re.Match object; span=(237, 249), match='900-555-1234'>


In [None]:
# [^ ] - matches the char not in the brackets

In [None]:
# [^abc] matches any single character that is NOT "a", "b" or "c"
# (newlines included).
pattern = re.compile(r'[^abc]')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(4, 5), match='d'>
<re.Match object; span=(5, 6), match='e'>
<re.Match object; span=(6, 7), match='f'>
<re.Match object; span=(7, 8), match='g'>
<re.Match object; span=(8, 9), match='h'>
<re.Match object; span=(9, 10), match='i'>
<re.Match object; span=(10, 11), match='j'>
<re.Match object; span=(11, 12), match='k'>
<re.Match object; span=(12, 13), match='l'>
<re.Match object; span=(13, 14), match='m'>
<re.Match object; span=(14, 15), match='n'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(16, 17), match='p'>
<re.Match object; span=(17, 18), match='q'>
<re.Match object; span=(18, 19), match='u'>
<re.Match object; span=(19, 20), match='r'>
<re.Match object; span=(20, 21), match='t'>
<re.Match object; span=(21, 22), match='u'>
<re.Match object; span=(22, 23), match='v'>
<re.Match object; span=(23, 24), match='w'>
<re.Match object; span=(24, 25), match='x'>
<re.Match object; span=(25, 26), match='y'>


In [None]:
# Goal: pick every "-at" word except "bat".
# Approach 1: whitelist the allowed first letters with a character class.
data = 'cat fat mat pat bat'
pattern = re.compile(r'[cfmp]at')
matches = re.finditer(pattern, data)
for match in matches:
    print(match)

<re.Match object; span=(0, 3), match='cat'>
<re.Match object; span=(4, 7), match='fat'>
<re.Match object; span=(8, 11), match='mat'>
<re.Match object; span=(12, 15), match='pat'>


In [None]:
# Approach 2: negate instead of whitelisting - any character other than "b"
# followed by "at".
pattern = re.compile(r'[^b]at')
matches = re.finditer(pattern, data)
for match in matches:
    print(match)

<re.Match object; span=(0, 3), match='cat'>
<re.Match object; span=(4, 7), match='fat'>
<re.Match object; span=(8, 11), match='mat'>
<re.Match object; span=(12, 15), match='pat'>


In [None]:
# {n} - exact repetition count: \d{3} is shorthand for \d\d\d and \d{4} for
# \d\d\d\d.
pattern = re.compile(r'\d{3}[-*.]\d{3}[-*.]\d{4}')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(184, 196), match='123.555.1234'>
<re.Match object; span=(211, 223), match='123.555.1234'>
<re.Match object; span=(224, 236), match='800-555-1234'>
<re.Match object; span=(237, 249), match='900-555-1234'>


In [None]:
# {3,6} - range of repetitions (minimum, maximum): here the last group may
# contain between 3 and 6 digits.

# Raw string: the sample text contains literal backslashes ("\s", "\ ") that
# are invalid escape sequences in a normal string literal.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.123
123*555*-1234
123.555.1234678
800-555-123445
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''

# The quantifier is greedy: for "123.555.1234678" it takes the maximum of six
# trailing digits and leaves the final "8" unmatched.
pattern = re.compile(r'\d{3}[-*.]\d{3}[-*.]\d{3,6}')
matches = pattern.finditer(text_to_search)

for match in matches:
    print(match)

<re.Match object; span=(57, 69), match='321-555-4321'>
<re.Match object; span=(171, 183), match='321-555-4321'>
<re.Match object; span=(184, 195), match='123.555.123'>
<re.Match object; span=(210, 224), match='123.555.123467'>
<re.Match object; span=(226, 240), match='800-555-123445'>
<re.Match object; span=(241, 253), match='900-555-1234'>


In [None]:
# Sample corpus for the "extract the names" walkthrough.
# Raw string: the text contains literal backslashes ("\s", "\ ") that are
# invalid escape sequences in a normal string literal; the value is unchanged.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.123
123*555*-1234
123.555.1234678
800-555-123445
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''

In [None]:
# we have to extract all the valid names

In [None]:
# Step 1: start from the one thing every title shares - a capital "M".
pattern = re.compile(r'M')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(40, 41), match='M'>
<re.Match object; span=(89, 90), match='M'>
<re.Match object; span=(254, 255), match='M'>
<re.Match object; span=(266, 267), match='M'>
<re.Match object; span=(275, 276), match='M'>
<re.Match object; span=(284, 285), match='M'>
<re.Match object; span=(298, 299), match='M'>
<re.Match object; span=(304, 305), match='M'>


In [None]:
# Step 2: narrow the search to the literal prefix "Mr".
pattern = re.compile(r'Mr')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 256), match='Mr'>
<re.Match object; span=(266, 268), match='Mr'>
<re.Match object; span=(284, 286), match='Mr'>
<re.Match object; span=(298, 300), match='Mr'>
<re.Match object; span=(304, 306), match='Mr'>


In [None]:
# Step 3: add a "." - left unescaped it is the wildcard, so it also accepts
# "Mr ", "Mrs" and "Mr_", not only "Mr.".
pattern = re.compile(r'Mr.')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 257), match='Mr.'>
<re.Match object; span=(266, 269), match='Mr '>
<re.Match object; span=(284, 287), match='Mrs'>
<re.Match object; span=(298, 301), match='Mr.'>
<re.Match object; span=(304, 307), match='Mr_'>


In [None]:
# Step 4: escape the dot (\.) so that only a literal "." after "Mr" matches.
pattern = re.compile(r'Mr\.')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 257), match='Mr.'>
<re.Match object; span=(298, 301), match='Mr.'>


In [None]:
# Step 5: require the space that separates the title from the name.
pattern = re.compile(r'Mr\. ')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

In [None]:
# Step 6: the name must begin with one uppercase letter ([A-Z]).
pattern = re.compile(r'Mr\. [A-Z]')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 259), match='Mr. S'>
<re.Match object; span=(298, 303), match='Mr. T'>


In [None]:
# Step 7 - introducing "*": [a-z]* takes zero or more lowercase letters after
# the capital, so both "Mr. Schafer" and the one-letter "Mr. T" match.
pattern = re.compile(r'Mr\. [A-Z][a-z]*')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 265), match='Mr. Schafer'>
<re.Match object; span=(298, 303), match='Mr. T'>


In [None]:
# Step 8: "\.?" makes the dot optional, which additionally picks up "Mr Smith".
pattern = re.compile(r'Mr\.? [A-Z][a-z]*')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 265), match='Mr. Schafer'>
<re.Match object; span=(266, 274), match='Mr Smith'>
<re.Match object; span=(298, 303), match='Mr. T'>


In [None]:
# Sample corpus (same content as the previous definition), re-declared so the
# remaining steps can be run from here.
# Raw string: the text contains literal backslashes ("\s", "\ ") that are
# invalid escape sequences in a normal string literal; the value is unchanged.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ\s
321-555-4321
1234567890
Ha HaHa
MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )
khanafsaan11.com
321-555-4321
123.555.123
123*555*-1234
123.555.1234678
800-555-123445
900-555-1234
Mr. Schafer
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
Mr_hello
'''

In [None]:
# Step 9: M[rs] accepts "Mr" or "Ms" as the title. A character class matches
# exactly one character, so the three-letter "Mrs" is still not covered.
pattern = re.compile(r'M[rs]\.? [A-Z][a-z]*')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 265), match='Mr. Schafer'>
<re.Match object; span=(266, 274), match='Mr Smith'>
<re.Match object; span=(275, 283), match='Ms Davis'>
<re.Match object; span=(298, 303), match='Mr. T'>


In [None]:
# Step 10: a group with alternation, (r|rs|s), covers "Mr", "Mrs" and "Ms",
# so every valid title + name line is extracted.
pattern = re.compile(r'M(r|rs|s)\.? [A-Z][a-z]*')
matches = re.finditer(pattern, text_to_search)
for match in matches:
    print(match)

<re.Match object; span=(254, 265), match='Mr. Schafer'>
<re.Match object; span=(266, 274), match='Mr Smith'>
<re.Match object; span=(275, 283), match='Ms Davis'>
<re.Match object; span=(284, 297), match='Mrs. Robinson'>
<re.Match object; span=(298, 303), match='Mr. T'>


# NUMPY

In [None]:
# NumPy - Numerical Python
#
# NumPy is used for computation in preprocessing of arrays (data with 1 or more dimensions).
# NumPy is a package which provides a convenient and efficient way to handle large amounts of data.
# NumPy is written in C and extended in Python.
#
# Why NumPy?
#
# - NumPy performs array-oriented computation
# - it handles large amounts of data
# - it has a large set of methods to perform mathematical operations

In [None]:
import numpy as np

In [None]:
# Creation Numpy array

# 1. using a list / tuple

In [None]:
# Build an ndarray from a plain Python list, then show each object next to
# its type.
a = [1, 2, 3, 4, 5]
array = np.array(a)

for obj in (a, array):
    print(obj, type(obj))

[1, 2, 3, 4, 5] <class 'list'>
[1 2 3 4 5] <class 'numpy.ndarray'>


In [None]:
# The same conversion works for a tuple (the parentheses are optional when
# writing one).
a = (1, 2, 3, 4, 5)
array = np.array(a)

for obj in (a, array):
    print(obj, type(obj))

(1, 2, 3, 4, 5) <class 'tuple'>
[1 2 3 4 5] <class 'numpy.ndarray'>


In [None]:
# A list of equal-length lists becomes a 2-D array (3 rows x 2 columns here).
a = [[1, 2], [3, 4], [5, 7]]
array = np.array(a)

for obj in (a, array):
    print(obj, type(obj))

[[1, 2], [3, 4], [5, 7]] <class 'list'>
[[1 2]
 [3 4]
 [5 7]] <class 'numpy.ndarray'>


In [None]:
# Important attributes

In [None]:
# size of an array - no of elements present in array
array.size

6

In [None]:
# shape of an array 
array.shape 

(3, 2)

In [None]:
# dim of an array
array.ndim

2

In [None]:
# scalar - 45 - 0 dim
#
# vector - [11,10,9,8] - 1 dim


In [None]:
# 2. arange

np.arange(70,60,-2)

array([70, 68, 66, 64, 62])

In [None]:
# 3. linspace

# np.linspace(start,end,equal_division)
np.linspace(0,101,10)

array([  0.        ,  11.22222222,  22.44444444,  33.66666667,
        44.88888889,  56.11111111,  67.33333333,  78.55555556,
        89.77777778, 101.        ])

In [None]:
1.47368421 - 1

0.47368421000000005

In [None]:
9.52631579- 9.05263158

0.47368421000000005

In [None]:
#4. zeros

np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [None]:
#5. zeros_like

data = np.linspace(0,101,10)
np.zeros_like(data)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# One float in the input (1.0) is enough for the whole array to be stored as
# floats - note the trailing dots in the printed array.
a = [[1.0, 2], [3, 4], [5, 7]]
array = np.array(a)

for obj in (a, array):
    print(obj, type(obj))

[[1.0, 2], [3, 4], [5, 7]] <class 'list'>
[[1. 2.]
 [3. 4.]
 [5. 7.]] <class 'numpy.ndarray'>


In [None]:
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [None]:
np.zeros_like(array)

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [None]:
array

array([[1., 2.],
       [3., 4.],
       [5., 7.]])

In [None]:
#6. ones

np.ones((3,2))

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [None]:
#7. ones_like

np.ones_like(array)

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [None]:
# 8. eye - identity matrix
#
# the diagonal is 1, everything else is 0
# - rows = columns (square matrix)

In [None]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [None]:
# 9. random integers
#
# np.random.random_integers(low, high) is deprecated and emits a
# DeprecationWarning. np.random.randint excludes its upper bound, so
# high + 1 is passed to keep the same inclusive range [2, 4].
# Pass size=(rows, cols) to randint to get a random matrix instead of a
# single value.
random_int = np.random.randint(2, 4 + 1)
random_int

  np.random.random_integers(2,4)


2

In [None]:
# reshape - changing the shape of an array

In [None]:
data = np.arange(0,10)

In [None]:
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# 1d ----> 2d
#
# rule -
# the number of elements in the 1d array must equal the number of elements in the 2d array (rows * columns)

In [None]:
data.reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [None]:
data.reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [None]:
names_data = np.array(['Vinay','Dovvari','keerthana','hmridha','Afsan','raja','shailja', 'Biven'])

In [None]:
names_data

array(['Vinay', 'Dovvari', 'keerthana', 'hmridha', 'Afsan', 'raja',
       'shailja', 'Biven'], dtype='<U9')

In [None]:
names_data.reshape(1,1,8)

array([[['Vinay', 'Dovvari', 'keerthana', 'hmridha', 'Afsan', 'raja',
         'shailja', 'Biven']]], dtype='<U9')

In [None]:
# infer
# flatten
# ravel
# Basic operations on arrays
# Indexing and slicing of arrays
# iterate
# splitting of arrays