### 1. Numpy vs List

In [23]:
import numpy as np
import sys
import time

In [2]:
np.array([1,2,3])

array([1, 2, 3])

#### The 3 main benefits of a numpy array over a Python list: less memory, faster, and more convenient
In the example below each numpy element occupies 4 bytes (the array's dtype here is int32), whereas in Python everything is an object: a list first stores an array of pointers, and each pointer refers to a separate int object of 28 bytes (the value `sys.getsizeof(5)` reports below).
So the difference becomes important when working with a huge amount of data.

In [19]:
# python list
l = list(range(1000))  # materialise the range so that l really is a list with 1000 elements
# Rough footprint: size of one small int object times the number of elements
# (the list's own array of pointers is not counted in this estimate)
print(sys.getsizeof(5)*len(l))

28000


In [20]:
# numpy array
array = np.arange(1000)
# nbytes is the total size of the element buffer, i.e. the number of
# elements (size) multiplied by the bytes taken by each element (itemsize)
print(array.nbytes)

4000


From the example below, we can see that numpy takes considerably less time than a Python list.

In [33]:
sizes= 100000

In [34]:
# Build real lists so the benchmark measures list traversal rather than lazy range objects
l1 = list(range(sizes))
l2 = list(range(sizes))
# perf_counter() is the clock intended for measuring short durations
start = time.perf_counter()
ans = [(x+y) for x,y in zip(l1,l2)]
print("python list took:" ,(time.perf_counter()-start)*1000) #multiplied by 1000 for millisecond

python list took: 14.742374420166016


In [35]:
a1 = np.arange(sizes)
a2 = np.arange(sizes)
# perf_counter() is the clock intended for measuring short durations
start = time.perf_counter()
ans = a1+a2  # element-wise addition of the two whole arrays in one expression
print("numpy arr took:" ,(time.perf_counter()-start)*1000) # elapsed time in milliseconds

numpy arr took: 3.145933151245117


It is also convenient: we can apply an operation to whole arrays directly, without writing a list comprehension.

In [36]:
a1 = np.array([1,2,3])
a2 = np.array([4,5,6])

In [40]:
print(a1+a2)
print(a2-a1)
print(a1*a2)

[5 7 9]
[3 3 3]
[ 4 10 18]


### 2. Numpy basic array operations

In [42]:
import numpy as np

In [48]:
#1d array
a = np.array([5,6,9])
a[0]

5

In [47]:
#2d array
a = np.array([[1,2],[3,4],[5,6]])
a.ndim   ##gives dimension of array

2

In [50]:
a.itemsize  # bare expression: not displayed, because only the last expression of a cell is echoed
## itemsize is the byte size of each element; it is 4 here because this array's dtype is int32 (see a.dtype below)
a.dtype

dtype('int32')

In [154]:
a= np.array([[1,2],[3,4],[5,6]],dtype=np.float64)
print(a.itemsize)
print(a)

8
[[1. 2.]
 [3. 4.]
 [5. 6.]]


In [155]:
print(a.size) ## total no. of elements
print(a.shape) ## gives info on dimension, (sort of height & width, or rows & cols)

6
(3, 2)


In [60]:
# can also define dtype to be complex no.
a= np.array([[1,2],[3,4],[5,6]],dtype=complex)
a

array([[1.+0.j, 2.+0.j],
       [3.+0.j, 4.+0.j],
       [5.+0.j, 6.+0.j]])

In [62]:
# initializing arrays with some placeholder numbers
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [63]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [161]:
# Python's built-in range vs numpy's arange
l= range(0,5) # 0 inclusive, 5 exclusive
l[0]
# np.arange follows the same start-inclusive / stop-exclusive convention
np.arange(1,5)
np.arange(1,7,2) # step of 2 between consecutive values; only this last expression is displayed

array([1, 3, 5])

In [75]:
# linspace(start, stop, num) generates num linearly spaced values between start and stop, both endpoints included
np.linspace(1,5,10)
np.linspace(1,5,5)
np.linspace(1,5,20) # only this last call is displayed as the cell output

array([1.        , 1.21052632, 1.42105263, 1.63157895, 1.84210526,
       2.05263158, 2.26315789, 2.47368421, 2.68421053, 2.89473684,
       3.10526316, 3.31578947, 3.52631579, 3.73684211, 3.94736842,
       4.15789474, 4.36842105, 4.57894737, 4.78947368, 5.        ])

In [80]:
#reshape funcn
a= np.array([[1,2],[3,4],[5,6]])
print(a)
print(a.shape)
a.reshape(2,3)
a.reshape(6,1) ### any compatible dimension will work

[[1 2]
 [3 4]
 [5 6]]
(3, 2)


array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

In [83]:
# to flatten array to 1D
print(a.ravel()) # returns the flattened result and leaves a's own shape untouched; per the NumPy docs the result may be a view on a's data rather than a copy (a.flatten() always copies)
print(a)

[1 2 3 4 5 6]
[[1 2]
 [3 4]
 [5 6]]


In [87]:
#Some mathematical operations
a.min()
a.max()
a.sum()
a.sum(axis=0) # sum of col's elements
a.sum(axis=1) # sum of row's elements

array([ 3,  7, 11])

In [91]:
#finding sqrt of arr elements
np.sqrt(a)
# finding std. deviation
np.std(a)

1.707825127659933

In [92]:
a= np.array([[1,2],[3,4]])
b= np.array([[5,6],[7,8]])

In [94]:
print(a+b)
print(a-b)
print(a*b)
print(a/b)

[[ 6  8]
 [10 12]]
[[-4 -4]
 [-4 -4]]
[[ 5 12]
 [21 32]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]


In [95]:
# matrix product
a.dot(b)

array([[19, 22],
       [43, 50]])

### 3. Numpy slicing indexing

In [101]:
#python lists slicing
l =[6,7,8]
print(l[0:2])
print(l[-1])

# Slicing in numpy array is similar to that of lists
a = np.array([6,7,8])
print(a[0:2])
print(a[-1])

[6, 7]
8
[6 7]
8


In [165]:
a = np.array([[6,7,8],[1,2,3],[9,4,5]])
a

array([[6, 7, 8],
       [1, 2, 3],
       [9, 4, 5]])

In [168]:
a[0,1] ## implies 1st row 2nd col, here also indexing is 0 based 

7

In [174]:
a[0:2] # prints 0th row and 1st row

array([[6, 7, 8],
       [1, 2, 3]])

In [179]:
a[0:2,2] # third element (column index 2) of the 0th row and 1st row

array([8, 3])

In [186]:
a[-1]

array([9, 4, 5])

In [109]:
a[-1, 0:2] #last row 0th and 1st col

array([9, 4])

In [199]:
a[:,1:3] # all rows and 1st and 2nd col inclusive

array([[7, 8],
       [2, 3],
       [4, 5]])

In [112]:
# Iterating through array
a = np.array([[6,7,8],[1,2,3],[9,4,5]])
for row in a:
    print(row)

[6 7 8]
[1 2 3]
[9 4 5]


In [113]:
# visit every cell, row after row, through the .flat attribute (an iterator, so it is used without call parentheses)
for cell in a.flat:
    print(cell)

6
7
8
1
2
3
9
4
5


In [200]:
#  stacking two arrays together
# initializing two 2d arr with no. from 0 to 5 and 6 to 11 & reshaping it in 3*2
a = np.arange(6).reshape(3,2)
b = np.arange(6,12).reshape(3,2)
a

array([[0, 1],
       [2, 3],
       [4, 5]])

In [202]:
print(np.vstack((a,b))) # vertical stacking
print(np.hstack((a,b))) # horizontal stacking

[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]]
[[ 0  1  6  7]
 [ 2  3  8  9]
 [ 4  5 10 11]]


In [205]:
a = np.arange(30).reshape(2,15)
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [219]:
# Splitting arrays
result = np.hsplit(a,3) # horizontally into 3 diff arrays
print(result)
print(result[2])

[array([[ 0,  1,  2,  3,  4],
       [15, 16, 17, 18, 19]]), array([[ 5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24]]), array([[10, 11, 12, 13, 14],
       [25, 26, 27, 28, 29]])]
[[10 11 12 13 14]
 [25 26 27 28 29]]


In [218]:
print(np.vsplit(a,2))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]]), array([[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])]


In [136]:
# Indexing with boolean arrays
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [138]:
b=a>6
b # so with boolean condition we get a new array in bool form

array([[False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True]])

In [147]:
# way of extracting all elements>6
a[b]

array([ 7,  8,  9, 10, 11])

In [149]:
# replacing elements>6 with -1
a[b] = -1
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6, -1],
       [-1, -1, -1, -1]])

### 4. Read Write Text files

Numpy provides functions for reading and writing arrays to text files.

In [150]:
# writing: build the 3x3 int32 matrix holding 1..9 and dump it as text
x = np.arange(1, 10, dtype=np.int32).reshape(3, 3)
np.savetxt("numpy_test.txt", x)

In [152]:
# reading
y = np.loadtxt("numpy_test.txt")
print(y)

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


### loadtxt and savetxt in numpy

#### 4.1 loadtxt

In [230]:
# help(np.loadtxt)

In [228]:
data = np.loadtxt('data_file_1.csv',skiprows=9,delimiter=',') # skiprows skips the leading rows that are not numeric
# delimiter=',' is needed as well: without it each comma-separated line is treated as one string instead of separate numeric values
# leaving out either of the two arguments raises an error for this file

In [232]:
# shows first 10 rows
data[0:10] 

array([[0.        , 0.        , 1.75      ],
       [0.01      , 0.06279052, 1.73817205],
       [0.02      , 0.12533323, 1.70287474],
       [0.03      , 0.18738131, 1.64466473],
       [0.04      , 0.24868989, 1.56446002],
       [0.05      , 0.30901699, 1.46352549],
       [0.06      , 0.36812455, 1.34345294],
       [0.07      , 0.42577929, 1.20613598],
       [0.08      , 0.48175367, 1.05374019],
       [0.09      , 0.53582679, 0.88866894]])

In [234]:
# but in case i'm writing the file, so instead I can in file itself put # before rows I want to skip..
# and seperate numeric values by tab/space rather than ','
# So lets use one such file..
data1b = np.loadtxt('data_file_1b.csv')
data1b[:10]

array([[0.        , 0.        , 1.75      ],
       [0.01      , 0.06279052, 1.73817205],
       [0.02      , 0.12533323, 1.70287474],
       [0.03      , 0.18738131, 1.64466473],
       [0.04      , 0.24868989, 1.56446002],
       [0.05      , 0.30901699, 1.46352549],
       [0.06      , 0.36812455, 1.34345294],
       [0.07      , 0.42577929, 1.20613598],
       [0.08      , 0.48175367, 1.05374019],
       [0.09      , 0.53582679, 0.88866894]])

In [235]:
(data-data1b)[:10]

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [236]:
(data==data1b)[:10] # show only the first 10 rows of the element-wise comparison instead of dumping the whole mask

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

#### 4.2 savetxt

In [239]:
# help(np.savetxt)
# some of the important defaults are: fmt='%.18e', delimiter=' ' (a single space),
# newline='\n', header='' -- info on all parameters can be found using help

In [None]:
# NOTE(review): wildcard imports copy every public name of these packages into the notebook
# namespace, which hides where a name comes from; the other cells of this notebook use the
# `np.` prefix -- confirm whether these two lines are still needed
from numpy import *
from matplotlib import *

In [246]:
# Sample a sine and a cosine signal on t = 0, 0.01, ..., below 1
t = np.arange(0,1,0.01)
y1 = np.sin(2*np.pi*t)
y2 = np.cos(4*np.pi*t)

# column_stack places t, y1 and y2 side by side, one per column
data = np.column_stack([t,y1,y2])

comments = ['line1','line2','line3']
labels = ['t','y1','y2']

# Header text = the free-form comment lines followed by one comma-separated row of column labels
label_row = ','.join(labels)
my_header = '\n'.join(comments + [label_row])

np.savetxt('savetxt_data_text.csv',data,delimiter=',',header=my_header)

In [247]:
data[:10] #here, data being same as defined 
# but when we do savetxt data is being stored by default in '%.18e' format

array([[0.        , 0.        , 1.        ],
       [0.01      , 0.06279052, 0.9921147 ],
       [0.02      , 0.12533323, 0.96858316],
       [0.03      , 0.18738131, 0.92977649],
       [0.04      , 0.24868989, 0.87630668],
       [0.05      , 0.30901699, 0.80901699],
       [0.06      , 0.36812455, 0.72896863],
       [0.07      , 0.42577929, 0.63742399],
       [0.08      , 0.48175367, 0.53582679],
       [0.09      , 0.53582679, 0.42577929]])

In [249]:
print(my_header)

line1
line2
line3
t,y1,y2


### 5. Iterating through numpy array

In [320]:
a= np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [251]:
# simple way iterating each row
for row in a:
    print(row)

[0 1 2 3]
[4 5 6 7]
[ 8  9 10 11]


In [256]:
# for iterating through each cell
for row in a:
    for cell in row:
        print(cell)

0
1
2
3
4
5
6
7
8
9
10
11


In [257]:
# As seen before, # this can be done without an extra for loop as well, 
# as here we're flattening the array
for cell in a.flatten():
    print(cell)

0
1
2
3
4
5
6
7
8
9
10
11


Numpy offers an iterator object called nditer for iterating in more sophisticated ways.

In [266]:
# going row by row, visiting each row's elements left to right ---> called C order; gives the same element order as flatten()
for x in np.nditer(a, order='c'):
    print(x)

0
1
2
3
4
5
6
7
8
9
10
11


In [268]:
# the reverse: going column by column, visiting each column's elements top to bottom --> called Fortran order
for x in np.nditer(a, order='f'):
    print(x)

0
4
8
1
5
9
2
6
10
3
7
11


In [281]:
# printing entire col on each iteration
for x in np.nditer(a,order='F',flags =['external_loop']):
    print(x)

[0 4 8]
[1 5 9]
[ 2  6 10]
[ 3  7 11]


In [323]:
## modifying elements while iterating through array
# Writable iteration runs inside a `with` block so the iterator is closed and any
# buffered changes are written back to `a` before the array is used again
with np.nditer(a, op_flags=['readwrite']) as it:
    for x in it:
        x[...] = x*x  # x[...] assigns into the array element; a plain `x = ...` would only rebind the loop name

In [324]:
a ## prints square of orig. elements as original arr is being modified

array([[  0,   1,   4,   9],
       [ 16,  25,  36,  49],
       [ 64,  81, 100, 121]])

In [347]:
# iterating through two numpy arrays simultaneously
b = np.arange(3,15,4).reshape(3,1)
b

array([[ 3],
       [ 7],
       [11]])

In [348]:
## iterating over a (3x4) and b (3x1) together: b is broadcast across a's columns,
## so every element of a is paired with the b value from the same row
for x, y in np.nditer([a,b]):
    print(x,y)

0 3
1 3
4 3
9 3
16 7
25 7
36 7
49 7
64 11
81 11
100 11
121 11


The general broadcasting rule comes into play here. Shapes are compared dimension by dimension, starting from the trailing (rightmost) one.

Two dimensions are compatible when:
* They are equal, or
* One of them is 1

In [349]:
c = np.arange(3,19,5).reshape(4,1)
c

array([[ 3],
       [ 8],
       [13],
       [18]])

In [353]:
# b is (3,1) and c is (4,1): the first dimensions (3 vs 4) are neither equal nor 1, so they
# cannot be broadcast; the expected error is caught and printed so the notebook still runs top to bottom
try:
    for x,y in np.nditer([b,c]):
        print(x,y)
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,1) (4,1) 

In [360]:
# instead making c to d:-...works both with a,d and b,d
d = np.arange(3,18,5).reshape(3,1)
d
for x,y in np.nditer([b,d]):
    print(x,y)

3 3
7 8
11 13


In [373]:
e = np.arange(6).reshape(3,2)
print(a)
print(e)
# a is (3,4) and e is (3,2): the trailing dimensions (4 vs 2) are neither equal nor 1, so they
# cannot be broadcast; the expected error is caught and printed so the notebook still runs top to bottom
try:
    for x,y in np.nditer([a,e]):
        print(x,y)
except ValueError as err:
    print("ValueError:", err)

[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]]
[[0 1]
 [2 3]
 [4 5]]


ValueError: operands could not be broadcast together with shapes (3,4) (3,2) 

a,b = 3*4 & 3*1 --works

a,c = 3*4 & 4*1 --not works

a,d = 3*4 & 3*1 --works

a,e = 3*4 & 3*2 --not works

b,c = 3*1 & 4*1 -- not works

b,d = 3*1 & 3*1 --works

b,e = 3*1 & 3*2 --works

c,d = 4*1 & 3*1 --not works

c,e = 4*1 & 3*2 --not works

d,e = 3*1 & 3*2 --works