<a href="https://colab.research.google.com/github/SauravDakre/MachineLearning/blob/master/basics/numpy/numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# NumPy Arrays


`np.array` takes a Python list and an optional `dtype` (a NumPy type) as input.  
The example below creates a 1-D array.



In [2]:
# Build a 1-D array from a Python list, storing the elements as 32-bit floats.
arr = np.array([0,1,2], dtype=np.float32)
# A bare name as the last line of a cell displays its value.
arr

array([0., 1., 2.], dtype=float32)

The built-in `repr()` function returns a printable string representation of the given object.

In [3]:
# As the cell's last expression, the repr string itself is displayed (quotes included).
repr(arr)

'array([0., 1., 2.], dtype=float32)'

In [4]:
# Printing the repr string shows its contents without the surrounding quotes.
print(repr(arr))

array([0., 1., 2.], dtype=float32)


Example: creating a 2-D array (matrix).

In [5]:
# A list of equal-length lists becomes a 2-D array: each inner list is one row.
arr = np.array([[0,1,2], [3,4,5]], dtype=np.float32)
arr

array([[0., 1., 2.],
       [3., 4., 5.]], dtype=float32)

## Copying

In [6]:
# Plain assignment binds a second name to the same array, whereas .copy()
# creates an independent array with its own data.
a = np.array([0, 2])
b = np.array([1, 5])
print(f'a: {a!r}')
print(f'b: {b!r}')

c = a          # alias: c and a refer to the same array object
d = b.copy()   # separate array holding the same values as b

c[0] = 5       # also changes a, because c is a
d[0] = 6       # leaves b untouched

print(f'a: {a!r}')
print(f'b: {b!r}')

a: array([0, 2])
b: array([1, 5])
a: array([5, 2])
b: array([1, 5])


## Casting

In [7]:
# With no dtype given, NumPy infers an integer dtype from the int literals.
a = np.array([0,2])
print(a.dtype , a)
# astype produces the float32 version; the name `a` is rebound to that result.
a = a.astype(np.float32)
print(a.dtype , a)

int64 [0 2]
float32 [0. 2.]


## NaN

In [8]:
# np.nan is a floating-point value, so mixing it with ints yields a float array.
a = np.array([np.nan, 1,2])
# Mixing np.nan with a string yields a string array; NaN is stored as the text 'nan'.
b = np.array([np.nan, 'a'])

# NaN has no integer representation, so requesting an integer dtype fails:
# c = np.array([np.nan, 1], dtype=np.int32) # ValueError: cannot convert float NaN to integer
print('a', a.dtype, a)
print('b', b.dtype, b)

a float64 [nan  1.  2.]
b <U32 ['nan' 'a']


## infinity

In [9]:
# np.inf compares greater than this large finite number.
print(np.inf > 100000000000)
# Positive and negative infinity are float values and can be stored in a float array.
a = np.array([np.inf, 5, -np.inf])
print(repr(a))
# Like NaN, infinity cannot be converted to an integer dtype:
# np.array([np.inf, 5, -np.inf], dtype=np.int32) # OverflowError: cannot convert float infinity to integer

True
array([ inf,   5., -inf])


In [10]:
# One float literal among the ints is enough to make the whole array floating point.
float_arr = np.array([1,5.4, 3])
print(float_arr.dtype, repr(float_arr))

float64 array([1. , 5.4, 3. ])


# Basics

## ranged data

In [11]:
# np.arange(start, stop, step): `stop` is exclusive; `start` defaults to 0
# and `step` to 1 when omitted.
a = np.arange(2)          # 0, 1
b = np.arange(-1, 3)      # -1 .. 2
c = np.arange(2, 15, 4)   # every 4th value starting at 2, below 15
for ranged in (a, b, c):
    print(ranged)

[0 1]
[-1  0  1  2]
[ 2  6 10 14]


In [12]:
# np.linspace returns `num` evenly spaced samples between start and stop.
a = np.linspace(2, 15, num=4)                                   # stop (15) is included
b = np.linspace(2, 15, num=4, endpoint=False)                   # stop is excluded
c = np.linspace(2, 15, num=4, endpoint=False, dtype=np.int32)   # samples stored as int32
for samples in (a, b, c):
    print(samples)

[ 2.          6.33333333 10.66666667 15.        ]
[ 2.    5.25  8.5  11.75]
[ 2  5  8 11]


## reshape

In [13]:
# Ten consecutive integers (0..9) used as input for the reshape examples that follow.
ar = np.arange(10)
print(ar.dtype, ar)

int64 [0 1 2 3 4 5 6 7 8 9]


In [14]:
# The target shape must hold exactly as many elements as the array, otherwise reshape raises:
# r1 = np.reshape(ar, (2,4)) # ValueError: cannot reshape array of size 10 into shape (2,4)
r2 = ar.reshape(2, 5)
print(r2)

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [15]:
# At most one dimension of the new shape may be -1. NumPy infers that dimension
# so that the new shape contains all elements: 12 / (2 * 2) = 3 here.
ar = np.arange(12)
r2 = ar.reshape(2, -1, 2)
print(r2.shape, r2)

(2, 3, 2) [[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]]


In [16]:
# flatten collapses the multi-dimensional array into a single 1-D array.
f1 = r2.flatten()
print(f1.shape, f1)

(12,) [ 0  1  2  3  4  5  6  7  8  9 10 11]


## transpose

In [17]:
# Transposing a 2-D array swaps rows and columns: shape (4, 2) becomes (2, 4).
arr = np.arange(8).reshape(4, 2)
transposed = arr.T
print(arr.shape, arr)
print(transposed.shape, transposed)

(4, 2) [[0 1]
 [2 3]
 [4 5]
 [6 7]]
(2, 4) [[0 2 4 6]
 [1 3 5 7]]


In [18]:
# For more than two dimensions the new axis order is given explicitly:
# (1, 2, 0) makes old axis 1 the first axis, old axis 2 the second and
# old axis 0 the last, so shape (3, 4, 2) becomes (4, 2, 3).
arr = np.arange(24).reshape(3, 4, 2)
print(arr)
transposed = arr.transpose(1, 2, 0)
print(transposed)
print(f'arr shape: {arr.shape}')
print(f'transposed shape: {transposed.shape}')

[[[ 0  1]
  [ 2  3]
  [ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]
  [12 13]
  [14 15]]

 [[16 17]
  [18 19]
  [20 21]
  [22 23]]]
[[[ 0  8 16]
  [ 1  9 17]]

 [[ 2 10 18]
  [ 3 11 19]]

 [[ 4 12 20]
  [ 5 13 21]]

 [[ 6 14 22]
  [ 7 15 23]]]
arr shape: (3, 4, 2)
transposed shape: (4, 2, 3)


In [19]:
# Constant-filled arrays: zeros / ones take a shape, while the *_like variants
# take an existing array and reuse its shape and dtype.
a = np.zeros(shape=4)                       # float zeros by default
print(a)
b = np.ones(shape=(2, 4), dtype=np.int32)   # integer ones
print(b)
c = np.zeros_like(b)                        # same shape and dtype as b
print(c)

a = np.array([[1, 2], [3, 4]])
print(a)
b = np.ones_like(a)                         # same shape and dtype as a
print(b)


[0. 0. 0. 0.]
[[1 1 1 1]
 [1 1 1 1]]
[[0 0 0 0]
 [0 0 0 0]]
[[1 2]
 [3 4]]
[[1 1]
 [1 1]]


# math

## arithmetic

We can perform arithmetic operations such as addition, subtraction, multiplication, division, and exponentiation; each operation is applied element-wise to every element of the array.

In [20]:
# Each operator below is applied to every element of the array.
ar = np.array([[1,2],[3,4]])
print(ar)
print(ar/2)     # true division: produces floats
print(ar//2)    # floor division: stays integer
print(ar*2)     # multiplication by a scalar
print(ar**0.5)  # power 0.5, i.e. the square root of each element

[[1 2]
 [3 4]]
[[0.5 1. ]
 [1.5 2. ]]
[[0 1]
 [1 2]]
[[2 4]
 [6 8]]
[[1.         1.41421356]
 [1.73205081 2.        ]]


In [21]:
def perform_some_operation(t):
    """Return ``t * 2 + 1``.

    The input is not modified; the result is a new value. When ``t`` is a
    NumPy array the arithmetic is applied to every element.
    """
    doubled = t * 2
    return doubled + 1
  

In [22]:
# The function returns a new array; printing `ar` afterwards shows the original is unchanged.
modified = perform_some_operation(ar)
print(modified)
print(ar)
      

[[3 5]
 [7 9]]
[[1 2]
 [3 4]]


## non-linear functions

In [23]:
a = np.array([[1, 2], [3, 4]])
# Named `exponents` (not `pow`) so the built-in pow() stays usable for the
# rest of the kernel session.
exponents = np.array([[3, 3], [2, 2]])
print(a)
print(np.exp(a))    # e raised to each element of a
print(np.exp2(a))   # 2 raised to each element of a
print(np.log(a))    # natural logarithm (base e)
print(np.log10(a))  # base-10 logarithm
print(np.power(5, a))          # 5 raised to each element of a
print(np.power(a, 5))          # each element of a raised to the power 5
print(np.power(a, exponents))  # each element of a raised to the matching element of exponents

[[1 2]
 [3 4]]
[[ 2.71828183  7.3890561 ]
 [20.08553692 54.59815003]]
[[ 2.  4.]
 [ 8. 16.]]
[[0.         0.69314718]
 [1.09861229 1.38629436]]
[[0.         0.30103   ]
 [0.47712125 0.60205999]]
[[  5  25]
 [125 625]]
[[   1   32]
 [ 243 1024]]
[[ 1  8]
 [ 9 16]]


## matrix multiplication

In [24]:
# For two 1-D arrays matmul gives the inner product: 1*3 + 2*4 = 11.
print(np.matmul(np.array([1,2]), np.array([3,4])))

11


In [25]:
# Matrix product of two 2x2 matrices; the @ operator is the infix form of np.matmul.
a = np.array([[1, 2], [3, 4]])
b = np.array([[4, 2], [2, 4]])
print(a @ b)

[[ 8 10]
 [20 22]]


## Random

In [29]:
# randint draws integers from [low, high); with low=5 and high=6 the only possible value is 5.
print(np.random.randint(5, high=6))

5


In [27]:
# `size` sets the output shape: a 2x2 array of integers drawn from [-3, 14).
# No seed is set here, so the values differ from run to run.
random_arr = np.random.randint(-3, high=14, size=(2, 2))
random_arr

array([[13,  0],
       [-3,  7]])

In [30]:
# np.random.seed() sets the seed of NumPy's global random generator, which
# lets us control the results of the random functions: the same seed always
# produces the same sequence of draws.
np.random.seed(1)
print(np.random.randint(15))
random_arr = np.random.randint(low=-3, high=14, size=(2, 2))
print(random_arr)

5
[[8 9]
 [5 6]]


In [31]:
# Re-seeding with the same value (1) replays exactly the same draws.
np.random.seed(1)
print(np.random.randint(15))
random_arr = np.random.randint(low=-3, high=14, size=(2, 2))
print(random_arr)

5
[[8 9]
 [5 6]]


In [32]:
# A different seed (2) starts a different, but equally repeatable, sequence.
np.random.seed(2)
print(np.random.randint(15))
random_arr = np.random.randint(low=-3, high=14, size=(2, 2))
print(random_arr)

8
[[12 10]
 [ 5  8]]


In [34]:
# shuffle reorders the array in place (it returns nothing).
v = np.array([1,2,3,4])
np.random.shuffle(v)
print(v)
# For a 2-D array only the order of the rows changes; each row's contents stay together.
# NOTE(review): this reuses `random_arr` from an earlier cell and no seed is set here,
# so the printed result depends on what ran before.
np.random.shuffle(random_arr)
print(random_arr)

[1 3 4 2]
[[ 5  8]
 [12 10]]


In [35]:
# One sample from a uniform distribution over [-1.5, 2.2).
print(np.random.uniform(low=-1.5, high=2.2))
# One sample from a normal distribution with mean (loc) 1.5 and standard deviation (scale) 3.5.
print(np.random.normal(loc=1.5, scale=3.5))

0.29515086551950054
5.438479075011392


In [38]:
day = ['sunday', 'monday', 'tuesday']
# Without `p`, every entry is equally likely.
print(np.random.choice(day))
# `size` sets the output shape; `p` gives one selection probability per entry of `day`.
print(np.random.choice(day, size=(2,3), p=[0.4,0.4,0.2]))

monday
[['tuesday' 'sunday' 'sunday']
 ['sunday' 'sunday' 'monday']]


## indexing

In [39]:
ar = np.array([1,2,3,4,5])
# Indexing is zero-based, so index 2 is the third element.
print(ar[2])

3


In [40]:
# slicing: ar[start:stop] selects from start up to, but not including, stop
slices = (
    ar[:],     # whole array
    ar[1:],    # everything after the first element
    ar[2:4],   # indices 2 and 3
    ar[:-1],   # everything except the last element
    ar[-2:],   # the last two elements
)
for piece in slices:
    print(repr(piece))

array([1, 2, 3, 4, 5])
array([2, 3, 4, 5])
array([3, 4])
array([1, 2, 3, 4])
array([4, 5])


In [41]:
# 3x3 matrix holding 0..8, filled row by row.
m = np.arange(9).reshape(3, 3)
m

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [45]:
# Slicing with a single index expression works on rows.
print(repr(m[:]))    # all rows
print(repr(m[1:]))   # every row after the first
print("m:", m)
# With two index expressions the form is m[rows, columns].
print("m[:, -1]", repr(m[:, -1]))      # last column of every row (1-D result)
print("m[:, 1:]", repr(m[:, 1:]))      # every row, columns 1 onwards
print("m[0:1, 1:]", repr(m[0:1, 1:]))  # a row *slice* keeps the result 2-D
print("m[0:2, 1:]", repr(m[0:2, 1:]))  # first two rows, columns 1 onwards
print("m[0, 1:]", repr(m[0, 1:]))      # an integer row index drops that axis (1-D result)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
array([[3, 4, 5],
       [6, 7, 8]])
m: [[0 1 2]
 [3 4 5]
 [6 7 8]]
m[:, -1] array([2, 5, 8])
m[:, 1:] array([[1, 2],
       [4, 5],
       [7, 8]])
m[0:1, 1: array([[1, 2]])
m[0:2, 1: array([[1, 2],
       [4, 5]])
m[0, 1:] array([1, 2])


In [46]:
# argmin returns the index of the smallest element,
# argmax returns the index of the largest element.
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [-3, 9, 1],
])
print(arr[0].argmin())   # within the first row
print(arr[2].argmax())   # within the last row
print(arr.argmin())      # no axis given: index into the flattened array

2
1
5


In [48]:
print(arr)
# axis=0: for each column, the row index of that column's minimum.
# axis=1: for each row, the column index of that row's minimum.
# axis=-1 means the last dimension; for a 2-D array that is the same as axis=1.
print(repr(arr.argmin(axis=0)))
print(repr(arr.argmin(axis=1)))
print(repr(arr.argmax(axis=-1)))

[[-2 -1 -3]
 [ 4  5 -6]
 [-3  9  1]]
array([2, 0, 1])
array([2, 2, 0])
array([1, 1, 1])


In [49]:
print(arr)
# Comparisons are applied to every element and produce a boolean array of the same shape.
print(repr(arr == 3))
print(repr(arr > 0))
print(repr(arr != 1))
# ~ negates a boolean array element-wise, so this is the inverse of the previous result.
print(repr(~(arr != 1)))

[[-2 -1 -3]
 [ 4  5 -6]
 [-3  9  1]]
array([[False, False, False],
       [False, False, False],
       [False, False, False]])
array([[False, False, False],
       [ True,  True, False],
       [False,  True,  True]])
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True, False]])
array([[False, False, False],
       [False, False, False],
       [False, False,  True]])


In [50]:
arr = np.array([[0, 2, np.nan],
                [1, np.nan, -6],
                [np.nan, -2, 1]])
# isnan returns a boolean array that is True exactly where the element is NaN.
print(repr(np.isnan(arr)))

array([[False, False,  True],
       [False,  True, False],
       [ True, False, False]])


In [51]:
# filtering
# Given only a condition, np.where returns a tuple of index arrays (one per
# dimension) locating the elements for which the condition is True.
print("repr(np.where([True, False, True]))",repr(np.where([True, False, True])))

arr = np.array([0, 3, 5, 3, 1])
print("repr(np.where(arr == 3))",repr(np.where(arr == 3)))

arr = np.array([[0, 2, 3],
                [1, 0, 0],
                [-3, 0, 0]])
# For a 2-D array the tuple unpacks into row indices and column indices.
x_ind, y_ind = np.where(arr != 0)
print("repr(x_ind)",repr(x_ind)) # row indices of non-zero elements
print("repr(y_ind)", repr(y_ind)) # column indices of non-zero elements
# Indexing with both index arrays picks out the non-zero values themselves.
print("repr(arr[x_ind, y_ind])",repr(arr[x_ind, y_ind]))

repr(np.where([True, False, True])) (array([0, 2]),)
repr(np.where(arr == 3)) (array([1, 3]),)
repr(x_ind) array([0, 0, 1, 2])
repr(y_ind) array([1, 2, 0, 0])
repr(arr[x_ind, y_ind] array([ 2,  3,  1, -3])


In [52]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])
positive = arr > 0        # boolean mask of the strictly positive elements
print(repr(positive))
print(np.any(positive))   # logical OR over all elements: is at least one True?
print(np.all(positive))   # logical AND over all elements: are they all True?

array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])
True
False


## statistics

In [62]:
arr = np.array([
    [0, 2, 5],
    [1, 8, -6],
    [-9, -2, 5],
])
# Without an axis the reduction runs over every element.
print(np.min(arr))
print(np.max(arr))

print(repr(np.min(arr, axis=0)))  # column-wise: one minimum per column
print(repr(np.max(arr, axis=1)))  # row-wise: one maximum per row

-9
8
array([-9, -2, -6])
array([5, 8, 5])


In [63]:
print(arr)
# Summary statistics over all elements.
print(arr.mean())       # arithmetic mean
print(arr.var())        # variance
print(np.median(arr))   # median
# axis=-1 computes the median along the last axis, i.e. one value per row.
print(repr(np.median(arr, axis=-1)))

[[ 0  2  5]
 [ 1  8 -6]
 [-9 -2  5]]
0.4444444444444444
26.469135802469136
1.0
array([ 2.,  1., -2.])


## aggregation

In [64]:
print(arr)
print(arr.sum())              # total of all elements
print(repr(arr.sum(axis=0)))  # one sum per column
print(repr(arr.sum(axis=1)))  # one sum per row

[[ 0  2  5]
 [ 1  8 -6]
 [-9 -2  5]]
4
array([-8,  8,  4])
array([ 7,  3, -6])


In [65]:
print(arr)
# cumulative sum: each output element is the running total up to that position
print(repr(arr.cumsum()))        # over the flattened array
print(repr(arr.cumsum(axis=0)))  # running total down each column
print(repr(arr.cumsum(axis=1)))  # running total along each row

[[ 0  2  5]
 [ 1  8 -6]
 [-9 -2  5]]
array([ 0,  2,  7,  8, 16, 10,  1, -1,  4])
array([[ 0,  2,  5],
       [ 1, 10, -1],
       [-8,  8,  4]])
array([[  0,   2,   7],
       [  1,   9,   3],
       [ -9, -11,  -6]])


In [69]:
# A second 3x3 matrix (0..8), printed next to the existing arr.
ar2 = np.arange(9).reshape(3, 3)
print(ar2)
print(arr)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 0  2  5]
 [ 1  8 -6]
 [-9 -2  5]]


In [72]:
# concatenate joins arrays along an existing axis; axis=0 is the default.
print(repr(np.concatenate((arr, ar2), axis=0)))  # rows of ar2 below rows of arr
print(repr(np.concatenate((arr, ar2), axis=1)))  # columns of ar2 to the right of arr
print(repr(np.concatenate((ar2, arr), axis=1)))  # columns of arr to the right of ar2
print(repr(np.concatenate((ar2, arr), axis=0)))  # rows of arr below rows of ar2

array([[ 0,  2,  5],
       [ 1,  8, -6],
       [-9, -2,  5],
       [ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8]])
array([[ 0,  2,  5,  0,  1,  2],
       [ 1,  8, -6,  3,  4,  5],
       [-9, -2,  5,  6,  7,  8]])
array([[ 0,  1,  2,  0,  2,  5],
       [ 3,  4,  5,  1,  8, -6],
       [ 6,  7,  8, -9, -2,  5]])
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 0,  2,  5],
       [ 1,  8, -6],
       [-9, -2,  5]])


In [73]:
# save data
# Write the array to 'arr.npy' (relative to the current working directory), then read it back.
arr = np.array([1, 2, 3])
np.save('arr.npy', arr)
load_arr = np.load('arr.npy')
# The loaded array has the same values that were saved.
print(repr(load_arr))

array([1, 2, 3])
