### What is NumPy?

NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.


At the core of the NumPy package is the ndarray object, which encapsulates n-dimensional arrays of homogeneous data types.

### NumPy Arrays vs. Python Sequences

- NumPy arrays have a fixed size at creation, unlike Python lists (which can grow dynamically). Changing the size of an ndarray will create a new array and delete the original.

- The elements in a NumPy array are all required to be of the same data type, and thus will be the same size in memory.

- NumPy arrays facilitate advanced mathematical and other types of operations on large numbers of data. Typically, such operations are executed more efficiently and with less code than is possible using Python’s built-in sequences.

- A growing plethora of scientific and mathematical Python-based packages are using NumPy arrays; though these typically support Python-sequence input, they convert such input to NumPy arrays prior to processing, and they often output NumPy arrays.

### Creating NumPy Arrays

In [5]:
# np.array - build an ndarray from a (nested) Python list
import numpy as np

# 1D - Vector
a = np.array([1, 2, 3])
print(a)
print(type(a))

# 2D - Matrix (a list of equal-length rows)
b = np.array([[1, 2, 3], [5, 6, 7]])
print(b)
print(type(b))

[1 2 3]
<class 'numpy.ndarray'>
[[1 2 3]
 [5 6 7]]
<class 'numpy.ndarray'>


In [6]:
# 3D - Tensor

c = np.array([[[1, 2], [3, 4], [7, 8]]])
print(c)
print(type(c))

[[[1 2]
  [3 4]
  [7 8]]]
<class 'numpy.ndarray'>


In [7]:
# dtype
np.array([1, 2, 3], dtype=float)

array([1., 2., 3.])

In [9]:
# np.arange
np.arange(1, 11)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [11]:
# with reshape
np.arange(1, 11).reshape(5, 2)

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [12]:
np.arange(1, 13).reshape(4, 3)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [13]:
# np.ones and np.zeros
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [14]:
np.zeros((4, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [20]:
# np.random
# uniform random floats in the half-open interval [0, 1)
np.random.random((3, 4))

array([[0.79645294, 0.35923936, 0.43381542, 0.38452243],
       [0.65186258, 0.63751985, 0.41545416, 0.19767279],
       [0.96989199, 0.78467615, 0.11213052, 0.29243265]])

In [23]:
# np.linspace
np.linspace(-10, 10, 10)

array([-10.        ,  -7.77777778,  -5.55555556,  -3.33333333,
        -1.11111111,   1.11111111,   3.33333333,   5.55555556,
         7.77777778,  10.        ])

In [24]:
# with an integer dtype the evenly spaced values are rounded down to whole numbers
np.linspace(-10, 10, 10, dtype=int)

array([-10,  -8,  -6,  -4,  -2,   1,   3,   5,   7,  10])

In [26]:
# np.identity
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Array Attributes

In [28]:
a1 = np.arange(10)
a2 = np.arange(12, dtype=float).reshape(3, 4)
a3 = np.arange(8).reshape(2, 2, 2)
print(a1)
print()
print(a2)
print()
print(a3)

[0 1 2 3 4 5 6 7 8 9]

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [30]:
# ndim
print(a1.ndim)
print(a2.ndim)
print(a3.ndim)

1
2
3


In [31]:
# shape
print(a1.shape)
print(a2.shape)
print(a3.shape)

(10,)
(3, 4)
(2, 2, 2)


In [32]:
# size   - total no. of items
print(a1.size)
print(a2.size)
print(a3.size)

10
12
8


In [33]:
# itemsize  - size in bytes of each element of the array
print(a1.itemsize)
print(a2.itemsize)
print(a3.itemsize)

4
8
4


In [None]:
# In the example above the item sizes differ because a1 and a3 hold int32 values (4 bytes each), while a2 holds float64 values (8 bytes each).
# See the dtype output in the next cell.

In [34]:
# dtype
print(a1.dtype)
print(a2.dtype)
print(a3.dtype)

int32
float64
int32


### Changing Datatype

In [39]:
# astype - first build an int64 array so the conversion can be demonstrated
a3 = np.arange(8, dtype=np.int64).reshape(2, 2, 2)
print(a3)
print(a3.dtype)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]
int64


In [40]:
# astype returns a converted copy; a3 itself keeps its int64 dtype
a3.astype(np.int32)

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

### Array Operations

In [42]:
a1 = np.arange(12).reshape(3, 4)
a2 = np.arange(12, 24).reshape(3, 4)
print(a1)
print()
print(a2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [44]:
# scalar operations

# arithmetic
a1 * 2

array([[ 0,  2,  4,  6],
       [ 8, 10, 12, 14],
       [16, 18, 20, 22]])

In [47]:
# relational 
# -> returns a boolean array (True or False for each element)
print(a1)
print()
print(a1 > 5)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[False False False False]
 [False False  True  True]
 [ True  True  True  True]]


In [51]:
# vector operations
# arithmetic
# since both arrays have the same shape, they are added element-wise
print(a1)
print(a2)
print()

print(a1 + a2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]

[[12 14 16 18]
 [20 22 24 26]
 [28 30 32 34]]


### Array Functions

In [58]:
# random whole numbers between 0 and 100 (stored as floats)

a1 = np.random.random((3, 3))
a1 = np.round(a1 * 100)
print(a1)

[[43.  7. 55.]
 [ 0. 20. 22.]
 [11.  8. 88.]]


In [64]:
# min/max/sum/prod
print(np.max(a1))
print(np.min(a1))
print(np.sum(a1))
print(np.prod(a1))

88.0
0.0
254.0
0.0


In [66]:
# axis=1 reduces along each row, giving one result per row
print(np.max(a1, axis=1))
print(np.min(a1, axis=1))
print(np.sum(a1, axis=1))
print(np.prod(a1, axis=1))

[55. 22. 88.]
[7. 0. 8.]
[105.  42. 107.]
[16555.     0.  7744.]


In [68]:
# mean/median/std/var
print(np.mean(a1))
print(np.mean(a1, axis=1))
print(np.median(a1))
print(np.std(a1))
print(np.var(a1))

28.22222222222222
[35.         14.         35.66666667]
20.0
27.013485429720625
729.7283950617284


In [69]:
# trigonometry   - sin/cos/tan
np.sin(a1)

array([[-0.83177474,  0.6569866 , -0.99975517],
       [ 0.        ,  0.91294525, -0.00885131],
       [-0.99999021,  0.98935825,  0.0353983 ]])

In [80]:
# dot product - for two 2D arrays np.dot performs matrix multiplication
a2 = np.ones(4).reshape(2, 2)
a3 = np.arange(4).reshape(2, 2)
print(a2)
print(a3)
print()
print(np.dot(a2, a3))

[[1. 1.]
 [1. 1.]]
[[0 1]
 [2 3]]

[[2. 4.]
 [2. 4.]]


In [92]:
# np.multiply is element-wise multiplication, unlike the matrix product from np.dot
np.multiply(a2, a3)

array([[0., 1.],
       [2., 3.]])

In [85]:
# log and exponents
a1 = np.arange(1, 10).reshape(3, 3)
print(a1)
print()
np.log(a1)

[[1 2 3]
 [4 5 6]
 [7 8 9]]



array([[0.        , 0.69314718, 1.09861229],
       [1.38629436, 1.60943791, 1.79175947],
       [1.94591015, 2.07944154, 2.19722458]])

In [84]:
np.exp(a1)

array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01],
       [5.45981500e+01, 1.48413159e+02, 4.03428793e+02],
       [1.09663316e+03, 2.98095799e+03, 8.10308393e+03]])

In [90]:
# round/floor/ceil
a_random = np.random.random((2, 3))
print(a_random)
print()

print(np.round(a_random * 100))
print()
print(np.floor(a_random * 100))
print()
print(np.ceil(a_random * 100))

[[0.92630802 0.98087173 0.94882222]
 [0.24390337 0.20826878 0.06515192]]

[[93. 98. 95.]
 [24. 21.  7.]]

[[92. 98. 94.]
 [24. 20.  6.]]

[[93. 99. 95.]
 [25. 21.  7.]]


### Indexing and Slicing

In [94]:
a1 = np.arange(10)
a2 = np.arange(12).reshape(3, 4)
a3 = np.arange(8).reshape((2, 2, 2))

print(a1)
print()
print(a2)
print()
print(a3)

[0 1 2 3 4 5 6 7 8 9]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [95]:
a1[-1]

9

In [96]:
a1[0]

0

In [101]:
print(a2)
a2[1, 2]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


6

In [102]:
a2[1, 0]

4

In [103]:
a3

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [112]:
print(a3[1])

[[4 5]
 [6 7]]


In [113]:
a3[1, 0]

array([4, 5])

In [115]:
a3[1, 1]

array([6, 7])

In [104]:
a3[1, 0, 1]

5

In [106]:
a3[0, 1, 0]

2

In [107]:
a3[0, 0, 0]

0

In [108]:
a3[1, 1, 0]

6

In [109]:
a1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [110]:
a1[2:5]

array([2, 3, 4])

In [118]:
a1[-1:-5:-1]

array([9, 8, 7, 6])

In [121]:
print(a2)
a2[0]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


array([0, 1, 2, 3])

In [122]:
a2[0, 2]

2

In [123]:
a2[:, 2]

array([ 2,  6, 10])

In [125]:
print(a2[1:3, 1:3])

[[ 5  6]
 [ 9 10]]


In [127]:
print(a2[::2, ::3])

[[ 0  3]
 [ 8 11]]


In [130]:
print(a2[::2, 1::2])

[[ 1  3]
 [ 9 11]]


In [131]:
a2[1, 0::3]

array([4, 7])

In [133]:
print(a2[0:2, 1:])

[[1 2 3]
 [5 6 7]]


In [136]:
a3 = np.arange(27).reshape(3, 3, 3)
print(a3)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [138]:
print(a3[1])

[[ 9 10 11]
 [12 13 14]
 [15 16 17]]


In [139]:
print(a3[::2])

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [140]:
print(a3[0,1])

[3 4 5]


In [141]:
print(a3[1,:,1])

[10 13 16]


In [142]:
print(a3[2,1:,1:])

[[22 23]
 [25 26]]


In [143]:
print(a3[::2,0,::2])

[[ 0  2]
 [18 20]]


### Iterating

In [147]:
print(a1)

for i in a1:
    print(i)

[0 1 2 3 4 5 6 7 8 9]
0
1
2
3
4
5
6
7
8
9


In [150]:
print(a2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [149]:
for i in a2:
    print(i)

[0 1 2 3]
[4 5 6 7]
[ 8  9 10 11]


In [151]:
print(a3)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [152]:
for i in a3:
    print(i)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 9 10 11]
 [12 13 14]
 [15 16 17]]
[[18 19 20]
 [21 22 23]
 [24 25 26]]


In [153]:
# what if we want to loop over individual items
for i in np.nditer(a3):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


### Reshaping

In [None]:
# reshape

In [155]:
# transpose
print(a2)
print()

print(a2.T)
# or
# np.transpose(a2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [157]:
# ravel - converts an array of any dimension to a 1D array
print(a3)
print()
a3.ravel()

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]



array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26])

### Stacking
Stacking is used to combine data that arrives in separate pieces.


In [161]:
# horizontal stacking
a4 = np.arange(12).reshape(3, 4)
a5 = np.arange(12, 24).reshape(3, 4)
print(a4)
print()
print(a5)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [163]:
print(np.hstack((a4, a5)))

[[ 0  1  2  3 12 13 14 15]
 [ 4  5  6  7 16 17 18 19]
 [ 8  9 10 11 20 21 22 23]]


In [164]:
print(np.hstack((a4, a4 ,a4, a5)))

[[ 0  1  2  3  0  1  2  3  0  1  2  3 12 13 14 15]
 [ 4  5  6  7  4  5  6  7  4  5  6  7 16 17 18 19]
 [ 8  9 10 11  8  9 10 11  8  9 10 11 20 21 22 23]]


In [166]:
# vertical stacking
print(np.vstack((a4, a5)))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [169]:
print(np.vstack((a4, a4, a5)))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


### Splitting
Splitting is the opposite of stacking: it divides one array into several smaller arrays.

In [172]:
a6 = np.hstack((a4, a5))
print(a6)
print()

a7 = np.hsplit(a6, 2)
print(a7)
print(type(a7))

[[ 0  1  2  3 12 13 14 15]
 [ 4  5  6  7 16 17 18 19]
 [ 8  9 10 11 20 21 22 23]]

[array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]]), array([[12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])]
<class 'list'>


In [173]:
a8 = np.hsplit(a6, 4)
print(a8)

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]]), array([[12, 13],
       [16, 17],
       [20, 21]]), array([[14, 15],
       [18, 19],
       [22, 23]])]


In [174]:
# a6 has 8 columns, which cannot be divided into 3 equal sections, so this raises ValueError
np.hsplit(a6, 3)

ValueError: array split does not result in an equal division

In [176]:
print(a5)

[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [177]:
# a5 has 3 rows, so it cannot be split vertically into 2 equal parts; the number of sections must divide the row count evenly (here 3)
np.vsplit(a5, 3)

[array([[12, 13, 14, 15]]),
 array([[16, 17, 18, 19]]),
 array([[20, 21, 22, 23]])]

In [179]:
np.vsplit(a6, 3)

[array([[ 0,  1,  2,  3, 12, 13, 14, 15]]),
 array([[ 4,  5,  6,  7, 16, 17, 18, 19]]),
 array([[ 8,  9, 10, 11, 20, 21, 22, 23]])]