# Numpy

NumPy is a linear algebra library for Python, and most other data science libraries rely on it. NumPy arrays are most commonly used as vectors (1-D) and matrices (2-D). NumPy can quickly broadcast operations across whole arrays. It has built-in linear algebra, statistical distribution, trigonometric, and random number capabilities.

In [1]:
import numpy as np

## Creating numpy array

In [2]:
my_list = [0, 1, 2, 3]  #list
ar = np.array(my_list)
print(ar)

[0 1 2 3]


In [4]:
ar.dtype  #data type of array

dtype('int32')

In [6]:
ar.shape  #shape/dimension of array. returns a tuple

(4,)

In [7]:
# matrix
my_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]
ar = np.array(my_list)
print(ar)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [8]:
ar.shape

(3, 3)

## arange()

In [9]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [10]:
np.arange(5, 15)

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [11]:
np.arange(5, 20, 2)

array([ 5,  7,  9, 11, 13, 15, 17, 19])

## zeros()

1. np.zeros(n) -> vector with n zeros
2. np.zeros((m, n)) -> Matrix with m rows and n columns

In [19]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [26]:
np.zeros(10, dtype=np.uint8)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [20]:
np.zeros((5, 7))  #The dimension will be in a tuple

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [21]:
# specify the data type (optional)
np.zeros(10, dtype=np.uint8)

#Here we are specifying the data type as unsigned 8-bit integer (range of 0 to 255)
# let's compare the size with the default float64
# float64 uses 8 bytes per value and uint8 uses 1 byte, so in the space where float64 stores 1 image, uint8 can store 8 images

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

## ones()

In [22]:
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

## linspace(start, end, n)
Creates n equally spaced values from start to end, with both endpoints included.

In [28]:
np.linspace(0, 5, 5)

array([0.  , 1.25, 2.5 , 3.75, 5.  ])

In [29]:
np.linspace(0, 10, 11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [31]:
np.linspace(1, 2, 23)

array([1.        , 1.04545455, 1.09090909, 1.13636364, 1.18181818,
       1.22727273, 1.27272727, 1.31818182, 1.36363636, 1.40909091,
       1.45454545, 1.5       , 1.54545455, 1.59090909, 1.63636364,
       1.68181818, 1.72727273, 1.77272727, 1.81818182, 1.86363636,
       1.90909091, 1.95454545, 2.        ])

## Identity Matrix: eye(n)

In [32]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

## Random Functions

### randn

In [34]:
np.random.randn(10)  #10 values taken from a normal distribution
# the standard normal distribution has a mean of 0 and a standard deviation of 1

array([-1.70215702, -0.72859909,  0.93534885, -0.77472187,  1.04100224,
        0.08377871,  0.19471071, -1.00589156, -2.43812312, -0.8261593 ])

In [38]:
np.random.randn(3, 5)  #tuple not used

array([[-0.24175003,  0.62640425, -1.54487503, -0.97910455, -0.26291312],
       [-2.1904026 ,  0.3456324 , -2.4195524 ,  0.17566475,  1.23407768],
       [ 0.39815301, -0.48417199,  0.02572452, -0.2744416 , -0.00797852]])

In [40]:
ar = np.random.randn(10)
print(ar.mean(), ar.std())

-0.32376092237662696 0.41663353040401013


### rand

In [37]:
np.random.rand(10)  #10 values taken from a uniform distribution
# Uniform distribution -> all values in the range [0, 1) are equally likely

array([0.63759675, 0.35350264, 0.54145932, 0.85011101, 0.37077308,
       0.40859466, 0.03659617, 0.07639579, 0.44569437, 0.79334619])

In [41]:
np.random.rand(3, 5)

array([[0.34094247, 0.1024553 , 0.84701048, 0.48721148, 0.82449298],
       [0.40258745, 0.49133687, 0.63354498, 0.16318485, 0.41683809],
       [0.82608052, 0.65059034, 0.54190593, 0.42703977, 0.01052327]])

### randint(start, end, size)

In [42]:
#random integers in the range [start, end) -- start is included, end is excluded
np.random.randint(0, 100, 50)  #50 values in the range of 0 to 99

array([18, 95, 27, 11, 76, 54, 77,  0, 28, 23, 86, 59, 80, 18, 13, 53, 37,
       41, 80, 84, 67, 95, 46, 22, 55, 17,  1, 21, 56, 19, 22,  2, 67, 55,
       92, 40, 61, 10, 27,  1, 27, 86,  7, 60,  8, 49, 24, 97, 60, 40])

In [43]:
np.random.randint(0, 100, (5, 5))

array([[70, 97, 26, 12, 80],
       [27, 62, 56, 36, 36],
       [84, 36, 57, 52, 23],
       [30, 60,  8, 61, 37],
       [82, 11,  4, 63, 97]])

In [44]:
np.random.randint(0, 100, (2, 5, 5))

array([[[33, 82, 89, 96, 10],
        [94, 14, 53, 52,  0],
        [78, 71, 48,  9, 26],
        [88,  3, 86,  6, 21],
        [84, 23,  6, 54, 50]],

       [[27, 94, 42, 29, 74],
        [60, 22,  8,  2, 58],
        [ 8, 93, 96, 88, 26],
        [35, 51, 53, 89, 11],
        [48, 58, 13,  4, 45]]])

### random()

In [45]:
np.random.random()  # A single real number in the range [0, 1)

0.2449520239942461

In [46]:
np.random.random(10)

array([1.90290538e-01, 5.57336662e-01, 2.56325673e-01, 4.87296763e-01,
       4.12370091e-04, 9.31214076e-01, 7.59246651e-01, 7.79453581e-02,
       9.02186336e-01, 6.42805310e-01])

In [47]:
np.random.random((3, 5))

array([[0.052852  , 0.1380643 , 0.86273756, 0.20136468, 0.64351441],
       [0.09850097, 0.85273373, 0.91334605, 0.69268064, 0.43010545],
       [0.70374912, 0.45152427, 0.47701135, 0.66170132, 0.7735795 ]])

## Numpy Array Properties and Methods

In [48]:
ar = np.array([10, 20, 5, 7, 19, 26, 17])
ar

array([10, 20,  5,  7, 19, 26, 17])

In [49]:
ar.max()

26

In [50]:
ar.min()

5

In [51]:
ar.argmax()  #index of largest value

5

In [52]:
ar.argmin()  #index of smallest value

2

In [53]:
ar.sum()  #sum of all values

104

In [54]:
ar.shape  #property for dimension

(7,)

### Reshape & Resize

In [55]:
ar = np.random.rand(24)

In [56]:
ar.shape

(24,)

In [59]:
ar = ar.reshape(8, 3)
# 8*3 = 24. The new shape must have the same number of elements as the old shape
# ar.reshape() does not modify the original array; it returns the reshaped array

In [60]:
ar

array([[0.88487462, 0.69380641, 0.63134197],
       [0.6781184 , 0.92974015, 0.45254061],
       [0.23620726, 0.46090278, 0.16307584],
       [0.01565957, 0.31519582, 0.752768  ],
       [0.1882306 , 0.68318982, 0.16799281],
       [0.01908443, 0.55173259, 0.19214897],
       [0.42581525, 0.84217985, 0.48779722],
       [0.27471096, 0.31582097, 0.60706187]])

In [65]:
#alternative
ar.resize(8, 3)
# 8*3 = 24. The new shape should have the same number of elements as old shape
# ar.resize() modifies the original array in place
# it returns None

In [66]:
ar.shape

(8, 3)

In [67]:
ar

array([[0.88487462, 0.69380641, 0.63134197],
       [0.6781184 , 0.92974015, 0.45254061],
       [0.23620726, 0.46090278, 0.16307584],
       [0.01565957, 0.31519582, 0.752768  ],
       [0.1882306 , 0.68318982, 0.16799281],
       [0.01908443, 0.55173259, 0.19214897],
       [0.42581525, 0.84217985, 0.48779722],
       [0.27471096, 0.31582097, 0.60706187]])

In [68]:
ar.mean()  #average

0.4570831990171544

In [69]:
ar.std()  #standard deviation

0.2648307799254992

In [70]:
ar.sort()  # sorts the original array in place; for a 2D array each row is sorted independently (along the last axis)

In [71]:
ar

array([[0.63134197, 0.69380641, 0.88487462],
       [0.45254061, 0.6781184 , 0.92974015],
       [0.16307584, 0.23620726, 0.46090278],
       [0.01565957, 0.31519582, 0.752768  ],
       [0.16799281, 0.1882306 , 0.68318982],
       [0.01908443, 0.19214897, 0.55173259],
       [0.42581525, 0.48779722, 0.84217985],
       [0.27471096, 0.31582097, 0.60706187]])

In [75]:
ar.resize(24)
ar.sort()
print(ar)

[0.01565957 0.01908443 0.16307584 0.16799281 0.1882306  0.19214897
 0.23620726 0.27471096 0.31519582 0.31582097 0.42581525 0.45254061
 0.46090278 0.48779722 0.55173259 0.60706187 0.63134197 0.6781184
 0.68318982 0.69380641 0.752768   0.84217985 0.88487462 0.92974015]


## Indexing & Slicing

In [76]:
ar = np.arange(0, 11)

In [77]:
ar

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [78]:
ar[-1]

10

In [79]:
ar[2:7]

array([2, 3, 4, 5, 6])

In [80]:
ar[2:]

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10])

In [81]:
ar[:7]

array([0, 1, 2, 3, 4, 5, 6])

In [82]:
ar[2:8:2]

array([2, 4, 6])

* For a 1D numpy array, indexing and slicing work the same way as for Python lists

* The main difference is that a slice can be assigned a single value, which is broadcast to every selected element

In [83]:
ar

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [86]:
ar[1:5] = 100  #broadcasting

In [87]:
ar

array([  0, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

## Multidimensional Indexing & Slicing

In [88]:
ar = np.random.rand(4, 5)

In [89]:
ar

array([[0.77588183, 0.85916592, 0.22282597, 0.31010692, 0.3853697 ],
       [0.9122455 , 0.41285003, 0.64774048, 0.23529587, 0.49481909],
       [0.06099989, 0.03388533, 0.36832107, 0.19545161, 0.99601406],
       [0.1140568 , 0.17584934, 0.67846486, 0.31368302, 0.5200351 ]])

In [91]:
ar[1][3]  #not recommended

0.23529586879296427

In [92]:
ar[1, 3]  #preferred

0.23529586879296427

In [93]:
ar.shape

(4, 5)

In [94]:
ar[:3, 1:4]
# rows 0, 1, 2 and columns 1, 2, 3

array([[0.85916592, 0.22282597, 0.31010692],
       [0.41285003, 0.64774048, 0.23529587],
       [0.03388533, 0.36832107, 0.19545161]])

In [95]:
ar[1:3, 0:3]

array([[0.9122455 , 0.41285003, 0.64774048],
       [0.06099989, 0.03388533, 0.36832107]])

## Copy

In [96]:
ar

array([[0.77588183, 0.85916592, 0.22282597, 0.31010692, 0.3853697 ],
       [0.9122455 , 0.41285003, 0.64774048, 0.23529587, 0.49481909],
       [0.06099989, 0.03388533, 0.36832107, 0.19545161, 0.99601406],
       [0.1140568 , 0.17584934, 0.67846486, 0.31368302, 0.5200351 ]])

In [97]:
id(ar)

1654012996384

In [98]:
ar2 = ar

In [99]:
id(ar2)

1654012996384

In [101]:
ar2 is ar  #they are same object

True

In [104]:
ar3 = ar[:, :]  # NOT a copy: slicing creates a new array object that is a view sharing ar's data
# id(ar3) differs from id(ar), but writing to ar3 also changes ar; use ar.copy() for an independent copy

In [105]:
id(ar3)

1654012995904

## Conditional Selection

In [106]:
ar = np.random.randn(40)

In [107]:
ar

array([-0.04311421,  0.52482498,  3.23545039,  0.82964812, -0.1319813 ,
       -0.29057912, -0.61840203, -1.95717716, -1.91624214,  0.92291727,
       -0.36210892, -1.89292892,  0.99691666,  0.76072715,  1.06296862,
       -0.16380388, -0.03878744,  0.10022597,  1.99592293,  0.29337864,
        1.82057215,  0.58233076, -1.45237591,  1.5893436 ,  2.07886514,
       -0.00811185,  0.88157121, -1.53045536,  0.51895222,  0.10381383,
        1.20762611,  2.33571671, -0.85258477,  0.22798709,  0.25615068,
       -2.31523965,  0.77739987, -0.09309876, -1.27804759, -1.08973046])

In [111]:
condition = ar >= 0
# not possible on lists
# Numpy applies the condition to each and every element and creates a boolean array

In [114]:
condition  # True means the value is >= 0 (non-negative), False means it is negative

array([False,  True,  True,  True, False, False, False, False, False,
        True, False, False,  True,  True,  True, False, False,  True,
        True,  True,  True,  True, False,  True,  True, False,  True,
       False,  True,  True,  True,  True, False,  True,  True, False,
        True, False, False, False])

In [115]:
ar[condition]  #returns the values for which condition is true

array([0.52482498, 3.23545039, 0.82964812, 0.92291727, 0.99691666,
       0.76072715, 1.06296862, 0.10022597, 1.99592293, 0.29337864,
       1.82057215, 0.58233076, 1.5893436 , 2.07886514, 0.88157121,
       0.51895222, 0.10381383, 1.20762611, 2.33571671, 0.22798709,
       0.25615068, 0.77739987])

In [117]:
ar[ar >= 0]  # same thing but in one line

array([0.52482498, 3.23545039, 0.82964812, 0.92291727, 0.99691666,
       0.76072715, 1.06296862, 0.10022597, 1.99592293, 0.29337864,
       1.82057215, 0.58233076, 1.5893436 , 2.07886514, 0.88157121,
       0.51895222, 0.10381383, 1.20762611, 2.33571671, 0.22798709,
       0.25615068, 0.77739987])

In [119]:
ar[(ar > 0.2) | (ar < -0.5)]
# Python's and/or/not cannot be used to combine boolean arrays
# we need the element-wise operators &, |, ~ and each comparison must be wrapped in parentheses

array([ 0.52482498,  3.23545039,  0.82964812, -0.61840203, -1.95717716,
       -1.91624214,  0.92291727, -1.89292892,  0.99691666,  0.76072715,
        1.06296862,  1.99592293,  0.29337864,  1.82057215,  0.58233076,
       -1.45237591,  1.5893436 ,  2.07886514,  0.88157121, -1.53045536,
        0.51895222,  1.20762611,  2.33571671, -0.85258477,  0.22798709,
        0.25615068, -2.31523965,  0.77739987, -1.27804759, -1.08973046])

## Numpy array operations

In [120]:
ar = np.arange(0, 11)

In [121]:
ar

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [122]:
[0, 1] + [0, 1]

[0, 1, 0, 1]

In [123]:
ar + ar

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [124]:
ar + 1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [125]:
ar * 2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [126]:
ar ** 3

array([   0,    1,    8,   27,   64,  125,  216,  343,  512,  729, 1000],
      dtype=int32)

In [127]:
ar + 50

array([50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60])

In [128]:
ar - 50

array([-50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40])

In [129]:
# original array does not change
ar

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [130]:
ar-=100

In [131]:
ar

array([-100,  -99,  -98,  -97,  -96,  -95,  -94,  -93,  -92,  -91,  -90])

## Division by zero

In [132]:
10/0

ZeroDivisionError: division by zero

In [133]:
ar = np.array([0, -10, 10])

In [136]:
ar/0
# nan -> not a number

  ar/0
  ar/0


array([ nan, -inf,  inf])

## Other functions

In [137]:
ar = np.arange(10)
ar

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [138]:
np.power(ar, 2)

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [140]:
ar = ar ** 2
ar

array([   0,    1,   16,   81,  256,  625, 1296, 2401, 4096, 6561],
      dtype=int32)

In [141]:
np.sqrt(ar)

array([ 0.,  1.,  4.,  9., 16., 25., 36., 49., 64., 81.])

In [142]:
np.sin(ar)

array([ 0.        ,  0.84147098, -0.28790332, -0.62988799, -0.99920803,
        0.17601627,  0.99567579,  0.7333338 , -0.59464199,  0.97670744])

In [143]:
ar = np.arange(1, 25)

In [144]:
np.log(ar)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509,
       2.39789527, 2.48490665, 2.56494936, 2.63905733, 2.7080502 ,
       2.77258872, 2.83321334, 2.89037176, 2.94443898, 2.99573227,
       3.04452244, 3.09104245, 3.13549422, 3.17805383])

In [146]:
ar.sum()  # all elements

300