# NumPy

NumPy is Python's package for doing math that is more advanced than the basic arithmetic operators +, -, *, /

This includes special functions like cosine, exponential, sqrt, ...

On top of this we can use numpy to generate samples from many types of random variables

numpy also has a powerful data type to define vectors, matrices, and tensors

With these data types numpy also allows us to do linear algebra - matrix multiplication and solving linear systems of equations

In [1]:
# the first step of using numpy is to tell python to use it
import numpy as np

In [2]:
print(np.cos(np.pi))
print(np.sqrt(1.21))
print(np.log(np.exp(5.2)))

-1.0
1.1
5.2


In [3]:
# we can create numpy arrays by converting lists
# this is a vector
vec = np.array([1,2,3])
print(vec)
# we can create matrices by converting lists of lists
mat = np.array([[1,2,1],[4,5,9],[1,8,9]])
print('')
print(mat)
print('')
print(mat.T)

[1 2 3]

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[[1 4 1]
 [2 5 8]
 [1 9 9]]


In [4]:
# there are lots of other ways to create numpy arrays
# np.arange(start, stop) counts up from start in steps of 1 and stops before stop
vec2 = np.arange(0,15)
print(vec2)
print('')
# a third argument sets the step size: 3, 9, 15 (21 itself is not included)
vec3 = np.arange(3,21,6)
print(vec3)


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]

[ 3  9 15]


In [7]:

# np.linspace(start, stop, num) returns num evenly spaced values from start
# to stop, with both endpoints included (here 10 values from 0 to 5)
vec4 = np.linspace(0,5,10)
print(vec4)
print('')
# reshape returns the same 10 values arranged as a 5x2 matrix
print(vec4.reshape(5,2))
vec4_reshaped = vec4.reshape(5,2)
print(vec4_reshaped)
# vec4 itself keeps its original 1-D shape; reshape did not modify it in place
print(vec4)

[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]

[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]


In [8]:
mat2 = np.zeros([5,3]) #you can use bracket notation
print(mat2)
mat3 = np.ones((3,5)) #or you can use parentheses notation
print('')
print(mat3)
mat4 = np.eye(5) #identity matrix 5x5
print('')
print(mat4)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [9]:
# we can +-*/ arrays together if they're the right size
# every operation below is applied element by element
vec5 = np.arange(1,6)
vec6 = np.arange(3,8)
print(vec5)
print(vec6)
print(vec5+vec6)
print(vec5*vec6)
# a scalar is combined with every element, and numpy functions act on each element too
print(1/vec5)
print(np.sqrt(vec6))

[1 2 3 4 5]
[3 4 5 6 7]
[ 4  6  8 10 12]
[ 3  8 15 24 35]
[1.         0.5        0.33333333 0.25       0.2       ]
[1.73205081 2.         2.23606798 2.44948974 2.64575131]


In [12]:
# we can do matrix multiplication
print(mat)
print('')
print(vec)
print()
product = np.matmul(mat,vec)
print(product)

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[1 2 3]

[ 8 41 44]


In [17]:
mat.shape

(3, 3)

In [18]:
vec.shape

(3,)

In [19]:
product.shape

(3,)

In [None]:
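# a numpy array created from a flat list is 1-dimensional: it is neither a row nor a column vector
# vec = np.array([1,2,3]) has shape (3,), not 3x1 or 1x3
# np.matmul(mat, vec) treats the 1-D vec like the column vector
# [[1]
#  [2]
#  [3]] and returns a 1-D result, which is why product.shape is also (3,)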

In [20]:
print(np.linalg.solve(mat,product)) #function used to solve for unknown vector [V]
print('')                           #such that [mat] * [V] = [product]
print(np.linalg.inv(mat))           #calculate the inverse of a given matrix

[1. 2. 3.]

[[ 0.5         0.18518519 -0.24074074]
 [ 0.5        -0.14814815  0.09259259]
 [-0.5         0.11111111  0.05555556]]


In [21]:
# we can find the unique values in an array
# all elements of an array share one data type, so the integer 6 below is converted to the string '6'
vec7 = np.array(['blue','red','orange','purple','purple','orange','Red',6])
print(vec7)
# np.unique returns the distinct values in sorted order; 'Red' and 'red' count as different strings
print(np.unique(vec7))

['blue' 'red' 'orange' 'purple' 'purple' 'orange' 'Red' '6']
['6' 'Red' 'blue' 'orange' 'purple' 'red']


In [22]:
# we can also use numpy to generate samples of a random variable
rand_mat = np.random.rand(5,5) # independent samples from a uniform distribution on [0, 1)
print(rand_mat)
rand_mat2 = np.random.randn(10,5) # standard normal (mean = 0, std = 1) random variable
print('')
print(rand_mat2)

[[0.8668461  0.8826434  0.41033896 0.76600159 0.10674056]
 [0.3116943  0.62585138 0.2923542  0.57709176 0.87952293]
 [0.81456186 0.66252888 0.76555147 0.33097258 0.41830476]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]

[[-1.43884916  0.32917039 -0.00262172  0.20306565 -0.52836778]
 [ 0.28887855 -0.71325347  1.56360601 -0.0693306  -1.23216106]
 [-1.29677755 -2.01486957  0.25639082  0.60136977 -1.39326532]
 [ 1.14834652  0.53353451  0.12555914 -0.23599909 -0.81183993]
 [-1.74400577  0.61543584  0.64640967 -0.19427916  2.00729726]
 [ 0.19516901 -0.97951908 -0.5753062  -0.10749662  1.41912101]
 [-1.44790776  1.53658579  0.27924718 -1.18620582 -0.06037816]
 [-0.49761454  0.45197836 -0.07716579  1.40859073 -0.84557922]
 [ 0.43619861 -0.40457839 -1.00306104  2.0014103   0.67532736]
 [-1.00426202  1.50532703  0.84281985  0.18921886  0.29799255]]


In [23]:
# we can also use numpy for statistical tools on arrays
print(np.mean(rand_mat))
print(np.std(rand_mat2))

0.5400339260132214
0.9776984733339321


In [24]:
print(np.min(rand_mat))
print(np.max(rand_mat2))

0.08544005962928192
2.0072972642709916


In [None]:
# break here for next video!

# 2.1 Test Your Understanding

Select the correct choice (True/False) for the below statement:

We can create matrices by converting lists of lists.

TRUE

In [25]:
# What will be the output of the following code:

vec = np.arange(10, 20, 3)
print(vec)

[10 13 16 19]


In [26]:
# What will be the output of the following code?

vec=np.linspace(10,20,3)
print(vec)

[10. 15. 20.]


In [27]:
# Choose the correct answer by working on the following code:

vec1 = np.arange(0, 12)
vec2 = vec1.reshape(4, 3)
print(vec2)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


Select the correct choice (True/False) for the below statement: 

An Identity matrix is a square matrix in which all the elements of the principal diagonal are ones and all other elements are zeros.

TRUE

Select the correct choice (True/False) for the below statement:

"We can have elements of different data types in a given array"

FALSE

In [35]:
# Which of the following snippets of code are valid function(s) for NumPy?
# A. numpy.cos(numpy.pi)
# B. numpy.sqrt(1.44)
# C. numpy.log(6)
# D. numpy.exp(4)

np.cos(np.pi)

-1.0

In [32]:
np.sqrt(1.44)

1.2

In [33]:
np.log(6)

1.791759469228055

In [34]:
np.exp(4)

54.598150033144236

In [37]:
# What is the output of the following code:

import numpy as np
arr = np.arange(4, 20, 5)
print(arr)

[ 4  9 14 19]


In [38]:
# how do we access entries in a numpy vector
rand_vec = np.random.randn(19) #standard normal distribution
print(rand_vec)
# indices start at 0, so index 6 is the 7th entry of the vector
print(rand_vec[6])

[ 0.5552267  -0.62783824 -0.57746665  1.63821261 -0.78745115 -0.46399657
 -1.30785462 -0.3243391  -0.42992401 -0.15284381 -0.1245569  -0.25740279
 -0.09403078 -0.84353263 -0.93776036 -0.74766233  0.69553554 -0.5490471
  0.55772154]
-1.307854622022354


In [39]:
# we can access multiple entries at once using :
# start:stop includes start but excludes stop, so 4:9 selects entries 4 through 8
print(rand_vec[4:9])

[-0.78745115 -0.46399657 -1.30785462 -0.3243391  -0.42992401]


In [40]:
# we can also access multiple non-consecutive entries using np.arange
print(np.arange(0,15,3))
print(rand_vec[np.arange(0,15,3)])

[ 0  3  6  9 12]
[ 0.5552267   1.63821261 -1.30785462 -0.15284381 -0.09403078]


In [41]:
# what about matrices
print(rand_mat)
# both forms below select the entry in row 1, column 2 (counting from 0)
print(rand_mat[1][2])
print(rand_mat[1,2])


[[0.8668461  0.8826434  0.41033896 0.76600159 0.10674056]
 [0.3116943  0.62585138 0.2923542  0.57709176 0.87952293]
 [0.81456186 0.66252888 0.76555147 0.33097258 0.41830476]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]
0.29235420206261575
0.29235420206261575


In [42]:
print(rand_mat[0:2,1:3]) #sub-matrix of matrix above

[[0.8826434  0.41033896]
 [0.62585138 0.2923542 ]]


In [43]:
# let's change some values in an array!
print(rand_vec)
# assigning a single number fills every entry of the slice with it
rand_vec[3:5] = 4
print('')
print(rand_vec)
# assigning a list of the same length as the slice sets the entries one by one
rand_vec[3:5] = [1,2]
print('')
print(rand_vec)

[ 0.5552267  -0.62783824 -0.57746665  1.63821261 -0.78745115 -0.46399657
 -1.30785462 -0.3243391  -0.42992401 -0.15284381 -0.1245569  -0.25740279
 -0.09403078 -0.84353263 -0.93776036 -0.74766233  0.69553554 -0.5490471
  0.55772154]

[ 0.5552267  -0.62783824 -0.57746665  4.          4.         -0.46399657
 -1.30785462 -0.3243391  -0.42992401 -0.15284381 -0.1245569  -0.25740279
 -0.09403078 -0.84353263 -0.93776036 -0.74766233  0.69553554 -0.5490471
  0.55772154]

[ 0.5552267  -0.62783824 -0.57746665  1.          2.         -0.46399657
 -1.30785462 -0.3243391  -0.42992401 -0.15284381 -0.1245569  -0.25740279
 -0.09403078 -0.84353263 -0.93776036 -0.74766233  0.69553554 -0.5490471
  0.55772154]


In [44]:
print(rand_mat)
# set the 2x2 block in rows 1-2, columns 3-4 to zero
rand_mat[1:3,3:5] = 0
print('')
print(rand_mat)

[[0.8668461  0.8826434  0.41033896 0.76600159 0.10674056]
 [0.3116943  0.62585138 0.2923542  0.57709176 0.87952293]
 [0.81456186 0.66252888 0.76555147 0.33097258 0.41830476]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]

[[0.8668461  0.8826434  0.41033896 0.76600159 0.10674056]
 [0.3116943  0.62585138 0.2923542  0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]


In [45]:
# slicing returns a view of rand_mat, not a copy
sub_mat = rand_mat[0:2,0:3]
print(sub_mat)
# assigning through the view also overwrites the matching entries of rand_mat
sub_mat[:] = 3
print(sub_mat)


[[0.8668461  0.8826434  0.41033896]
 [0.3116943  0.62585138 0.2923542 ]]
[[3. 3. 3.]
 [3. 3. 3.]]


In [46]:
print(rand_mat)

[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.         3.         0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]


In [47]:
# .copy() makes an independent array, so changes to sub_mat2 leave rand_mat untouched
sub_mat2 = rand_mat[0:2,0:3].copy()
sub_mat2[:] = 99
print(sub_mat2)
print(rand_mat)


[[99. 99. 99.]
 [99. 99. 99.]]
[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.         3.         0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]


In [None]:
# break here for next video

# 2.2 Test Your Understanding

In [48]:
# What will be the output of the following code:

vec = np.arange(10,20)
vec2 = vec[np.arange(0,10,3)]
print(vec2)

[10 13 16 19]


Select the correct choice (True/False) for the below statement:

We can replace/change any value in an array.

TRUE

In [50]:
# In the code below, if we change array2, then array1 will also be affected by the change.

array1 = np.array([[1,2,3],[4,5,6],[7,8,9]])
array2 = array1

# TRUE

In [51]:
# What will be the output of the following code?

matrix = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(matrix[1][1])

5


In [52]:
# we can also access entries with logicals
rand_vec = np.random.randn(15)

print(rand_vec)
print(rand_vec>0)
print(rand_vec[rand_vec>0])

[-0.45946054 -0.4793546   0.73385509 -0.81534104  0.4198816  -1.61199058
  0.32303833 -0.66553559 -0.08901164  1.63008527  1.30045838  1.33758623
 -0.53380077 -1.82586007  0.0241793 ]
[False False  True False  True False  True False False  True  True  True
 False False  True]
[0.73385509 0.4198816  0.32303833 1.63008527 1.30045838 1.33758623
 0.0241793 ]


In [53]:
print(rand_mat2)
# a boolean mask on a matrix returns the selected entries as a flat 1-D array, taken row by row
print(rand_mat2[rand_mat2>0])

[[-1.43884916  0.32917039 -0.00262172  0.20306565 -0.52836778]
 [ 0.28887855 -0.71325347  1.56360601 -0.0693306  -1.23216106]
 [-1.29677755 -2.01486957  0.25639082  0.60136977 -1.39326532]
 [ 1.14834652  0.53353451  0.12555914 -0.23599909 -0.81183993]
 [-1.74400577  0.61543584  0.64640967 -0.19427916  2.00729726]
 [ 0.19516901 -0.97951908 -0.5753062  -0.10749662  1.41912101]
 [-1.44790776  1.53658579  0.27924718 -1.18620582 -0.06037816]
 [-0.49761454  0.45197836 -0.07716579  1.40859073 -0.84557922]
 [ 0.43619861 -0.40457839 -1.00306104  2.0014103   0.67532736]
 [-1.00426202  1.50532703  0.84281985  0.18921886  0.29799255]]
[0.32917039 0.20306565 0.28887855 1.56360601 0.25639082 0.60136977
 1.14834652 0.53353451 0.12555914 0.61543584 0.64640967 2.00729726
 0.19516901 1.41912101 1.53658579 0.27924718 0.45197836 1.40859073
 0.43619861 2.0014103  0.67532736 1.50532703 0.84281985 0.18921886
 0.29799255]


In [54]:

print(rand_vec)
print('')
# boolean masks also work for assignment: every entry greater than 0.5 becomes -5
rand_vec[rand_vec>0.5] = -5
print(rand_vec)

[-0.45946054 -0.4793546   0.73385509 -0.81534104  0.4198816  -1.61199058
  0.32303833 -0.66553559 -0.08901164  1.63008527  1.30045838  1.33758623
 -0.53380077 -1.82586007  0.0241793 ]

[-0.45946054 -0.4793546  -5.         -0.81534104  0.4198816  -1.61199058
  0.32303833 -0.66553559 -0.08901164 -5.         -5.         -5.
 -0.53380077 -1.82586007  0.0241793 ]


In [55]:
# let's save some arrays on the disk for use later!
np.save('saved_file_name',rand_mat2)


In [56]:
# np.savez stores several arrays in one file; each keyword is the name used to look the array up after loading
np.savez('zipped_file_name',rand_mat=rand_mat,rand_mat2=rand_mat2)

In [57]:
# now let's load it
# the files were saved without an extension in the name: np.save added .npy and np.savez added .npz
loaded_vec = np.load('saved_file_name.npy')
loaded_zip = np.load('zipped_file_name.npz')

print(loaded_vec)
print('')
# an .npz file loads as an NpzFile object rather than an array; its arrays are looked up by name
print(loaded_zip)

[[-1.43884916  0.32917039 -0.00262172  0.20306565 -0.52836778]
 [ 0.28887855 -0.71325347  1.56360601 -0.0693306  -1.23216106]
 [-1.29677755 -2.01486957  0.25639082  0.60136977 -1.39326532]
 [ 1.14834652  0.53353451  0.12555914 -0.23599909 -0.81183993]
 [-1.74400577  0.61543584  0.64640967 -0.19427916  2.00729726]
 [ 0.19516901 -0.97951908 -0.5753062  -0.10749662  1.41912101]
 [-1.44790776  1.53658579  0.27924718 -1.18620582 -0.06037816]
 [-0.49761454  0.45197836 -0.07716579  1.40859073 -0.84557922]
 [ 0.43619861 -0.40457839 -1.00306104  2.0014103   0.67532736]
 [-1.00426202  1.50532703  0.84281985  0.18921886  0.29799255]]

<numpy.lib.npyio.NpzFile object at 0x7ff64b866fd0>


In [58]:
# arrays inside the loaded .npz file are looked up by the keyword names that were given to np.savez
print(loaded_zip['rand_mat'])
print('')
print(loaded_zip['rand_mat2'])

# a looked-up array can be assigned to a variable and used like any other array
new_array  = loaded_zip['rand_mat']
print(new_array)

[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.         3.         0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]

[[-1.43884916  0.32917039 -0.00262172  0.20306565 -0.52836778]
 [ 0.28887855 -0.71325347  1.56360601 -0.0693306  -1.23216106]
 [-1.29677755 -2.01486957  0.25639082  0.60136977 -1.39326532]
 [ 1.14834652  0.53353451  0.12555914 -0.23599909 -0.81183993]
 [-1.74400577  0.61543584  0.64640967 -0.19427916  2.00729726]
 [ 0.19516901 -0.97951908 -0.5753062  -0.10749662  1.41912101]
 [-1.44790776  1.53658579  0.27924718 -1.18620582 -0.06037816]
 [-0.49761454  0.45197836 -0.07716579  1.40859073 -0.84557922]
 [ 0.43619861 -0.40457839 -1.00306104  2.0014103   0.67532736]
 [-1.00426202  1.50532703  0.84281985  0.18921886  0.29799255]]
[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.    

In [60]:
# we can also save/load as text files...but only single variables
np.savetxt('text_file_name.txt',rand_mat,delimiter=',')
rand_mat_txt = np.loadtxt('text_file_name.txt',delimiter=',')
print(rand_mat)
print('')
print(rand_mat_txt)
type(rand_mat_txt)

[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.         3.         0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]

[[3.         3.         3.         0.76600159 0.10674056]
 [3.         3.         3.         0.         0.        ]
 [0.81456186 0.66252888 0.76555147 0.         0.        ]
 [0.08544006 0.15982734 0.54493796 0.89333774 0.19226563]
 [0.78696697 0.23967522 0.76030838 0.16091921 0.96616492]]


numpy.ndarray

# 2.3 Test Your Understanding

In [61]:
# What will be the output of the following code:

vec = np.array([4,7,8,9,10,6,1])
print(vec[vec>6])

[ 7  8  9 10]


In [62]:
# What will be the output of the following code:

vec1 = np.array([4, 7, 8, 9, 10, 6, 1])
vec1[vec1>6] = 2
print(vec1)

[4 2 2 2 2 6 1]


Select the correct choice (True/False) for the below statement:

NumPy arrays can be saved on the disk for later use.

TRUE

The following code can be used to load a numpy array (matrix.npy is a data file):

import numpy as np
loaded_array = np.load('matrix.npy')

TRUE