# NumPy

NumPy is Python's package for doing math that goes beyond basic arithmetic (`+`, `-`, `*`, `/`).

This includes special functions like cosine, exponential, sqrt, ...

On top of this we can use numpy to generate samples from many types of random variables

NumPy also has a powerful data type, the array, for defining vectors, matrices, and tensors.

With these data types, NumPy also lets us do linear algebra, such as matrix multiplication and solving matrix-vector equations.

In [1]:
# the first step of using numpy is to tell python to use it
import numpy as np

In [11]:
# a few of NumPy's scalar math functions and constants, printed one per line
for value in (
    np.cos(np.pi),
    np.pi,
    np.sqrt(1.21),
    np.log(np.exp(3.4)),   # log undoes exp, so this gives 3.4 back
    np.exp(3.4),
):
    print(value)

-1.0
3.141592653589793
1.1
3.4
29.96410004739701


In [6]:
# numpy arrays can be built from plain python lists:
# a flat list becomes a vector ...
vec = np.array([1, 2, 3])
# ... and a list of equal-length rows becomes a matrix
mat = np.array([
    [1, 2, 1],
    [4, 5, 9],
    [1, 8, 9],
])

# show the vector, the matrix and its transpose, separated by blank lines
print(vec, mat, mat.T, sep='\n\n')

[1 2 3]

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[[1 4 1]
 [2 5 8]
 [1 9 9]]


In [14]:
# np.arange counts like range(): start, stop (excluded) and an optional step
vec2 = np.arange(15)          # 0, 1, ..., 14
vec3 = np.arange(3, 22, 6)    # 3, 9, 15, 21

print(vec2, end='\n\n')
print(vec3)
print(' ')
# the same 15 numbers laid out as 3 rows of 5
print(vec2.reshape((3, 5)))


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]

[ 3  9 15 21]
 
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


In [10]:

# ten evenly spaced points running from 0 to 5 (both ends included)
vec4 = np.linspace(0, 5, num=10)
# keep the 5x2 layout under its own name
vec4_reshaped = vec4.reshape((5, 2))

print(vec4, end='\n\n')
print(vec4.reshape((5, 2)))
print(' ')
print(vec4_reshaped)
# vec4 itself is still a flat vector after the reshape calls
print(vec4)

[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]

[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
 
[[0.         0.55555556]
 [1.11111111 1.66666667]
 [2.22222222 2.77777778]
 [3.33333333 3.88888889]
 [4.44444444 5.        ]]
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]


In [12]:
# ready-made constant matrices
mat2 = np.zeros((5, 3))   # 5 rows x 3 columns of zeros
mat3 = np.ones((3, 5))    # 3 rows x 5 columns of ones
mat4 = np.eye(5)          # 5x5 identity matrix

# print them with a blank line between each
for shown in (mat2, '', mat3, '', mat4):
    print(shown)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [15]:
# arrays of matching size combine element by element under + - * /
vec5 = np.arange(1, 6)
vec6 = np.arange(3, 8)

print(
    vec5,
    vec6,
    vec5 + vec6,      # element-wise sum
    vec5 * vec6,      # element-wise product (not a dot product)
    1 / vec5,         # reciprocal of every entry
    np.sqrt(vec6),    # functions apply entry by entry too
    sep='\n',
)

[1 2 3 4 5]
[3 4 5 6 7]
[ 4  6  8 10 12]
[ 3  8 15 24 35]
[1.         0.5        0.33333333 0.25       0.2       ]
[1.73205081 2.         2.23606798 2.44948974 2.64575131]


In [16]:
# matrix multiplication: the 3x3 matrix times the length-3 vector
product = mat @ vec

# show both inputs and the result, separated by blank lines
print(mat, vec, product, sep='\n\n')

[[1 2 1]
 [4 5 9]
 [1 8 9]]

[1 2 3]

[ 8 41 44]


In [17]:
# solve the linear system  mat @ x = product  for x
solution = np.linalg.solve(mat, product)
# the (multiplicative) inverse of the matrix
mat_inverse = np.linalg.inv(mat)

print(solution)
print('')
print(mat_inverse)

[1. 2. 3.]

[[ 0.5         0.18518519 -0.24074074]
 [ 0.5        -0.14814815  0.09259259]
 [-0.5         0.11111111  0.05555556]]


In [20]:
# np.unique lists each distinct value of an array once, in sorted order
# (the integer 6 is stored as the string '6', and 'Red' differs from 'red')
colour_labels = ['blue', 'red', 'orange', 'purple', 'purple', 'orange', 'Red', 6]
vec7 = np.array(colour_labels)
print(vec7, np.unique(vec7), sep='\n')

['blue' 'red' 'orange' 'purple' 'purple' 'orange' 'Red' '6']
['6' 'Red' 'blue' 'orange' 'purple' 'red']


In [21]:
# numpy can draw samples of random variables
# 5x5 samples of a uniform random variable
rand_mat = np.random.random_sample((5, 5))
print(rand_mat, end='\n\n')
# 10x5 samples of a standard normal random variable
rand_mat2 = np.random.standard_normal((10, 5))
print(rand_mat2)

[[0.51463923 0.20266099 0.67381401 0.30854055 0.51287052]
 [0.4573951  0.2512215  0.99833316 0.71918289 0.28948381]
 [0.55193298 0.6729688  0.00228986 0.62446196 0.90261221]
 [0.08170826 0.66031591 0.58502755 0.37848407 0.50048242]
 [0.96772428 0.19103893 0.74908189 0.66751606 0.55839732]]

[[-1.31792162  0.27038552  2.75929834 -0.11511857 -2.23446526]
 [ 0.74713312 -1.61150265  0.42117954 -1.25237129  1.62951043]
 [-1.6342231   0.9690605   0.49927238  0.17900322 -0.82621911]
 [ 0.33291615  1.80454365 -0.12415988  0.83169468  0.51651204]
 [ 0.09785803 -0.34098738 -1.48949681 -0.44633211 -0.58378772]
 [ 0.30600833 -1.12464805  0.30406625 -0.22710992 -0.99832198]
 [ 0.09018151 -0.07989612 -0.79497093 -0.88290836 -1.86624194]
 [ 0.28107896  0.46342528 -1.1406155   1.7006389  -1.39885367]
 [ 0.1256738   0.09384156 -0.72350235  1.00292523  0.29981304]
 [ 0.30529717  0.54172173 -1.40349486 -0.13052712 -1.20168567]]


In [22]:
# summary statistics taken over every entry of an array
print(rand_mat.mean())    # average of the uniform samples
print(rand_mat2.std())    # standard deviation of the normal samples

0.5208873705391884
1.019905475282728


In [23]:
# smallest uniform sample and largest normal sample
print(rand_mat.min())
print(rand_mat2.max())

0.0022898568520857143
2.7592983398507736


In [None]:
# break here for next video!

In [24]:
# accessing a single entry of a numpy vector
rand_vec = np.random.standard_normal(19)
# indices start at 0, so [6] picks the seventh entry
print(rand_vec, rand_vec[6], sep='\n')

[ 2.45906768  0.69792699  0.30379656 -0.8251078  -0.65751921 -0.90688723
 -0.07851378  0.72139419 -0.43148527 -0.0769731  -1.69926925  0.15517714
  0.81262835 -0.76054302  1.0624881  -0.93082761 -0.01753289  0.53775105
  0.93160142]
-0.07851378212526099


In [25]:
# start:stop picks a run of consecutive entries (here indices 4 to 8)
consecutive_entries = rand_vec[4:9]
print(consecutive_entries)

[-0.65751921 -0.90688723 -0.07851378  0.72139419 -0.43148527]


In [26]:
# an array of indices picks out non-consecutive entries in one go
every_third = np.arange(0, 15, 3)
print(every_third)
print(rand_vec[every_third])

[ 0  3  6  9 12]
[ 2.45906768 -0.8251078  -0.07851378 -0.0769731   0.81262835]


In [27]:
# matrices take a row index and a column index
row, col = 1, 2
print(rand_mat)
# both spellings below print the same entry
print(rand_mat[row][col])
print(rand_mat[row, col])



[[0.51463923 0.20266099 0.67381401 0.30854055 0.51287052]
 [0.4573951  0.2512215  0.99833316 0.71918289 0.28948381]
 [0.55193298 0.6729688  0.00228986 0.62446196 0.90261221]
 [0.08170826 0.66031591 0.58502755 0.37848407 0.50048242]
 [0.96772428 0.19103893 0.74908189 0.66751606 0.55839732]]
0.998333159955304
0.998333159955304


In [28]:
# rows 0-1 and columns 1-2 of the matrix
print(rand_mat[:2, 1:3])

[[0.20266099 0.67381401]
 [0.2512215  0.99833316]]


In [29]:
# changing values in an array through slice assignment
print(rand_vec, end='\n\n')
# one value is written into every selected entry
rand_vec[3:5] = 4
print(rand_vec, end='\n\n')
# a sequence of matching length is written entry by entry
rand_vec[3:5] = (1, 2)
print(rand_vec)

[ 2.45906768  0.69792699  0.30379656 -0.8251078  -0.65751921 -0.90688723
 -0.07851378  0.72139419 -0.43148527 -0.0769731  -1.69926925  0.15517714
  0.81262835 -0.76054302  1.0624881  -0.93082761 -0.01753289  0.53775105
  0.93160142]

[ 2.45906768  0.69792699  0.30379656  4.          4.         -0.90688723
 -0.07851378  0.72139419 -0.43148527 -0.0769731  -1.69926925  0.15517714
  0.81262835 -0.76054302  1.0624881  -0.93082761 -0.01753289  0.53775105
  0.93160142]

[ 2.45906768  0.69792699  0.30379656  1.          2.         -0.90688723
 -0.07851378  0.72139419 -0.43148527 -0.0769731  -1.69926925  0.15517714
  0.81262835 -0.76054302  1.0624881  -0.93082761 -0.01753289  0.53775105
  0.93160142]


In [30]:
# zero out rows 1-2, columns 3-4 and show the matrix before and after
print(rand_mat, end='\n\n')
rand_mat[1:3, 3:5] = 0
print(rand_mat)

[[0.51463923 0.20266099 0.67381401 0.30854055 0.51287052]
 [0.4573951  0.2512215  0.99833316 0.71918289 0.28948381]
 [0.55193298 0.6729688  0.00228986 0.62446196 0.90261221]
 [0.08170826 0.66031591 0.58502755 0.37848407 0.50048242]
 [0.96772428 0.19103893 0.74908189 0.66751606 0.55839732]]

[[0.51463923 0.20266099 0.67381401 0.30854055 0.51287052]
 [0.4573951  0.2512215  0.99833316 0.         0.        ]
 [0.55193298 0.6729688  0.00228986 0.         0.        ]
 [0.08170826 0.66031591 0.58502755 0.37848407 0.50048242]
 [0.96772428 0.19103893 0.74908189 0.66751606 0.55839732]]


In [31]:
# take the top-left 2x3 corner of rand_mat by slicing
sub_mat = rand_mat[:2, :3]
print(sub_mat)
# overwrite every entry of the slice with 3
sub_mat.fill(3)
print(sub_mat)



[[0.51463923 0.20266099 0.67381401]
 [0.4573951  0.2512215  0.99833316]]
[[3. 3. 3.]
 [3. 3. 3.]]


In [None]:
# show the full matrix again: sub_mat was taken by slicing rand_mat, so the
# 3s written into sub_mat also appear in rand_mat's top-left 2x3 corner
print(rand_mat)

In [32]:
# .copy() gives sub_mat2 its own data, separate from rand_mat
top_left_corner = rand_mat[:2, :3]
sub_mat2 = top_left_corner.copy()
sub_mat2.fill(99)
print(sub_mat2)
# rand_mat does not pick up the 99s
print(rand_mat)



[[99. 99. 99.]
 [99. 99. 99.]]
[[3.00000000e+00 3.00000000e+00 3.00000000e+00 3.08540553e-01
  5.12870521e-01]
 [3.00000000e+00 3.00000000e+00 3.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [5.51932978e-01 6.72968801e-01 2.28985685e-03 0.00000000e+00
  0.00000000e+00]
 [8.17082583e-02 6.60315912e-01 5.85027546e-01 3.78484074e-01
  5.00482421e-01]
 [9.67724283e-01 1.91038933e-01 7.49081894e-01 6.67516060e-01
  5.58397316e-01]]


In [None]:
# break here for next video

In [33]:
# entries can also be selected with logicals (a True/False mask)
rand_vec = np.random.standard_normal(15)
is_positive = rand_vec > 0

print(rand_vec)
print(is_positive)
# only the entries where the mask is True are kept
print(rand_vec[is_positive])

[-0.3932279  -1.31524388 -0.0679772   1.37187531  0.65102203  1.00618949
 -0.13712709 -0.8021611  -0.67875947  0.33932539  0.61393832 -0.72530207
 -0.19801649 -0.5989619  -0.8728723 ]
[False False False  True  True  True False False False  True  True False
 False False False]
[1.37187531 0.65102203 1.00618949 0.33932539 0.61393832]


In [34]:
# masking a matrix returns the selected entries as a flat vector
positive_entries = rand_mat2 > 0
print(rand_mat2)
print(rand_mat2[positive_entries])

[[-1.31792162  0.27038552  2.75929834 -0.11511857 -2.23446526]
 [ 0.74713312 -1.61150265  0.42117954 -1.25237129  1.62951043]
 [-1.6342231   0.9690605   0.49927238  0.17900322 -0.82621911]
 [ 0.33291615  1.80454365 -0.12415988  0.83169468  0.51651204]
 [ 0.09785803 -0.34098738 -1.48949681 -0.44633211 -0.58378772]
 [ 0.30600833 -1.12464805  0.30406625 -0.22710992 -0.99832198]
 [ 0.09018151 -0.07989612 -0.79497093 -0.88290836 -1.86624194]
 [ 0.28107896  0.46342528 -1.1406155   1.7006389  -1.39885367]
 [ 0.1256738   0.09384156 -0.72350235  1.00292523  0.29981304]
 [ 0.30529717  0.54172173 -1.40349486 -0.13052712 -1.20168567]]
[0.27038552 2.75929834 0.74713312 0.42117954 1.62951043 0.9690605
 0.49927238 0.17900322 0.33291615 1.80454365 0.83169468 0.51651204
 0.09785803 0.30600833 0.30406625 0.09018151 0.28107896 0.46342528
 1.7006389  0.1256738  0.09384156 1.00292523 0.29981304 0.30529717
 0.54172173]


In [35]:

# a mask can also be used to overwrite entries:
# everything larger than 0.5 is replaced by -5
print(rand_vec, end='\n\n')
above_half = rand_vec > 0.5
rand_vec[above_half] = -5
print(rand_vec)

[-0.3932279  -1.31524388 -0.0679772   1.37187531  0.65102203  1.00618949
 -0.13712709 -0.8021611  -0.67875947  0.33932539  0.61393832 -0.72530207
 -0.19801649 -0.5989619  -0.8728723 ]

[-0.3932279  -1.31524388 -0.0679772  -5.         -5.         -5.
 -0.13712709 -0.8021611  -0.67875947  0.33932539 -5.         -0.72530207
 -0.19801649 -0.5989619  -0.8728723 ]


In [36]:
# store a single array on disk for later use
# (it is read back further down as 'saved_file_name.npy')
np.save(file='saved_file_name', arr=rand_mat2)


In [37]:
# store several arrays in one file; each keyword becomes the name
# the array is looked up by after loading
arrays_to_store = {'rand_mat': rand_mat, 'rand_mat2': rand_mat2}
np.savez('zipped_file_name', **arrays_to_store)

In [39]:
# read both files back from disk
loaded_vec = np.load('saved_file_name.npy')
loaded_zip = np.load('zipped_file_name.npz')

# the .npy file comes back as the array itself
print(loaded_vec, end='\n\n')
# printing the .npz object shows only the file object, not its arrays
print('zip', loaded_zip, sep='\n')

[[-1.31792162  0.27038552  2.75929834 -0.11511857 -2.23446526]
 [ 0.74713312 -1.61150265  0.42117954 -1.25237129  1.62951043]
 [-1.6342231   0.9690605   0.49927238  0.17900322 -0.82621911]
 [ 0.33291615  1.80454365 -0.12415988  0.83169468  0.51651204]
 [ 0.09785803 -0.34098738 -1.48949681 -0.44633211 -0.58378772]
 [ 0.30600833 -1.12464805  0.30406625 -0.22710992 -0.99832198]
 [ 0.09018151 -0.07989612 -0.79497093 -0.88290836 -1.86624194]
 [ 0.28107896  0.46342528 -1.1406155   1.7006389  -1.39885367]
 [ 0.1256738   0.09384156 -0.72350235  1.00292523  0.29981304]
 [ 0.30529717  0.54172173 -1.40349486 -0.13052712 -1.20168567]]

zip
<numpy.lib.npyio.NpzFile object at 0x000001FFB4374708>


In [40]:
# arrays inside the .npz file are looked up by the names they were saved under
print(loaded_zip['rand_mat'], end='\n\n')
print(loaded_zip['rand_mat2'])

# a looked-up array can be bound to a variable like any other array
new_array = loaded_zip['rand_mat']
print(new_array)

[[3.00000000e+00 3.00000000e+00 3.00000000e+00 3.08540553e-01
  5.12870521e-01]
 [3.00000000e+00 3.00000000e+00 3.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [5.51932978e-01 6.72968801e-01 2.28985685e-03 0.00000000e+00
  0.00000000e+00]
 [8.17082583e-02 6.60315912e-01 5.85027546e-01 3.78484074e-01
  5.00482421e-01]
 [9.67724283e-01 1.91038933e-01 7.49081894e-01 6.67516060e-01
  5.58397316e-01]]

[[-1.31792162  0.27038552  2.75929834 -0.11511857 -2.23446526]
 [ 0.74713312 -1.61150265  0.42117954 -1.25237129  1.62951043]
 [-1.6342231   0.9690605   0.49927238  0.17900322 -0.82621911]
 [ 0.33291615  1.80454365 -0.12415988  0.83169468  0.51651204]
 [ 0.09785803 -0.34098738 -1.48949681 -0.44633211 -0.58378772]
 [ 0.30600833 -1.12464805  0.30406625 -0.22710992 -0.99832198]
 [ 0.09018151 -0.07989612 -0.79497093 -0.88290836 -1.86624194]
 [ 0.28107896  0.46342528 -1.1406155   1.7006389  -1.39885367]
 [ 0.1256738   0.09384156 -0.72350235  1.00292523  0.29981304]
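 [ 0.30529717  0.54172173 -1.40349486 -0.13052712 -1.20168567]]
[[3.00000000e+00 3.00000000e+00 3.00000000e+00 3.08540553e-01
  5.12870521e-01]
 [3.00000000e+00 3.00000000e+00 3.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [5.51932978e-01 6.72968801e-01 2.28985685e-03 0.00000000e+00
  0.00000000e+00]
 [8.17082583e-02 6.60315912e-01 5.85027546e-01 3.78484074e-01
  5.00482421e-01]
 [9.67724283e-01 1.91038933e-01 7.49081894e-01 6.67516060e-01
  5.58397316e-01]]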

In [41]:
# arrays can also go to / come from text files, but only one variable per file
text_path = 'text_file_name.txt'
np.savetxt(text_path, rand_mat, delimiter=',')
rand_mat_txt = np.loadtxt(text_path, delimiter=',')

# original and reloaded matrix, separated by a blank line
print(rand_mat, rand_mat_txt, sep='\n\n')

[[3.00000000e+00 3.00000000e+00 3.00000000e+00 3.08540553e-01
  5.12870521e-01]
 [3.00000000e+00 3.00000000e+00 3.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [5.51932978e-01 6.72968801e-01 2.28985685e-03 0.00000000e+00
  0.00000000e+00]
 [8.17082583e-02 6.60315912e-01 5.85027546e-01 3.78484074e-01
  5.00482421e-01]
 [9.67724283e-01 1.91038933e-01 7.49081894e-01 6.67516060e-01
  5.58397316e-01]]

[[3.00000000e+00 3.00000000e+00 3.00000000e+00 3.08540553e-01
  5.12870521e-01]
 [3.00000000e+00 3.00000000e+00 3.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [5.51932978e-01 6.72968801e-01 2.28985685e-03 0.00000000e+00
  0.00000000e+00]
 [8.17082583e-02 6.60315912e-01 5.85027546e-01 3.78484074e-01
  5.00482421e-01]
 [9.67724283e-01 1.91038933e-01 7.49081894e-01 6.67516060e-01
  5.58397316e-01]]


In [1]:
# identify missing values (marked '?') and fill them with each column's median
import pandas as pd
import numpy as np

def fill_with_median(column):
    # replace the missing entries of one column with that column's median
    return column.fillna(column.median())


mpg_df = pd.read_csv('car_mpg.csv')
# turn the '?' placeholder into NaN so pandas treats it as missing
mpg_df = mpg_df.replace(to_replace='?', value=np.nan)
mpg_df['hp'] = mpg_df['hp'].astype('float64')

# leave out the 'car name' column before filling
numeric_cols = mpg_df.drop(labels='car name', axis='columns')
print(numeric_cols.head())
print(' ')
# fill column by column
numeric_cols = numeric_cols.apply(fill_with_median, axis=0)
print(numeric_cols.head())
print(' ')
# this prints the median of the mpg column
print('mpg 1st value ', mpg_df['mpg'].median())

   mpg  cyl   disp     hp    wt   acc   yr  origin
0  NaN    8  307.0  130.0  3504  12.0  NaN       1
1   15    8  350.0  165.0  3693  11.5   70       1
2   18    8  318.0  150.0  3436  11.0   70       1
3   16    8  304.0  150.0  3433  12.0   70       1
4   17    8  302.0  140.0  3449  10.5   70       1
 
  mpg  cyl   disp     hp    wt   acc  yr  origin
0  23    8  307.0  130.0  3504  12.0  76       1
1  15    8  350.0  165.0  3693  11.5  70       1
2  18    8  318.0  150.0  3436  11.0  70       1
3  16    8  304.0  150.0  3433  12.0  70       1
4  17    8  302.0  140.0  3449  10.5  70       1
 
mpg 1st value  23.0
