<h2>Python for Data Science - NumPy Overview Examples</h2>

NumPy (or Numpy) is a Linear Algebra Library for Python. 
It is so important for Data Science with Python 
because almost all of the libraries in the PyData Ecosystem 
rely on NumPy as one of their main building blocks.

NumPy is also incredibly fast, as it has bindings to C libraries.

NumPy arrays are the main way we will use NumPy throughout the course.
NumPy arrays essentially come in two flavors:

<b>Vectors</b> - strictly 1-d arrays

<b>Matrices</b> - are 2-d arrays (but you should note a matrix can still have only one row or one column).


<h1>NumPy Arrays</h1>


In [4]:
lst = [1,2,3]

In [5]:
import numpy as np

In [6]:
arr = np.array(lst)

In [7]:
arr

array([1, 2, 3])

In [8]:
matrix = [[1,2,3],[4,5,6],[7,8,9]]

In [9]:
np.array(matrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [10]:
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
#takes the third argument as the step size you want
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [12]:
np.zeros(3)

array([0., 0., 0.])

In [13]:
#pass in tuple for 2-d matrix
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [19]:
#takes third argument as the number of points
np.linspace(0,5,100)

array([0.        , 0.05050505, 0.1010101 , 0.15151515, 0.2020202 ,
       0.25252525, 0.3030303 , 0.35353535, 0.4040404 , 0.45454545,
       0.50505051, 0.55555556, 0.60606061, 0.65656566, 0.70707071,
       0.75757576, 0.80808081, 0.85858586, 0.90909091, 0.95959596,
       1.01010101, 1.06060606, 1.11111111, 1.16161616, 1.21212121,
       1.26262626, 1.31313131, 1.36363636, 1.41414141, 1.46464646,
       1.51515152, 1.56565657, 1.61616162, 1.66666667, 1.71717172,
       1.76767677, 1.81818182, 1.86868687, 1.91919192, 1.96969697,
       2.02020202, 2.07070707, 2.12121212, 2.17171717, 2.22222222,
       2.27272727, 2.32323232, 2.37373737, 2.42424242, 2.47474747,
       2.52525253, 2.57575758, 2.62626263, 2.67676768, 2.72727273,
       2.77777778, 2.82828283, 2.87878788, 2.92929293, 2.97979798,
       3.03030303, 3.08080808, 3.13131313, 3.18181818, 3.23232323,
       3.28282828, 3.33333333, 3.38383838, 3.43434343, 3.48484848,
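       3.53535354, 3.58585859, 3.63636364, 3.68686869, 3.73737374,
       3.78787879, 3.83838384, 3.88888889, 3.93939394, 3.98989899,
       4.04040404, 4.09090909, 4.14141414, 4.19191919, 4.24242424,
       4.29292929, 4.34343434, 4.39393939, 4.44444444, 4.49494949,
       4.54545455, 4.5959596 , 4.64646465, 4.6969697 , 4.74747475,
       4.7979798 , 4.84848485, 4.8989899 , 4.94949495, 5.        ])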

In [20]:
#identity matrix useful for linear algebra problems
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [21]:
#arrays of random number values between 0-1
np.random.rand(5)

array([0.8646523 , 0.48913574, 0.95323614, 0.47722912, 0.57578691])

In [22]:
np.random.rand(5,5)

array([[0.21882825, 0.24003658, 0.50407873, 0.78166088, 0.19646533],
       [0.92546301, 0.72647225, 0.10105021, 0.71680129, 0.84459354],
       [0.60616006, 0.36545506, 0.87196689, 0.70687264, 0.13717009],
       [0.24311189, 0.34141234, 0.61408843, 0.15952117, 0.68994627],
       [0.2183673 , 0.493169  , 0.01995401, 0.70832399, 0.62574093]])

In [23]:
#returns samples drawn from the standard normal distribution
np.random.randn(4,4)

array([[ 0.38172199,  1.27985964, -0.33791237, -0.56996718],
       [ 1.00691022,  1.18185015,  0.20300095,  1.48563802],
       [ 0.40687101, -0.27160671, -0.33886795,  1.7126818 ],
       [-0.57449133, -0.66010229, -0.97236521, -1.29317109]])

In [24]:
#randint returns random integers from low (inclusive) to high (exclusive); the third argument is how many to return
np.random.randint(0,100,40)

array([57, 32, 28, 61, 60, 71, 56, 32, 52, 42, 50, 51,  0, 57,  9, 47, 42,
       91,  0, 80, 70, 40, 66, 61, 19, 90, 39,  9, 58, 14, 24, 61, 15, 31,
       59, 32,  9, 73, 70, 40])

In [25]:
#few useful attributes of arrays
arr = np.arange(25)

In [26]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [27]:
ranarr=np.random.randint(0,50,10)

In [28]:
ranarr

array([ 2, 43, 44, 14,  1, 12, 45, 38, 42, 15])

In [33]:
arr.reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [34]:
#returns max value
ranarr.max()

45

In [40]:
#returns min value
ranarr.min()

1

In [36]:
ranarr

array([ 2, 43, 44, 14,  1, 12, 45, 38, 42, 15])

In [38]:
#returns max value index location
ranarr.argmax()

6

In [39]:
#returns minimum value index location
ranarr.argmin()

4

In [41]:
#returns shape of array
arr.shape

(25,)

In [42]:
arr=arr.reshape(5,5)
arr.shape

(5, 5)

In [44]:
arr.dtype

dtype('int64')

In [2]:
#if you don't want to type np.random.randint all the time
from numpy.random import randint

In [5]:
randint(2,10,5)

array([3, 8, 5, 9, 6])

<h1>NumPy Indexing and Selection</h1>

In [6]:
import numpy as np

In [7]:
arr = np.arange(0,11)

In [8]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [16]:
arr[8]

8

In [12]:
arr[0:5]

array([0, 1, 2, 3, 4])

In [14]:
arr[:6]

array([0, 1, 2, 3, 4, 5])

In [17]:
arr[3:]

array([ 3,  4,  5,  6,  7,  8,  9, 10])

In [20]:
arr[0:5]=100

In [21]:
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [22]:
arr = np.arange(0,11)

In [23]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [24]:
slice_of_arr=arr[0:6]

In [26]:
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [27]:
slice_of_arr[:]=99

In [28]:
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [29]:
#the slice is not a copy but a view of the original array, and therefore changing it changes the original array
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [30]:
arr_copy=arr.copy()

In [31]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [32]:
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [33]:
arr[:]=100

In [34]:
arr

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [35]:
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [36]:
#indexing a 2d array aka matrix
arr2d=np.array([[5,10,15],[20,25,30],[35,40,45]])

In [37]:
arr2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [41]:
arr2d[2][1]

40

In [40]:
arr2d[2,1]

40

In [49]:
#2d array index slicing
arr2d[1:,1:]

array([[25, 30],
       [40, 45]])

In [50]:
arr=np.arange(1,11)

In [51]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [57]:
#syntax for conditional selection (used a lot in pandas)
bool_arr= arr>5

In [58]:
bool_arr

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [59]:
arr[bool_arr]

array([ 6,  7,  8,  9, 10])

In [60]:
arr[arr<3]

array([1, 2])

In [67]:
arr2d=np.random.randint(10,50,50).reshape(5,10)
arr2d

array([[41, 28, 31, 36, 27, 22, 40, 49, 26, 25],
       [11, 24, 18, 19, 26, 35, 28, 35, 45, 41],
       [37, 44, 22, 41, 15, 39, 48, 48, 11, 19],
       [25, 43, 48, 40, 25, 34, 17, 41, 19, 41],
       [35, 10, 25, 47, 21, 27, 49, 40, 24, 35]])

In [69]:
arr2d[3:,3:7]


array([[40, 25, 34, 17],
       [47, 21, 27, 49]])

<h1>NumPy Operations</h1>

- Array with Array <br>
- Array with Scalars <br>
- Universal Array Functions

In [70]:
import numpy as np

In [71]:
arr=np.arange(0,11)

In [72]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [73]:
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [74]:
arr -arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [75]:
arr*arr

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

In [76]:
arr + 100

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [79]:
arr - 100

array([-100,  -99,  -98,  -97,  -96,  -95,  -94,  -93,  -92,  -91,  -90])

In [80]:
#NumPy won't raise an error on division by zero; it gives a warning and returns nan (for 0/0) or inf (for 1/0)
arr/arr

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [81]:
1/arr

  """Entry point for launching an IPython kernel.


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
       0.1       ])

In [82]:
arr**2

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

In [84]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [85]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [87]:
np.max(arr)

10

In [88]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

In [89]:
np.log(arr)

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,
       2.30258509])

<h1>NumPy Exercises Overview</h1>

In [2]:
#import NumPy as np
import numpy as np

In [97]:
#Array with 10 zeros
arr=np.zeros(10)
arr

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [98]:
#create array of 10 ones
arr=np.ones(10)
arr

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [3]:
#create an array of 10 fives
np.ones(10)*5

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [116]:
#create an array of integers from 10 to 50
np.arange(10,51)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
       44, 45, 46, 47, 48, 49, 50])

In [117]:
#create an array of all the even integers from 10 to 50
np.arange(10,51,2)

array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,
       44, 46, 48, 50])

In [118]:
#create a 3x3 matrix with values ranging from 0 to 8
np.arange(0,9).reshape(3,3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [120]:
#create a 3x3 identity matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [4]:
#use NumPy to generate a random number between 0 and 1
np.random.rand(1)

array([0.55118992])

In [5]:
#use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution
np.random.randn(25)

array([-0.52783134, -0.18441783, -2.19579074, -1.0218689 ,  0.19632175,
       -0.39654968,  0.1208737 , -0.84052678, -0.56290382, -0.45892244,
       -0.41510926,  0.8339802 , -0.18188715, -1.93273531, -0.23257067,
       -0.80196369, -0.32336732,  0.01115485, -0.02788314,  0.0767972 ,
       -0.47827529,  0.21811458,  0.67043398,  0.03845162, -0.07220669])

In [136]:
np.arange(.01,1.01,0.01).reshape(10,10)

array([[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],
       [0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],
       [0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],
       [0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],
       [0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],
       [0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],
       [0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],
       [0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],
       [0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],
       [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.  ]])

In [147]:
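#create an array of 20 linearly spaced points between 0 and 1
#NOTE: the call below returns 20 sorted random samples, which are not evenly spaced; np.linspace(0,1,20) gives linearly spaced points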
np.sort(np.random.rand(20))

array([0.13752287, 0.18893016, 0.21693672, 0.28598399, 0.4084887 ,
       0.46961165, 0.51584861, 0.60563966, 0.70342678, 0.72816491,
       0.74285217, 0.77434491, 0.78636449, 0.78668157, 0.8052101 ,
       0.88063409, 0.89818396, 0.90302757, 0.92770787, 0.9561283 ])

In [7]:
#matrix selection
mat=np.arange(1,26).reshape(5,5)
mat

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [8]:
mat[2:,1:]

array([[12, 13, 14, 15],
       [17, 18, 19, 20],
       [22, 23, 24, 25]])

In [9]:
mat[3,4]

20

In [21]:
#keep in mind the indexing notation

print(mat[0:3,1])

print(mat[0:3,1:2])

[ 2  7 12]
[[ 2]
 [ 7]
 [12]]


In [16]:
mat[4:]

array([[21, 22, 23, 24, 25]])

In [22]:
mat[3:]

array([[16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [23]:
#sum of all values in mat
np.sum(mat)

325

In [24]:
#standard deviation of mat
np.std(mat)

7.211102550927978

In [25]:
#sum of all columns in mat
mat.sum(axis=0)

array([55, 60, 65, 70, 75])