# Week 3 Notes: NumPy and Arrays

 * External Python Libraries and NumPy
 * Importing NumPy into a Jupyter Notebook
 * NumPy Arrays compared to Python Lists
 * Array Creation
 * Array Operations
 * Array Slicing
 * Array Masking

## External Python Libraries and NumPy

 * Some Python functions are "built-in" and do not need to be imported before they are used.
 
 For example, ```input()``` and ```print()```.

In [1]:
n = input("Enter a number")
print(n)

Enter a number2
2


 * Other Python functions come with Python when Python is installed, but they need to be imported before they are used. These functions are part of the Python Standard Library.
 
For example, ```math.sin()``` and ```statistics.mean()```.

In [2]:
math.sin(30)  # the math module must be imported before it is used

NameError: name 'math' is not defined

In [3]:
import math
math.sin(30)   # After the math module is imported, its functions can be used. Note: math.sin() expects the angle in radians, not degrees

-0.9880316240928618

 * Outside the Standard Library are a lot of external modules on PyPI or conda-forge (the conda package index) for you to use. Some of these external modules are part of the Anaconda distribution of Python. External modules need to be installed and imported before they are used.
 
 NumPy is an example of an external module that is part of the Anaconda Distribution.

In [4]:
# I have installed the Anaconda Distribution of Python, so NumPy has been installed already
np.array([1,2])

NameError: name 'np' is not defined

In [5]:
# NumPy must first be imported before NumPy functions can be used
import numpy as np
np.__version__

'1.18.5'

## NumPy Arrays compared to Python Lists

In [6]:
l = [1,2.2,'word'] # multiple different data types can be stored in a Python list

In [7]:
type(l)

list

In [8]:
type(l[0])

int

In [9]:
type(l[1])

float

In [10]:
type(l[2])

str

In [11]:
l*3  # it's not easy to run computations on Python lists

[1, 2.2, 'word', 1, 2.2, 'word', 1, 2.2, 'word']

In [13]:
# The elements of a NumPy array all share the same data type
import numpy as np

a = np.array([1,2,3])

In [14]:
type(a[0])

numpy.int32

In [15]:
type(a[1])

numpy.int32

In [16]:
a.dtype

dtype('int32')

In [17]:
b = np.array([2.2,3.3])  # an array of floats

In [18]:
type(b[0])

numpy.float64

In [19]:
type(b[1])

numpy.float64

In [21]:
b.dtype

dtype('float64')

In [22]:
c = np.array([1,2.2,'s']) # try to create a NumPy array with 3 different data types; every element is converted to a string

In [23]:
type(c[0])

numpy.str_

In [24]:
type(c[1])

numpy.str_

In [25]:
type(c[2])

numpy.str_

In [27]:
d = np.array([1,2.2], dtype=np.float64)

In [28]:
type(d[0])

numpy.float64

In [29]:
l = [1,2.2] # Python List
a = np.array([1,2.2]) # NumPy Array

In [30]:
l*2

[1, 2.2, 1, 2.2]

In [31]:
a*2

array([2. , 4.4])

## Array Creation

In [33]:
import numpy as np

In [34]:
a = np.array([1,2,2]) # create an array from a Python List
type(a)

numpy.ndarray

In [41]:
b = np.array(4) # create an array from an individual object
type(b)

numpy.ndarray

In [42]:
c = np.arange(0,3,1)    # create an array of numbers based on start, stop, step
c

array([0, 1, 2])

In [43]:
d = np.arange(3) # default start, stop, default step
d

array([0, 1, 2])

In [45]:
f = np.arange(0,3)  # start, stop, default step
f

array([0, 1, 2])

In [46]:
g = np.arange(0,1.1,0.1)
g

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [47]:
g.size

11

In [48]:
g.shape

(11,)

In [49]:
# regularly spaced array of numbers
# (start,stop, number of elements)
h = np.linspace(0,np.pi,10) # 10 angles between 0 and pi, equally spaced
h

array([0.        , 0.34906585, 0.6981317 , 1.04719755, 1.3962634 ,
       1.74532925, 2.0943951 , 2.44346095, 2.7925268 , 3.14159265])

In [51]:
k = np.logspace(1,4,4)   # logarithmically spaced numbers. Note the supplied start and stop are the exponents (powers of 10), not the numbers themselves
k

array([   10.,   100.,  1000., 10000.])

In [53]:
p = np.logspace(1,10000,2)   # 10^10,000 is larger than the largest floating point number

In [54]:
q = np.zeros(4)
q

array([0., 0., 0., 0.])

In [57]:
r = np.zeros([2,3])    # row,col
r

array([[0., 0., 0.],
       [0., 0., 0.]])

In [58]:
r.size

6

In [59]:
r.shape

(2, 3)

In [61]:
t = np.ones([4,6])
t

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [63]:
# a meshgrid is a 2D-array built out of two 1D-arrays
a = np.arange(1,4)
a

array([1, 2, 3])

In [65]:
b = np.arange(10,30,10)
b

array([10, 20])

In [66]:
g = np.meshgrid(a,b)
g

[array([[1, 2, 3],
        [1, 2, 3]]),
 array([[10, 10, 10],
        [20, 20, 20]])]

## Array Operations

In [67]:
a = np.arange(1,5)
a

array([1, 2, 3, 4])

In [68]:
a*2

array([2, 4, 6, 8])

In [69]:
a/2

array([0.5, 1. , 1.5, 2. ])

In [70]:
c = np.array([1,2])
d = np.array([10,20])
c + d

array([11, 22])

In [71]:
d - c

array([ 9, 18])

In [72]:
c*d

array([10, 40])

In [73]:
c/d

array([0.1, 0.1])

In [74]:
np.dot(c,d)   # the vector dot product

50

In [75]:
np.cross(c,d) # the vector cross product

array(0)

In [76]:
import math
math.sin([1,2,3])

TypeError: must be real number, not list

In [77]:
import numpy as np
np.sin(np.array([1,2,3]))

array([0.84147098, 0.90929743, 0.14112001])

In [78]:
np.mean(a)

2.5

In [80]:
d**2

array([100, 400], dtype=int32)

In [81]:
a = np.array([1,2,3])

In [82]:
import math
math.sin(a)

TypeError: only size-1 arrays can be converted to Python scalars

In [83]:
import numpy as np
np.sin(a)

array([0.84147098, 0.90929743, 0.14112001])

## Array Indexing

In [84]:
a = np.array([[4,7,9],[12,13,18]])
a

array([[ 4,  7,  9],
       [12, 13, 18]])

In [85]:
a.shape

(2, 3)

In [86]:
a[0]

array([4, 7, 9])

In [88]:
a[1]

array([12, 13, 18])

In [89]:
# pull out the number 18   row1, col2
a[1,2]    # row,col

18

In [90]:
# pull out number 12   row1, col0
a[1,0]

12

## Array Slicing

In [91]:
a = np.array([[4,7,9],[12,13,18]])
a

array([[ 4,  7,  9],
       [12, 13, 18]])

In [92]:
# pull out the row 4,7,9:   row 0, col 0 to 3
a[0,0:3]

array([4, 7, 9])

In [93]:
# pull out the column 9,18    row 0 to 2, col 2
a[0:2,2]

array([ 9, 18])

In [94]:
# pull out the block 7,9,13,18    row 0 to 2, col 1 to 3
a[0:2,1:3]

array([[ 7,  9],
       [13, 18]])

In [95]:
# pull out 4,12 and 9,18 (the first col and the last col):   row 0 to 2, col 0 to 3 count by 2
a[0:2,0:3:2]

array([[ 4,  9],
       [12, 18]])

In [96]:
a[:,0:3:2]   # all rows, col 0 to 3 count by 2

array([[ 4,  9],
       [12, 18]])

In [97]:
a[:,::2]    # all rows, col start default end default count by 2

array([[ 4,  9],
       [12, 18]])

In [98]:
# pull out last row
a[-1,:]     # last row, all columns

array([12, 13, 18])

In [99]:
# pull out the 2nd to last column
a[:,-2]     # all rows, 2nd to last col

array([ 7, 13])

## Array Masking

Masking is the process of pulling out individual values or groups of values from an array based on a logical (boolean) mask.

In [100]:
a = np.array([1,2,3,4])
mask = np.array([False,True,False,False])

In [101]:
# index out values from array a based on mask "mask"
a[mask]

array([2])

In [102]:
# pull out 2 and 3 using a boolean mask
a = np.array([1,2,3,4])
mask2 = np.array([False,True,True,False])
a[mask2]

array([2, 3])

In [103]:
b = np.array([[1,2,3],[100,200,300]])
b

array([[  1,   2,   3],
       [100, 200, 300]])

In [104]:
# Pull out the values 3 and 100 using a boolean mask
mask3=np.array([[False,False,True],[True,False,False]])
b[mask3]

array([  3, 100])

In [105]:
# create a boolean mask using a comparison operator
b = np.array([[1,2,3],[100,200,300]])
b

array([[  1,   2,   3],
       [100, 200, 300]])

In [106]:
# pull out all the values greater than 150
mask4 = b>150
mask4

array([[False, False, False],
       [False,  True,  True]])

In [107]:
b[mask4]

array([200, 300])

In [109]:
# pull out all the values less than 50
mask5 = b<50
mask5

array([[ True,  True,  True],
       [False, False, False]])

In [110]:
b[mask5]

array([1, 2, 3])

In [111]:
# pull out the value where the array is equal to 100
mask6 = b==100
mask6

array([[False, False, False],
       [ True, False, False]])

In [112]:
b[mask6]

array([100])

In [113]:
# boolean masking inline (in one line of code)
b[b<=2]

array([1, 2])

In [114]:
# np.where function
# boolean masking pulls out values (the elements of the array) but does not give you locations of values

In [115]:
b

array([[  1,   2,   3],
       [100, 200, 300]])

In [116]:
# where is the value 3 in array b?  (it's on row 0, col 2)
np.where(b==3)

(array([0], dtype=int64), array([2], dtype=int64))

In [117]:
loc = np.where(b==3)

In [118]:
loc

(array([0], dtype=int64), array([2], dtype=int64))

In [125]:
b[loc]

array([3])

In [126]:
loc2 = np.where(b>50)
loc2

(array([1, 1, 1], dtype=int64), array([0, 1, 2], dtype=int64))

In [127]:
b[loc2]

array([100, 200, 300])