# Week 3 - NumPy and Arrays

### NumPy is a Python package for numerical computing. The core NumPy data type is the homogeneous array.

### Built-in functions in Python don't have to be 'imported' before use

In [1]:
a=5
type(a) # Python's type() function is built-in and does not need to be imported before it is called

int

In [2]:
b="NumPy and Arrays"
type(b)

str

In [3]:
name = input("What is your name? ")    # the input() function is built-in
print("Your name is:",name)

What is your name? Peter
Your name is: Peter


In [4]:
n_month = input("What number of month is it?")  # Python's input function always returns a string
# Convert the string to an int before doing arithmetic on it.
# The modulo makes the count wrap around, so month 12 (December) is followed by month 1, not 13.
next_month = int(n_month) % 12 + 1
print("Next month is number:",next_month)

What number of month is it?10
Next month is number: 11


In [5]:
input("enter something:")    # a code cell that runs Python's input() function must "complete" before another code cell is run

enter something:2


'2'

In [6]:
2+2

4

### Some functions are part of the Python Standard Library, but not built-in

### These functions come with every Python installation, but must be imported before they are used

In [7]:
from math import sin
sin(30)  # sine of 30 radians

-0.9880316240928618

In [8]:
from math import cos, tan, pi
print(cos(pi/2))   # mathematically 0, but a tiny non-zero value is printed because of floating-point rounding
print(tan(pi/4))   # mathematically 1, but floating-point rounding gives a value just below 1

6.123233995736766e-17
0.9999999999999999


In [9]:
from statistics import mean
a = [13,8,7,9,12,10]
mean(a)

9.833333333333334

### Functions in external packages like NumPy and Pandas can only be used after the package has been installed. With a plain Python installation, the NumPy and Pandas packages must first be installed with the pip install command.

### In the Anaconda distribution of Python, these external packages come pre-installed, so we can import them directly

In [13]:
import numpy as np   # typical import

In [14]:
np.__version__     # the .__version__ attribute is common to many Python packages

'1.21.2'

In [15]:
a = np.array([1,2,3])
print(a)

[1 2 3]


In [17]:
import numpy     # functional, but not the typical way NumPy is imported

In [18]:
numpy.__version__

'1.21.2'

In [19]:
b = numpy.array([1,2,3])
print(b)
type(b)

[1 2 3]


numpy.ndarray

In [22]:
from math import sin as mysine    # the alias mysine() can now be called. mysine will call the sin() function.

In [23]:
mysine(30)

-0.9880316240928618

In [24]:
import math

In [25]:
math.sin(30)

-0.9880316240928618

In [26]:
dir(math)   # Python's dir() function will return all the available functions, methods and attributes on an object

['__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'acos',
 'acosh',
 'asin',
 'asinh',
 'atan',
 'atan2',
 'atanh',
 'ceil',
 'comb',
 'copysign',
 'cos',
 'cosh',
 'degrees',
 'dist',
 'e',
 'erf',
 'erfc',
 'exp',
 'expm1',
 'fabs',
 'factorial',
 'floor',
 'fmod',
 'frexp',
 'fsum',
 'gamma',
 'gcd',
 'hypot',
 'inf',
 'isclose',
 'isfinite',
 'isinf',
 'isnan',
 'isqrt',
 'lcm',
 'ldexp',
 'lgamma',
 'log',
 'log10',
 'log1p',
 'log2',
 'modf',
 'nan',
 'nextafter',
 'perm',
 'pi',
 'pow',
 'prod',
 'radians',
 'remainder',
 'sin',
 'sinh',
 'sqrt',
 'tan',
 'tanh',
 'tau',
 'trunc',
 'ulp']

In [27]:
math.sinh(30)

5343237290762.231

In [28]:
from math import * # not recommended. This imports all the functions of the math module into the notebook at once, so they are then called by their bare name (e.g. sqrt) without the math. prefix

In [29]:
sqrt(4)

2.0

## Data Types in NumPy Arrays

If an array is created from mixed data types, NumPy converts all of the elements to the least precise (most general) data type that is present:

 * np.bool_ (most precise)
 * np.int64
 * np.float64
 * np.str_
 * np.object_ (least precise)

In [32]:
import numpy as np

In [33]:
c = np.array([1,3.04,"name"])
print(c)

['1' '3.04' 'name']


In [34]:
d = np.array([2.03,2,5,6.7])
print(d)

[2.03 2.   5.   6.7 ]


In [35]:
f = np.array([True,False,3])
print(f)

[1 0 3]


### Array Attributes - these describe the data type of the array elements, the size of the array, the number of dimensions of the array, etc. Some attributes, such as .shape, can also be changed by using the assignment operator

In [36]:
import numpy as np

In [37]:
a = np.array([1,2,3])

In [42]:
a.ndim   # number of dimensions. A "rectangular" or 2D array will return a .ndim of 2

1

In [39]:
a.dtype  # data type

dtype('int64')

In [40]:
a.size   # total number of elements

3

In [43]:
len(a)   # Python's built-in len() also works on arrays: it returns the length of the first dimension, which for this 1D array is the number of elements

3

In [44]:
b = np.array([[1,2,3],[3,4,5]])

In [45]:
b

array([[1, 2, 3],
       [3, 4, 5]])

In [46]:
b.ndim

2

In [47]:
b.dtype

dtype('int64')

In [48]:
b.size

6

In [49]:
c = np.array([[1,2,3],[3,4,5]], np.float64)
c

array([[1., 2., 3.],
       [3., 4., 5.]])

In [50]:
c.dtype

dtype('float64')

In [52]:
g = [1,2,4000]   # example showing how to remove an element from a list
print(g)
g.pop()
print(g)

[1, 2, 4000]
[1, 2]


In [53]:
h = [1,4000,2] # example showing how to remove an element from a list
print(h)
h.pop(1)
print(h)

[1, 4000, 2]
[1, 2]


In [54]:
b = np.array([[1,2,3],[3,4,5]])
print(b)
b.shape

[[1 2 3]
 [3 4 5]]


(2, 3)

In [55]:
b.shape = (3,2)
print(b)
b.shape

[[1 2]
 [3 3]
 [4 5]]


(3, 2)

## Array generation functions in NumPy

In [56]:
a = np.array([1,2,3,4,5,6,7])

In [57]:
b = np.arange(0,10,1)   #start(inclusive),stop (exclusive),step
print(b)

[0 1 2 3 4 5 6 7 8 9]


In [58]:
c = np.arange(10)
print(c)

[0 1 2 3 4 5 6 7 8 9]


In [59]:
d = np.arange(2,9)    #start, stop, step=1 (the default)
print(d)

[2 3 4 5 6 7 8]


In [60]:
f = np.arange(0,22,2) #start, stop, step=2
print(f)

[ 0  2  4  6  8 10 12 14 16 18 20]


In [61]:
g = np.linspace(0,360,6)   #start, stop, number of elements
print(g)

[  0.  72. 144. 216. 288. 360.]


In [62]:
h = np.logspace(0,4,3)   #exponent start, exponent stop, number of log spaced elements
print(h)

[1.e+00 1.e+02 1.e+04]


In [63]:
k = np.arange(0,100,1)
print(k,k.shape)
k.shape = (10,10)      # can "re-shape" an array by assigning a shape as a tuple (rows,cols)
print(k,k.shape)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99] (100,)
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]] (10, 10)


In [64]:
a = np.zeros(5)
print(a)

[0. 0. 0. 0. 0.]


In [65]:
a = np.zeros([3,4])
print(a)
a.shape = (6,2)
print(a)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [66]:
# example showing the difference between a list and a tuple
v = [3,4]      # lists are defined with [ ]
print(type(v))
v[1] = 5       # list elements can be re-assigned
print(v)
x = (3,4)      # tuples are defined with (  )
print(type(x))
try:
    x[1] = 5   # can't modify the elements in a tuple: this raises a TypeError
except TypeError as err:
    # The error is the point of the demonstration, so report it instead of
    # letting it stop the notebook when all cells are run in order.
    print("TypeError:", err)

<class 'list'>
[3, 5]
<class 'tuple'>


TypeError: 'tuple' object does not support item assignment

In [67]:
c = np.ones(5)
print(c)

[1. 1. 1. 1. 1.]


In [68]:
d = np.ones([4,6])
print(d)

[[1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1.]]


In [69]:
# example of broadcasting
a = np.arange(0,100,1)
print(a)
b = -2*a**2 + 3*a - 10
print(b)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
[   -10     -9    -12    -19    -30    -45    -64    -87   -114   -145
   -180   -219   -262   -309   -360   -415   -474   -537   -604   -675
   -750   -829   -912   -999  -1090  -1185  -1284  -1387  -1494  -1605
  -1720  -1839  -1962  -2089  -2220  -2355  -2494  -2637  -2784  -2935
  -3090  -3249  -3412  -3579  -3750  -3925  -4104  -4287  -4474  -4665
  -4860  -5059  -5262  -5469  -5680  -5895  -6114  -6337  -6564  -6795
  -7030  -7269  -7512  -7759  -8010  -8265  -8524  -8787  -9054  -9325
  -9600  -9879 -10162 -10449 -10740 -11035 -11334 -11637 -11944 -12255
 -12570 -12889 -13212 -13539 -13870 -14205 -14544 -14887 -15234 -15585
 -15940 -16299 -16662 -17029 -17400 -17775 -18154 -18537 -18924 -19315]

## Array Indexing and Slicing

 * NumPy arrays can be indexed and sliced just like Python lists and strings
 * Counting starts from 0 and stops at n-1 with default step 1
 * Arrays may be one-dimensional, two-dimensional or multi-dimensional

### One dimensional array and array attributes

In [70]:
a = np.arange(100,1000,100)
print(a)
print(a.size)

[100 200 300 400 500 600 700 800 900]
9


In [71]:
a[0]   # index out the element at index 0 (first element)

100

In [72]:
a[2]

300

In [73]:
a[-1]

900

In [74]:
a[0:9:2]   # [start:stop:step]

array([100, 300, 500, 700, 900])

In [75]:
a[1:9:2]

array([200, 400, 600, 800])

In [76]:
a[::2]    # [default start: default stop: step=2]

array([100, 300, 500, 700, 900])

### Two dimensional arrays and array attributes

 * Indexed and sliced like one dimensional arrays
 * array elements are referenced as [row,column] by location

In [77]:
a = np.arange(1,13,1)
print(a)
a.shape = (3,4)
print(a)

[ 1  2  3  4  5  6  7  8  9 10 11 12]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [78]:
a[0,0]  # row 0, col 0

1

In [79]:
a[1,3]  # row 1, col 3

8

In [80]:
a[2,1]

10

In [81]:
a[0:3:1,0]  # rows: start 0, end 3, count by 1, col 0

array([1, 5, 9])

In [82]:
a[:,0]     # rows: all (default start, default stop, default step), col 0

array([1, 5, 9])

In [83]:
a[0:2,2]

array([3, 7])

In [84]:
a[1:,2:]   # rows: start 1, end at default, col: start 2, end at default

array([[ 7,  8],
       [11, 12]])

### np.where() returns the locations (indexes) in an array where a condition is true

In [85]:
c = np.arange(-3,4,1)
print(c)

[-3 -2 -1  0  1  2  3]


In [86]:
# where is the number 2 in the array, what is the index where number 2 is located?
loc = np.where(c==2)
print(loc)

(array([5]),)


In [87]:
c[loc]

array([2])

In [88]:
neg_loc = np.where(c<0)
print(neg_loc)

(array([0, 1, 2]),)


In [89]:
pos_loc = np.where(c>0)
print("The indexes or locations that contain positive numbers")
print(pos_loc)
print("The positive numbers are:")
print(c[pos_loc])

The indexes or locations that contain positive numbers
(array([4, 5, 6]),)
The positive numbers are:
[1 2 3]


In [90]:
s = np.array(["a","b","c","d"])
loc = np.where(s=="c")
print(loc)

(array([2]),)


In [91]:
a = np.arange(0,9,1)
a.shape=(3,3)
print(a)
# locations of all the numbers greater than 4
loc = np.where(a>4)
print(loc)
print("the numbers greater than 4 are: ")
print(a[loc])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
(array([1, 2, 2, 2]), array([2, 0, 1, 2]))
the numbers greater than 4 are: 
[5 6 7 8]


In [92]:
b = np.array([1,3,50000,4])
# set the outlier 50000 to 5
loc = np.where(b>10)
print(loc)
b[loc] = 5
print(b)

(array([2]),)
[1 3 5 4]


In [93]:
# remove the outlier
b = np.array([1,3,50000,4])
loc = np.where(b<10)
c = b[loc]
print(c)

[1 3 4]


## Boolean Masks

### A boolean mask is an array that contains all True or False values only.

 * For each True value in the boolean mask, a value is returned from an indexed array.
 * For each False value in the boolean mask, no value is returned from an indexed array.


In [94]:
a = np.array([-3,-4,10,15])
a

array([-3, -4, 10, 15])

In [95]:
mask = np.array([True,True,False,True])
mask

array([ True,  True, False,  True])

In [96]:
a[mask]  # index out all the values where the mask is "True"

array([-3, -4, 15])

In [97]:
mask2 = np.array([False,True,False,True])
a[mask2]

array([-4, 15])

In [98]:
# can use an expression to create a boolean mask
mask3 = a < 0
mask3

array([ True,  True, False, False])

In [99]:
a[mask3]

array([-3, -4])

In [100]:
a

array([-3, -4, 10, 15])

In [101]:
mask4 = a > 0
mask4

array([False, False,  True,  True])

In [102]:
a[mask4]

array([10, 15])

In [104]:
a[a>0]      # instead of creating a mask variable, you can pass the mask creation directly in as an index

array([10, 15])

In [105]:
a[a<0]

array([-3, -4])

In [106]:
c = np.array([-1,-2,3000000,8])
c[c<10]

array([-1, -2,  8])

In [107]:
c>=-2   # boolean mask that is True wherever an element of c is greater than or equal to -2

array([ True,  True,  True,  True])

In [108]:
c[c!=8]    # not equivalent to 8

array([     -1,      -2, 3000000])

In [110]:
c[c[c==-1]]   # not nested masking: the inner c[c==-1] is the integer array [-1], so the outer index is an integer index and returns c[-1], the last element

array([8])

In [111]:
c==-1

array([ True, False, False, False])

In [112]:
c[c==-1]

array([-1])

In [113]:
c[c[c==100]]   # no element of c equals 100, so the inner result is empty and indexing with it returns an empty array

array([], dtype=int64)

### Broadcasting functions and mathematical operations across arrays

In [114]:
t = 2
x = 2*t**2 + 4*4 +10   # NOTE(review): the middle term 4*4 is the constant 16 and does not use t -- was 4*t intended? confirm
print(x)

34


In [115]:
t = np.arange(0,61)
print(t)
# the same expression as for a single number is applied to every element of t at once
x = 2*t**2 + 4*4 +10   # NOTE(review): the middle term 4*4 is the constant 16 and does not use t -- was 4*t intended? confirm
print(x)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60]
[  26   28   34   44   58   76   98  124  154  188  226  268  314  364
  418  476  538  604  674  748  826  908  994 1084 1178 1276 1378 1484
 1594 1708 1826 1948 2074 2204 2338 2476 2618 2764 2914 3068 3226 3388
 3554 3724 3898 4076 4258 4444 4634 4828 5026 5228 5434 5644 5858 6076
 6298 6524 6754 6988 7226]


In [116]:
a = np.array([0,1,2])
b = np.array([0,10,20])
a*b

array([ 0, 10, 40])

In [117]:
a+b

array([ 0, 11, 22])

## Getting help with NumPy and Python Math functions / methods

### Get help using dir() in Jupyter on functions, methods, variables or objects

In [118]:
import statistics

In [119]:
dir(statistics)

['Counter',
 'Decimal',
 'Fraction',
 'NormalDist',
 'StatisticsError',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_coerce',
 '_convert',
 '_exact_ratio',
 '_fail_neg',
 '_find_lteq',
 '_find_rteq',
 '_isfinite',
 '_normal_dist_inv_cdf',
 '_ss',
 '_sum',
 'bisect_left',
 'bisect_right',
 'erf',
 'exp',
 'fabs',
 'fmean',
 'fsum',
 'geometric_mean',
 'groupby',
 'harmonic_mean',
 'hypot',
 'itemgetter',
 'log',
 'math',
 'mean',
 'median',
 'median_grouped',
 'median_high',
 'median_low',
 'mode',
 'multimode',
 'numbers',
 'pstdev',
 'pvariance',
 'quantiles',
 'random',
 'sqrt',
 'stdev',
 'tau',
 'variance']

### Get help using the Tab key after objectname.[tab] in a Jupyter cell

In [121]:
statistics.median([1,3,4])

3

### Get help using help() function in Jupyter cell

In [122]:
help(statistics.median)

Help on function median in module statistics:

median(data)
    Return the median (middle value) of numeric data.
    
    When the number of data points is odd, return the middle data point.
    When the number of data points is even, the median is interpolated by
    taking the average of the two middle values:
    
    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0



## Refer to Appendix E in the text: NumPy Math functions. Describes various NumPy functions.

In [123]:
# Print every name available in the NumPy namespace, one name per line
import numpy as np
print("\n".join(dir(np)))

ALLOW_THREADS
AxisError
BUFSIZE
Bytes0
CLIP
DataSource
Datetime64
ERR_CALL
ERR_DEFAULT
ERR_IGNORE
ERR_LOG
ERR_PRINT
ERR_RAISE
ERR_WARN
FLOATING_POINT_SUPPORT
FPE_DIVIDEBYZERO
FPE_INVALID
FPE_OVERFLOW
FPE_UNDERFLOW
False_
Inf
Infinity
MAXDIMS
MAY_SHARE_BOUNDS
MAY_SHARE_EXACT
MachAr
NAN
NINF
NZERO
NaN
PINF
PZERO
RAISE
SHIFT_DIVIDEBYZERO
SHIFT_INVALID
SHIFT_OVERFLOW
SHIFT_UNDERFLOW
ScalarType
Str0
Tester
TooHardError
True_
UFUNC_BUFSIZE_DEFAULT
UFUNC_PYVALS_NAME
Uint64
WRAP
_NoValue
_UFUNC_API
__NUMPY_SETUP__
__all__
__builtins__
__cached__
__config__
__deprecated_attrs__
__dir__
__doc__
__expired_functions__
__file__
__getattr__
__git_version__
__loader__
__mkl_version__
__name__
__package__
__path__
__spec__
__version__
_add_newdoc_ufunc
_distributor_init
_financial_names
_globals
_mat
_pytesttester
_version
abs
absolute
add
add_docstring
add_newdoc
add_newdoc_ufunc
alen
all
allclose
alltrue
amax
amin
angle
any
append
apply_along_axis
apply_over_axes
arange
arccos
arccosh
arcsin
arcsinh

## Example of pulling out an entire column or entire row from an array

In [129]:
# From worksheet
# From worksheet: a 2D array with 3 rows and 4 columns (one inner list per row)
B = np.array([
    [99, 4, 10, 18],
    [8, 2, 7, 20],
    [-44, 56, 8, 5],
])
B

array([[ 99,   4,  10,  18],
       [  8,   2,   7,  20],
       [-44,  56,   8,   5]])

In [130]:
# pull out the 3rd column (column at index 2)
B[0:3:1,2]

array([10,  7,  8])

In [131]:
B[:,2]

array([10,  7,  8])

In [132]:
# pull out the 2nd column (column at index 1)
B[:,1]

array([ 4,  2, 56])

In [133]:
# pull out all elements from the 2nd row (row index 1)
B[1,:]

array([ 8,  2,  7, 20])

In [135]:
# B[row,col]