# NumPy
### Reference: 
- online free docs:
  - https://docs.scipy.org/doc/numpy/
  - https://docs.scipy.org/doc/numpy/user/basics.types.html
  - https://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs
- [book old edition free](https://www.safaribooksonline.com/library/view/python-data-science/9781491912126/)
- [book new edition pay](https://smile.amazon.com/Python-Data-Science-Handbook-Essential/dp/1491912057/)

### Guideline
- NumPy Arrays are more compact (and therefore more memory efficient) than Python Lists.
- Unlike Python Lists, a NumPy array is constrained to elements that all share the same type (dtype).

In [1]:
import numpy as np

## `Array`

In [2]:
arr1D = np.array([1,2,3])
print(arr1D)

[1 2 3]


In [3]:
arr2D = np.array([[1,2,3],[4,5,6]])
print(arr2D)

[[1 2 3]
 [4 5 6]]


## `ARange`

In [4]:
# start , stop , step
arr1D = np.arange(10)
arr1D

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
arr1D = np.arange(1,10)
print(arr1D)

[1 2 3 4 5 6 7 8 9]


In [6]:
arr1D = np.arange(1,10,2)
print(arr1D)

[1 3 5 7 9]


## `Linspace`

In [7]:
# start, stop, num (number of evenly spaced samples; stop is included)
arr1D = np.linspace(0,10,5)
print(arr1D)

[ 0.   2.5  5.   7.5 10. ]


In [8]:
arr1D = np.linspace(1,10,5)
print(arr1D)

[ 1.    3.25  5.5   7.75 10.  ]


In [9]:
arr1D = np.linspace(1,10,10)
print(arr1D)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


## `Empty` Method
Rarely used

In [10]:
arr1D = np.empty(10) # 1D; values are uninitialized (not guaranteed to be zero)
print(arr1D)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [11]:
arr2D = np.empty((2,3)) # 2-by-3 two-dimensional array (values are uninitialized)
print(arr2D)

[[1.28822975e-231 1.28822975e-231 3.95252517e-323]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000]]


## `Zeros` Method

In [12]:
arr1D = np.zeros(10) # 1D (guaranteed all zeros)
print(arr1D)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [13]:
arr2D = np.zeros((3,3)) # 2 dimensional
print(arr2D)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


## `Ones` Method

In [14]:
arr1D = np.ones(10) # all ones
print(arr1D)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [15]:
arr2D = np.ones((5,2)) # all ones
print(arr2D)

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


## `Full` Method

In [16]:
arr1D = np.full(10,3.14)
print(arr1D)

[3.14 3.14 3.14 3.14 3.14 3.14 3.14 3.14 3.14 3.14]


In [17]:
arr2D = np.full((3,5),3.14)
print(arr2D)

[[3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]]


## Identity(eye) Method

In [18]:
arr2D = np.eye(5) # 2D
print(arr2D) # float dtype

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [19]:
arr2D = np.eye(5, dtype=int) # 2D
print(arr2D) # int dtype (requested via dtype=int)

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]


## `Random` (from Different Distributions)

In [20]:
# use seed to get consistent random set of numbers
np.random.seed(0)

### Uniform Distribution over [0,1)

In [21]:
arr1D = np.random.rand(3)
print(arr1D)

[0.5488135  0.71518937 0.60276338]


In [22]:
arr2D = np.random.rand(3,5) # note this is not a tuple, but 2 different arguments!
print(arr2D)

[[0.54488318 0.4236548  0.64589411 0.43758721 0.891773  ]
 [0.96366276 0.38344152 0.79172504 0.52889492 0.56804456]
 [0.92559664 0.07103606 0.0871293  0.0202184  0.83261985]]


### (Standard) Normal Distribution (with mean of 0 and standard deviation of 1)
***can be changed with the `loc` and `scale` parameters***

In [23]:
arr1D = np.random.normal(size=3) # specify size parameter
print(arr1D)

[0.44386323 0.33367433 1.49407907]


In [24]:
arr2D = np.random.normal(size=(3,5)) # specify size parameter
print(arr2D)

[[-0.20515826  0.3130677  -0.85409574 -2.55298982  0.6536186 ]
 [ 0.8644362  -0.74216502  2.26975462 -1.45436567  0.04575852]
 [-0.18718385  1.53277921  1.46935877  0.15494743  0.37816252]]


In [25]:
arr1D = np.random.normal(0, 1, size=3) 
print(arr1D)
print(np.mean(arr1D), np.std(arr1D)) # sample mean/std differ from the requested 0 and 1 because the sample is so small

[-0.88778575 -1.98079647 -0.34791215]
-1.0721647883933973 0.679251805410509


In [26]:
arr1D = np.random.normal(0, 1, size=30000) 
print(np.mean(arr1D), np.std(arr1D))

-0.004750309766252265 0.9924490756194764


### Discrete Uniform Distribution over [low, high), or over [0, low) if only `low` is specified

In [27]:
# low, high, size
arr1D = np.random.randint(0,10,3)
print(arr1D)

[1 6 0]


In [28]:
# low, high, size
arr2D = np.random.randint(0,10,(3,5))
print(arr2D)

[[9 9 9 3 6]
 [7 1 8 0 6]
 [7 0 9 3 2]]


## Dimensionality

In [29]:
data = np.random.randint(1,100,(3,5))
print(data)

[[29 18  9 71 52]
 [48 82 78 40 20]
 [49 87 59 22 67]]


In [30]:
data.ndim

2

In [31]:
data.shape

(3, 5)

In [32]:
data.size

15

In [33]:
data.dtype

dtype('int64')

## `Reshape`

In [34]:
arr1D = np.arange(10)
print(arr1D)

[0 1 2 3 4 5 6 7 8 9]


In [35]:
arr2D = arr1D.reshape(2,5)
print(arr1D) # original object not affected
print(arr2D)

[0 1 2 3 4 5 6 7 8 9]
[[0 1 2 3 4]
 [5 6 7 8 9]]


In [36]:
arr2D = np.arange(15).reshape(5,3)
print(arr2D)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]


In [37]:
arr1D = np.arange(1,40,2)
arr1D

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
       35, 37, 39])

In [38]:
arr1D.shape

(20,)

## Array Slicing

In [39]:
# array slicing
arr1D[2:8] # work just like list

array([ 5,  7,  9, 11, 13, 15])

In [40]:
arr1D[10:]

array([21, 23, 25, 27, 29, 31, 33, 35, 37, 39])

In [41]:
arr2D = arr1D.reshape(4,5)
print(arr2D)

[[ 1  3  5  7  9]
 [11 13 15 17 19]
 [21 23 25 27 29]
 [31 33 35 37 39]]


In [42]:
# 2D array slicing 1 row
arr2D[1]

array([11, 13, 15, 17, 19])

In [43]:
# 2D array slicing 1 col
arr2D[:,1]

array([ 3, 13, 23, 33])

In [44]:
# 2D array slicing 1 element
arr2D[1][3]

17

In [45]:
# 2D array slicing 1 element
arr2D[1,3]

17

In [46]:
# 2D array slicing into 2D
arr2D[1:4, 1:4]

array([[13, 15, 17],
       [23, 25, 27],
       [33, 35, 37]])

## Mutability

In [47]:
arr1D = np.arange(10)
print(arr1D)

[0 1 2 3 4 5 6 7 8 9]


In [48]:
arr1D[2] = 3.1415
print(arr1D) # arange(10) has an integer dtype, so 3.1415 is stored as 3 (truncated), not as a float

[0 1 3 3 4 5 6 7 8 9]


## `AsType`

In [49]:
arr1D = arr1D.astype(float)
print(arr1D)

[0. 1. 3. 3. 4. 5. 6. 7. 8. 9.]


In [50]:
arr1D[2] = 3.1415
print(arr1D)

[0.     1.     3.1415 3.     4.     5.     6.     7.     8.     9.    ]


## Object Binding & Copy

In [51]:
data = np.arange(10)
data[2] = 400
print(data)

[  0   1 400   3   4   5   6   7   8   9]


In [52]:
# Binding: plain assignment gives the SAME array a second name (no copy is made).
data = np.arange(10)
databind = data
print(data, databind, sep="\n")

# A write through one name is therefore visible through the other name too.
data[0] = 555
print(data, databind, sep="\n")

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[555   1   2   3   4   5   6   7   8   9]
[555   1   2   3   4   5   6   7   8   9]


In [53]:
# Copy: .copy() allocates an independent array holding the same values.
data = np.arange(10)
databind = data.copy()
print(data, databind, sep="\n")

# A write to the original leaves the copy untouched.
data[0] = 555
print(data, databind, sep="\n")  # databind does not change

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[555   1   2   3   4   5   6   7   8   9]
[0 1 2 3 4 5 6 7 8 9]


# Part-2

In [54]:
np.random.seed(10)
data = np.random.randint(0,100,10)
data

array([ 9, 15, 64, 28, 89, 93, 29,  8, 73,  0])

## `sum`

In [55]:
print(data.sum())

408


In [56]:
print(np.sum(data))

408


## `min`

In [57]:
print(np.min(data))
print(data.min())
print('position of the minimum argument:',np.argmin(data))

0
0
position of the minimum argument: 9


## `max`

In [58]:
print(np.max(data))
print(data.max())
print('position of the maximum argument:',np.argmax(data))

93
93
position of the maximum argument: 5


## `mean`, `median`, `std`, `var`, `percentile`

In [59]:
print(np.mean(data)) # mean
print(np.median(data)) # median
print(np.std(data)) # standard deviation
print(np.var(data)) # variance
print(np.percentile(data,10)) # 10th percentile

40.8
28.5
33.68026128164685
1134.36
7.2


## `any`
True if at least one element is non-zero

In [60]:
np.any(data)

True

## `all`
True only if every element is non-zero

In [61]:
np.all(data)

False

### These functions also work for 2-dimensional arrays

In [62]:
# change data to 2D array
data = data.reshape((2,5))
print(data)

[[ 9 15 64 28 89]
 [93 29  8 73  0]]


In [63]:
print(np.mean(data)) # mean
print(np.median(data)) # median
print(np.std(data)) # standard deviation
print(np.var(data)) # variance
print(np.percentile(data,10)) # 10th percentile

# the results are exactly the same as for the 1D array

40.8
28.5
33.68026128164685
1134.36
7.2


## axis

In [64]:
print(np.sum(data, axis=0))
print(np.sum(data, axis=1))

[102  44  72 101  89]
[205 203]


# `ufuncs` (Universal Function)

In [65]:
np.random.seed(1)
data = np.arange(10)
print(data)

[0 1 2 3 4 5 6 7 8 9]


## `np.exp()` 
$e^{x}$

In [66]:
print(np.exp(data)) # e ^ x

[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]


## `np.exp2()`


In [67]:
print(np.exp2(data)) # 2 ^ x

[  1.   2.   4.   8.  16.  32.  64. 128. 256. 512.]


## `np.power()`

In [68]:
print(np.power(3, data)) # arg1 ^ x

[    1     3     9    27    81   243   729  2187  6561 19683]


## `np.log()`

In [69]:
print(np.log(data))

[      -inf 0.         0.69314718 1.09861229 1.38629436 1.60943791
 1.79175947 1.94591015 2.07944154 2.19722458]


RuntimeWarning: divide by zero encountered in log
  print(np.log(data))


## `np.log2()`

In [70]:
print(np.log2(data))

[      -inf 0.         1.         1.5849625  2.         2.32192809
 2.5849625  2.80735492 3.         3.169925  ]


RuntimeWarning: divide by zero encountered in log2
  print(np.log2(data))


## `np.log10()`

In [71]:
print(np.log10(data))

[      -inf 0.         0.30103    0.47712125 0.60205999 0.69897
 0.77815125 0.84509804 0.90308999 0.95424251]


RuntimeWarning: divide by zero encountered in log10
  print(np.log10(data))


## `np.sin()`, `np.cos()`, `np.tan()`

In [72]:
print(np.sin(data))

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866   0.98935825  0.41211849]


In [73]:
print(np.cos(data))

[ 1.          0.54030231 -0.41614684 -0.9899925  -0.65364362  0.28366219
  0.96017029  0.75390225 -0.14550003 -0.91113026]


In [74]:
print(np.tan(data))

[ 0.          1.55740772 -2.18503986 -0.14254654  1.15782128 -3.38051501
 -0.29100619  0.87144798 -6.79971146 -0.45231566]


# Conditional Selection

In [75]:
data = np.arange(1,20,2)
print(data)

[ 1  3  5  7  9 11 13 15 17 19]


In [76]:
data > 5 # return an array

array([False, False, False,  True,  True,  True,  True,  True,  True,
        True])

In [77]:
data[ data > 5] # returns the elements for which the condition is True

array([ 7,  9, 11, 13, 15, 17, 19])

In [78]:
data[ data <= 5] # returns the elements for which the condition is True

array([1, 3, 5])

In [79]:
data = np.arange(9).reshape((3,3))
print(data)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [80]:
data[data>=3] # return in 1D

array([3, 4, 5, 6, 7, 8])

In [81]:
a = np.array([1,2,3,40,50])
b = np.array([10,20,3,4,5])

a > b

array([False, False, False,  True,  True])

In [82]:
a[a>b]

array([40, 50])

In [83]:
data = np.arange(9)
print(data)

[0 1 2 3 4 5 6 7 8]


In [84]:
data[data>3]

array([4, 5, 6, 7, 8])

### logic operator

In [85]:
data[ (data > 3) | (data < 5)]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [86]:
data[ (data > 3) & (data < 5)] 

array([4])

# Arithmetic Operation


## array & scalar
array with scalar arithmetic operations `+`, `-`, `*`, `/`, `**` (uses **broadcasting**)

In [87]:
data = np.arange(1,10,1).reshape((3,3))
print(data)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [88]:
-data

array([[-1, -2, -3],
       [-4, -5, -6],
       [-7, -8, -9]])

In [89]:
1/data

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667],
       [0.14285714, 0.125     , 0.11111111]])

In [90]:
data + 5

array([[ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [91]:
data - 5

array([[-4, -3, -2],
       [-1,  0,  1],
       [ 2,  3,  4]])

In [92]:
print(data * 2)
print()
print(np.multiply(data, 2))

[[ 2  4  6]
 [ 8 10 12]
 [14 16 18]]

[[ 2  4  6]
 [ 8 10 12]
 [14 16 18]]


In [93]:
data / 2

array([[0.5, 1. , 1.5],
       [2. , 2.5, 3. ],
       [3.5, 4. , 4.5]])

In [94]:
data ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [95]:
data % 2

array([[1, 0, 1],
       [0, 1, 0],
       [1, 0, 1]])

In [96]:
data // 2

array([[0, 1, 1],
       [2, 2, 3],
       [3, 4, 4]])

In [97]:
np.absolute(-data)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

## array & array
array with array arithmetic operations `+`, `-`, `*`, `/`, `**` (operates on corresponding elements)

In [98]:
X = np.arange(0,5)
Y = np.arange(5,10)
print(X, Y)

[0 1 2 3 4] [5 6 7 8 9]


In [99]:
print(X + Y)
print(X - Y)
print(X * Y)
print(X / Y)
print(X ** Y)
print(X % Y)
print(X // Y)

[ 5  7  9 11 13]
[-5 -5 -5 -5 -5]
[ 0  6 14 24 36]
[0.         0.16666667 0.28571429 0.375      0.44444444]
[     0      1    128   6561 262144]
[0 1 2 3 4]
[0 0 0 0 0]


## Normalize

In [100]:
data = np.random.randint(0,100,10)
data

array([37, 12, 72,  9, 75,  5, 79, 64, 16,  1])

In [101]:
dm = np.mean(data)
dm

37.0

In [102]:
ds = np.std(data)
ds

30.548322376196047

In [103]:
norm = ((data - dm) / ds)
print(data)
print(norm)

[37 12 72  9 75  5 79 64 16  1]
[ 0.         -0.81837555  1.14572576 -0.91658061  1.24393083 -1.0475207
  1.37487092  0.88384559 -0.68743546 -1.17846079]


In [104]:
print(norm.mean())
print(norm.std())

2.2204460492503132e-17
0.9999999999999999


---

# Exercise

#### Exercise Set 1
Create a numpy array as below:
> `np.random.seed(0)` <br>
> `arr = np.array(np.random.normal(size=(3,5)))` <br>
> `arr`

In [105]:
np.random.seed(0)
arr = np.array(np.random.normal(size=(3,5)))
arr

array([[ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799],
       [-0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ],
       [ 0.14404357,  1.45427351,  0.76103773,  0.12167502,  0.44386323]])

(1) print column 1 <br>
(2) print row 0 <br>
(3) print element at row 0 and column 1 <br>
(4) print columns 1 and 3 <br>
(5) print rows 0 and 2 <br>
(6) print rows 0 and 2 & columns 1 and 3 <br>

In [106]:
print('(1) print column 1')
print(arr[:,1])
print()
print('(2) print row 0')
print(arr[0])
print()
print('(3) print element at row 0 and column 1')
print(arr[0,1])
print()
print('(4) print columns 1 and 3')
print(arr[:,[1,3]])
print()
print('(5) print rows 0 and 2')
print(arr[[0,2]])
print()
print('(6) print rows 0 and 2 & columns 1 and 3')
print(arr[np.ix_([0,2],[1,3])])

(1) print column 1
[0.40015721 0.95008842 1.45427351]

(2) print row 0
[1.76405235 0.40015721 0.97873798 2.2408932  1.86755799]

(3) print element at row 0 and column 1
0.4001572083672233

(4) print columns 1 and 3
[[ 0.40015721  2.2408932 ]
 [ 0.95008842 -0.10321885]
 [ 1.45427351  0.12167502]]

(5) print rows 0 and 2
[[1.76405235 0.40015721 0.97873798 2.2408932  1.86755799]
 [0.14404357 1.45427351 0.76103773 0.12167502 0.44386323]]

(6) print rows 0 and 2 & columns 1 and 3
[[0.40015721 2.2408932 ]
 [1.45427351 0.12167502]]


#### Exercise Set 2

Define a 3 x 5 array as follows:
> `np.random.seed(10)` <br>
> `nr, nc = 3, 5` <br>
> `arr = np.random.randint(0, 100, 15).reshape(nr, nc)` <br>
> `arr` <br>

In [107]:
np.random.seed(10)
nr, nc = 3, 5
arr = np.random.randint(0, 100, 15).reshape(nr, nc)
arr

array([[ 9, 15, 64, 28, 89],
       [93, 29,  8, 73,  0],
       [40, 36, 16, 11, 54]])

**Min-Max scale** each row:
- for each row, find the row minimum (rmin) and the row maximum (rmax)
- then for each element (e) in each row, transform as follows: (e - rmin)/(rmax - rmin)
- this will scale each row to the range [0, 1]

In [108]:
# Per-row minimum and maximum; keepdims=True keeps them as column vectors
# so they broadcast across every element of their row.
rmin = arr.min(axis=1, keepdims=True)
rmax = arr.max(axis=1, keepdims=True)
row_range = rmax - rmin

# Min-max scale each row with element-wise (broadcast) arithmetic.
mmsarr = (arr - rmin) / row_range
print(rmin)
print(rmax)
print(row_range)
print(mmsarr.round(2))

[[ 9]
 [ 0]
 [11]]
[[89]
 [93]
 [54]]
[[80]
 [93]
 [43]]
[[0.   0.08 0.69 0.24 1.  ]
 [1.   0.31 0.09 0.78 0.  ]
 [0.67 0.58 0.12 0.   1.  ]]


**Min-Max scale** each *column*

In [109]:
# Per-column minimum and maximum; keepdims=True keeps them as row vectors
# so they broadcast down every element of their column.
cmin = arr.min(axis=0, keepdims=True)
cmax = arr.max(axis=0, keepdims=True)
col_range = cmax - cmin

# Min-max scale each column with element-wise (broadcast) arithmetic.
mmsarrC = (arr - cmin) / col_range
print(cmin)
print(cmax)
print(col_range)
print(mmsarrC.round(2))

[[ 9 15  8 11  0]]
[[93 36 64 73 89]]
[[84 21 56 62 89]]
[[0.   0.   1.   0.27 1.  ]
 [1.   0.67 0.   1.   0.  ]
 [0.37 1.   0.14 0.   0.61]]


In [110]:
arr = np.array([[10,10,10],[20,20,20]])

rvec = np.array([5,5,5])

arr - rvec

array([[ 5,  5,  5],
       [15, 15, 15]])

---

## Quiz: what will these cells return?

In [111]:
arr = np.array([[100,100,100],[200,200,200]])

cvec = np.array([10,10]).reshape(2,1)

arr - cvec

array([[ 90,  90,  90],
       [190, 190, 190]])

In [112]:
arr1= np.array([[10,10,10],[20,20,20]])

arr2= np.array([[5,5,5],[10,10,10]])

arr1-arr2

array([[ 5,  5,  5],
       [10, 10, 10]])