# 1. Understanding Data Types in Python

In [2]:
import numpy
# Echo the installed NumPy version so readers can match it against the outputs below.
numpy.__version__

'1.19.2'

In [5]:
import numpy as np
# np.<TAB>  -> in IPython, tab-completion lists the contents of the np namespace
# np?       -> in IPython, a trailing ? displays the object's built-in documentation

### a. A Python Integer Is More Than Just an Integer

##### A single integer in Python contains four pieces
* ob_refcnt
* ob_type
* ob_size
* ob_digit

### b. A Python List Is More Than Just a List

In [8]:
# Build a plain Python list holding the integers 0 through 9.
L = list(range(10))
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [10]:
# Inspect the type of one element: it is an ordinary Python int object.
type(L[0])

int

In [11]:
# Same values as L, converted to strings: a list can just as easily hold str objects.
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [13]:
# The elements of L2 are str objects rather than ints.
type(L2[0])

str

In [15]:
# A single list can hold items of different types; list the type of each one.
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]

[bool, str, float, int]

### c. Fixed-Type Arrays in Python

In [17]:
import array
L = list(range(10))
# Unlike a list, array.array stores every element with one fixed type.
A = array.array('i', L) # 'i' is the type code that selects an integer element type
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### d. Creating Arrays from Python Lists

In [19]:
# An all-integer list yields an integer array.
np.array([1,4,2,5,3])

array([1, 4, 2, 5, 3])

In [21]:
# Every value here is written as a float, so the resulting array is floating point.
np.array([3.14, 4., 2., 3.])

array([3.14, 4.  , 2.  , 3.  ])

In [23]:
# dtype= sets the element type explicitly: the integers are stored as float32.
np.array([1,2,3,4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [25]:
# A list of equal-length ranges becomes a 2-D array, one row per inner range.
np.array([range(i, i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### e. Creating Arrays from Scratch

In [26]:
# Length-10 integer array filled with zeros.
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [28]:
# 3x5 floating-point array filled with ones.
np.ones((3,5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [30]:
# 3x5 array with every element set to 3.14.
np.full((3,5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [32]:
np.arange(0, 20, 2) # start, stop (exclusive: the last value is 18), step

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [34]:
np.linspace(0,1,5) # start, stop, number of points; both endpoints are included

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [36]:
# No seed is set before this cell, so the values differ from run to run.
np.random.random((3,3)) # 3x3 samples uniformly distributed over [0, 1)

array([[0.50485015, 0.56576594, 0.48369839],
       [0.53676262, 0.97621908, 0.24823152],
       [0.26738521, 0.06476087, 0.13928228]])

In [38]:
np.random.normal(0,1, (3,3)) # normal distribution with mean 0 and standard deviation 1

array([[-2.28963631, -0.86883525,  1.45948645],
       [ 2.16476654, -0.91044444, -0.29371813],
       [ 1.35072664, -1.04342835,  0.6046366 ]])

In [40]:
np.random.randint(0, 10, (3,3)) # 3x3 random integers drawn from [0, 10)

array([[3, 6, 8],
       [5, 8, 5],
       [5, 7, 3]])

In [42]:
np.eye(3) # 3x3 identity matrix (floating point, as the output shows)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [43]:
# The values are whatever already sat in that memory; the ones shown below are incidental.
np.empty(3) # uninitialized array

array([1., 1., 1.])

### f. NumPy Standard Data Types

In [44]:
# The dtype can be given by its string name...
np.zeros(10, dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [45]:
# ...or as the corresponding NumPy type object; the resulting array is the same.
np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

# 2. The Basics of NumPy Array
* Attributes of arrays
* Indexing of arrays
* Slicing of arrays
* Reshaping of arrays
* Joining and splitting of arrays

### a. NumPy Array Attributes

In [46]:
import numpy as np
# Fix the seed so the random arrays, and the outputs derived from them, are reproducible.
np.random.seed(0)

# Random integers in [0, 10) with one, two, and three dimensions.
x1 = np.random.randint(10, size=6) # one-dimensional array
x2 = np.random.randint(10, size=(3, 4)) # two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5)) # three-dimensional array

In [49]:
# Structural attributes: number of dimensions, size of each dimension, total element count.
print('x3 ndim: ', x3.ndim)
print('x3 shape: ', x3.shape)
print('x3 size: ', x3.size)
# dtype is the element type, itemsize the bytes per element, nbytes the total size
# (in the output below nbytes == itemsize * size: 8 * 60 = 480).
print('x3 dtype: ', x3.dtype)
print('x3 itemsize: ', x3.itemsize, 'bytes')
print('x3 nbytes: ', x3.nbytes, 'bytes')

x3 ndim:  3
x3 shape:  (3, 4, 5)
x3 size:  60
x3 dtype:  int64
x3 itemsize:  8 bytes
x3 nbytes:  480 bytes


### b. Array Indexing: Accessing Single Elements

In [51]:
# Display the seeded 1-D array that the indexing examples below refer to.
x1

array([5, 0, 3, 3, 7, 9])

In [52]:
print(x1[0])  # first element (indices start at 0)
print(x1[4])  # fifth element
print(x1[-1])  # negative indices count from the end: last element
print(x1[-2])  # second-to-last element

5
7
9
7


In [53]:
# Display the seeded 3x4 array that the 2-D indexing examples below refer to.
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [54]:
# Multi-dimensional arrays take a comma-separated (row, column) index.
print(x2[0,0])  # first row, first column
print(x2[2,0])  # third row, first column
print(x2[2,-1])  # third row, last column

3
1
7


In [55]:
# Index notation also assigns: overwrite the top-left element in place.
x2[0,0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [56]:
x1[0] = 3.141592 # x1 holds integers, so the float is silently truncated to 3
x1

array([3, 0, 3, 3, 7, 9])

### c. Array Slicing: Accessing Subarrays
* x[start: stop: step]

In [59]:
x = np.arange(10)
print(x)
print(x[:5])  # first five elements
print(x[4:76])  # stop (76) is past the end, so the slice simply runs to the last element
print(x[::2])  # every other element, starting at index 0
print(x[1::2])  # every other element, starting at index 1
print(x[::-1])  # negative step: all elements, reversed
print(x[5::-2])  # every other element going backwards from index 5

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4]
[4 5 6 7 8 9]
[0 2 4 6 8]
[1 3 5 7 9]
[9 8 7 6 5 4 3 2 1 0]
[5 3 1]


In [61]:
print(x2)
print(x2[:2, :3])  # first two rows, first three columns
print(x2[:3, ::2])  # all three rows, every other column
print(x2[::-1, ::-1])  # rows and columns both reversed

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]
[[12  5  2]
 [ 7  6  8]]
[[12  2]
 [ 7  8]
 [ 1  7]]
[[ 7  7  6  1]
 [ 8  8  6  7]
 [ 4  2  5 12]]


In [62]:
print(x2[:, 0])  # first column
print(x2[0, :])  # first row
print(x2[0])  # same as x2[0, :]: the trailing slice can be omitted

[12  7  1]
[12  5  2  4]
[12  5  2  4]


In [63]:
# A slice is a view onto x2's data, not a copy.
x2_sub = x2[:2, :2]
print(x2_sub)
x2_sub[0,0] = 99
print(x2_sub)
print(x2)  # the write made through the view shows up in the original array

[[12  5]
 [ 7  6]]
[[99  5]
 [ 7  6]]
[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [64]:
# .copy() produces an independent array, so editing it leaves x2 untouched.
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy[0,0] = 42
print(x2_sub_copy)
print(x2)  # still shows 99 in the corner, not 42

[[42  5]
 [ 7  6]]
[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


### d. Reshaping of Arrays

In [65]:
# Lay the numbers 1 through 9 out as a 3x3 grid (9 values fill 3*3 exactly).
grid = np.arange(1, 10).reshape((3,3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [71]:
x = np.array([1,2,3])
print(x)
print(x.shape)
# Turn the 1-D array into a single-row (1, 3) matrix via reshape.
y=x.reshape((1,3))
print(y.shape)
print(y)

[1 2 3]
(3,)
(1, 3)
[[1 2 3]]


In [72]:
print(x)
print(x[np.newaxis, :])  # row vector via newaxis: same result as x.reshape((1, 3))

[1 2 3]
[[1 2 3]]


In [74]:
print(x)
print(x.shape)
# Turn the 1-D array into a single-column (3, 1) matrix via reshape.
z = x.reshape((3,1))
print(z)
print(z.shape)

[1 2 3]
(3,)
[[1]
 [2]
 [3]]
(3, 1)


In [75]:
# Column vector via newaxis: same result as x.reshape((3, 1)).
z2 = x[:, np.newaxis]
print(z2)
print(z2.shape)

[[1]
 [2]
 [3]]
(3, 1)


### e. Array Concatenation and Splitting

In [76]:
x = np.array([1,2,3])
y = np.array([3,2,1])
# concatenate takes a list of arrays and joins them end to end.
np.concatenate([x,y])

array([1, 2, 3, 3, 2, 1])

In [78]:
# More than two sequences can be joined at once; a plain list such as z works too.
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [80]:
grid = np.array([[1,2,3],[4,5,6]])
np.concatenate([grid, grid]) # default axis=0: joins along the first axis, so the row count doubles

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [82]:
np.concatenate([grid, grid], axis=1) # axis=1: joins along the second axis, so the column count doubles

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [84]:
grid = np.array([[9,8,7],[6,5,4]])
# vstack stacks vertically: the 1-D x becomes the first row, above grid's two rows.
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [87]:
y = np.array([99, 97])
y = y[:, np.newaxis]  # make y a (2, 1) column so it lines up with grid's two rows
np.hstack([grid, y])  # hstack joins horizontally: y is appended as a new last column

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 97]])

In [93]:
# NOTE(review): x1, x2 and x3 previously named the random arrays from section 2;
# this cell rebinds them, so re-running earlier cells afterwards gives different results.
x = [1,2,3,99,99,3,2,1]
x1,x2,x3 = np.split(x, [3,5]) # split points at indices 3 and 5 -> x[:3], x[3:5], x[5:]
print(x1,x2, x3)

[1 2 3] [99 99] [3 2 1]


In [91]:
# 4x4 grid of the integers 0 through 15, used by the vsplit/hsplit examples.
grid = np.arange(16).reshape((4,4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [94]:
upper, lower = np.vsplit(grid, [2]) # split before row index 2: rows 0-1 versus rows 2-3
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [95]:
left, right = np.hsplit(grid, [2]) # split before column index 2: columns 0-1 versus columns 2-3
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


# 3. Computation on NumPy Arrays: Universal Functions

### a. The Slowness of Loops

In [97]:
import numpy as np
# Re-seed so the random inputs used in this section are reproducible.
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal of each element of ``values``, one element at a time.

    This is intentionally an explicit Python-level loop; the notebook times it
    against the vectorized ``1.0 / values`` form.

    Parameters
    ----------
    values : sized, integer-indexable collection of numbers
        For example a 1-D NumPy array or a list.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(values)`` whose i-th entry is
        ``1.0 / values[i]``.
    """
    n_items = len(values)
    reciprocals = np.empty(n_items)
    for idx in range(n_items):
        divisor = values[idx]
        reciprocals[idx] = 1.0 / divisor
    return reciprocals

# Five random integers in [1, 10): a lower bound of 1 keeps zero out of the divisors.
values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [99]:
# One million elements, to make the per-element cost of the Python loop measurable.
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

1.59 s ± 20.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### b. Introducing UFuncs

In [102]:
print(compute_reciprocals(values)) # explicit Python loop
print(1.0/values) # vectorized operation: same numbers without a Python-level loop

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [106]:
# Same computation as the timed loop, written as one array operation
# (recorded runs: 948 µs here versus 1.59 s for the loop).
%timeit (1.0/big_array) # vectorized operations via ufuncs

948 µs ± 37.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [107]:
# Vectorized operations also work between two arrays, element by element.
np.arange(5)/np.arange(1,6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [108]:
# ...and on multi-dimensional arrays: 2 raised to each entry of a 3x3 grid.
x = np.arange(9).reshape((3,3))
2**x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

### c. Exploring NumPy's UFuncs
* unary ufuncs
* binary ufuncs

### d. Advanced UFunc Features

In [109]:
x = np.arange(5)
y= np.empty(5)
np.multiply(x, 10, out=y)  # out= writes the result directly into the existing array y
print(y)

[ 0. 10. 20. 30. 40.]


In [111]:
y = np.zeros(10)
np.power(2, x, out=y[::2])  # out= may be a slice: the results land in every other slot of y
print(y)

[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


In [115]:
x = np.arange(1,6)
# np.add is a binary ufunc, so calling np.add(x) with one argument does not sum the array;
# reduce applies the operation repeatedly until a single value remains.
np.add.reduce(x)

15

In [116]:
# Product of all elements: 1*2*3*4*5.
np.multiply.reduce(x)

120

In [117]:
# accumulate keeps every intermediate result: here, the running sum.
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [118]:
# Running product of the elements.
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

In [119]:
x = np.arange(1,6)
# outer applies the operation to every pair of inputs; here it builds a multiplication table.
np.multiply.outer(x,x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

### e. UFuncs: Learning More

# 4. Aggregations: Min, Max, and Everything in Between

### a. Summing the Values in an Array

### b. Minimum and Maximum

### c. Example: What is the Average Height of US Presidents?

# 5. Computation on Arrays: Broadcasting

### a. Introducing Broadcasting

### b. Rules of Broadcasting

### c. Broadcasting in Practice

# 6. Comparisons, Masks, and Boolean Logic

### a. Example: Counting Rainy Days

### b. Comparison Operators as UFuncs

### c. Working with Boolean Arrays

### d. Boolean Arrays as Masks

# 7. Fancy Indexing

### a. Exploring Fancy Indexing

### b. Combined Indexing

### c. Example: Selecting Random Points

### d. Modifying Values with Fancy Indexing

### e. Example: Binning Data

# 8. Sorting Arrays

### a. Fast Sorting in NumPy: np.sort and np.argsort 

### b.  Partial Sorts: Partitioning

### c. Example: k-Nearest Neighbors

# 9. Structured Data: NumPy's Structured Arrays

### a. Creating Structured Arrays

### b. More Advanced Compound Types

### c. RecordArrays: Structured Arrays with a Twist

### d. On to Pandas