In [218]:
import numpy as np
from jupyterthemes import jtplot
# `IPython.display` is the public import path; importing `display` from the
# internal `IPython.core.display` module is deprecated in newer IPython.
from IPython.display import display, HTML

# Inject CSS that widens the notebook container to 70% of the browser window.
display(HTML("<style>.container { width:70% !important; }</style>"))
# Apply jupyterthemes' default plot styling.
jtplot.style()

# NumPy CookBook

![](../source/numpy-6-step-ml-framework-tools-numpy-highlight.png)<a name='ch_index'></a>

* [Question](https://github.com/mrdbourke/zero-to-mastery-ml/blob/master/section-2-data-science-and-ml-tools/numpy-exercises.ipynb)  
* [Answer](https://github.com/mrdbourke/zero-to-mastery-ml/blob/master/section-2-data-science-and-ml-tools/numpy-exercises-solutions.ipynb)

#### Why NumPy?
---
* It's fast (C language)
* Backbone of other Python scientific packages
* Vectorization via broadcasting (avoiding loops)

#### What are we going to cover?
---
* Most useful functions
* NumPy datatypes & attributes (ndarray)
* Creating arrays
* Viewing arrays & matrices
* Sorting arrays
* Use cases

[CH1. NumPy datatypes & attributes](#numpy_datatypes_attributes)<br/>
[CH2. Creating arrays](#creating_arrays)<br/>
[CH3. Viewing arrays & matrices](#viesing_arrays_matrices)<br/>
[CH4. Manipulating & comparing arrays](#manipulating_comparing_arrays)<br/>
[CH5. Sorting arrays](#sorting_arrays)<br/>
[CH6. Use cases](#use_cases)<br/>
[CH7. Dimensions and Axes](#diemension_and_axis)<br/>

## NumPy datatypes & attributes<a name='numpy_datatypes_attributes'></a>
---

* The main datatype in NumPy is the ndarray.
* An operation that works on one array will work on any other array.

In [219]:
# Vector: a 1-dimensional array
a1 = np.array([1, 2, 3])

# Matrix: a 2-dimensional array (the float entries make the whole array float64)
a2 = np.array([
    [1, 2.0, 3.3],
    [4, 5, 6.5],
])

# 3-dimensional array: two stacked 3x3 matrices, shape (2, 3, 3)
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
])

* Array - A list of numbers, can be multi-dimensional.
* Scalar - A single number (e.g. 7).
* Vector - A list of numbers with 1 dimension (e.g. np.array([1, 2, 3])).
* Matrix - A (usually) multi-dimensional list of numbers (e.g. np.array([[1, 2, 3], [4, 5, 6]])).<br/>
[back_index](#ch_index)

## Creating arrays <a name="creating_arrays"></a>
---

* [np.array()](#np.array)
* [np.ones()](#np.ones)
* [np.zeros()](#np.zeros)
* [np.arange()](#np.arange)
* [np.random.rand(5, 3)](#np.random.rand)
* [np.random.randint(10, size=5)](#np.random.randint)
* [np.random.seed() - pseudo random numbers](#np.random.seed)
* [Searching the documentation example (finding np.unique() and using it)](#np.unique)

##### np.array()<a name="np.array"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html)<br/>
[back](#creating_arrays)

In [220]:
# Build a 1-D array from a Python list and display it
simple_array = np.array([1, 2, 3])
simple_array

array([1, 2, 3])

##### np.ones()<a name="np.ones"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ones.html)<br/>
[back](#creating_arrays)

In [221]:
# Create a 10x2 array filled with ones and show it together with its dtype
# (np.ones produces floats unless a dtype is given)
ones = np.ones((10, 2))
ones, ones.dtype

(array([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]]),
 dtype('float64'))

In [222]:
# Cast the ones array to integers; `int` maps to the platform's default
# integer dtype (int32 in the output recorded here)
ones = ones.astype(int)
ones, ones.dtype

(array([[1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1],
        [1, 1]]),
 dtype('int32'))

##### np.zeros()<a name="np.zeros"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.zeros.html)<br/>
[back](#creating_arrays)

In [223]:
# Create a 3-D array of zeros with shape (5, 3, 3): five 3x3 blocks
zeros = np.zeros((5, 3, 3))
zeros

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

##### np.arange()<a name="np.arange"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.arange.html)<br/>
[back](#creating_arrays)

In [224]:
# Create an array of evenly spaced values: start at 0, stop before 10, step 2
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

##### np.random.rand()<a name="np.random.rand"></a>
[see also](https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.rand.html)<br/>
[back](#creating_arrays)

In [225]:
# Random 5x3 array of floats drawn uniformly from [0, 1).
# np.random.rand takes the dimensions as separate arguments and yields the
# same values as np.random.random((5, 3)) for a given seed.
np.random.rand(5, 3)

array([[0.01458075, 0.09336303, 0.82655425],
       [0.83349274, 0.89241102, 0.9580135 ],
       [0.56133712, 0.0905487 , 0.99598891],
       [0.47740172, 0.68491561, 0.84337526],
       [0.61612483, 0.56318751, 0.36816219]])

##### np.random.randint()<a name="np.random.randint"></a>
[see also](https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.randint.html)<br/>
[back](#creating_arrays)

In [226]:
# 5x3 array of random integers in [0, 10); no seed is set before this cell,
# so the values change from run to run
random_array = np.random.randint(10, size=(5, 3))
random_array

array([[3, 1, 7],
       [3, 1, 9],
       [5, 7, 0],
       [9, 6, 0],
       [5, 2, 8]])

##### np.random.seed()<a name="np.random.seed"></a>
[see also](https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.random.seed.html)<br/>
[back](#creating_arrays)

In [227]:
# Set the random seed to 0 so the draw below is reproducible
np.random.seed(0)

# Make 'random' numbers: with a fixed seed this call returns the same 5x3
# array every time the cell is run
np.random.randint(10, size=(5, 3))

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

##### np.unique()<a name="np.unique"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.unique.html)<br/>
[back](#creating_arrays)

In [228]:
# Fix the seed so the sample array below is reproducible.
np.random.seed(1)

# 5x3 grid of integers in [0, 5); 15 draws from 5 possible values means
# duplicates are unavoidable, which suits the np.unique() example.
u1 = np.random.randint(low=0, high=5, size=(5, 3))
u1

array([[3, 4, 0],
       [1, 3, 0],
       [0, 1, 4],
       [4, 1, 2],
       [4, 2, 4]])

In [229]:
# Find the sorted unique values that occur in the array
np.unique(u1)

array([0, 1, 2, 3, 4])

## Viewing arrays & matrices (indexing)<a name='viesing_arrays_matrices'></a>
___
* Arrays and matrices are both ndarrays, so they can be viewed in similar ways.
* NumPy arrays get printed from outside to inside. This means the number at the end of the shape comes first, and the number at the start of the shape comes last.<br/>
[back_index](#ch_index)

#### np.shape

In [245]:
a4 = np.random.randint(10, size=[2, 3, 4, 5])
# shape = (2, 3, 4, 5), one entry per axis:
#   axis 0 -> length 2 (outermost blocks)
#   axis 1 -> length 3 (2-D slices inside each block)
#   axis 2 -> length 4 (rows of each 2-D slice)
#   axis 3 -> length 5 (values in each row)
a4.shape

(2, 3, 4, 5)

In [231]:
# Keep everything along the first three axes and only the first 4 values
# along the last axis (i.e. the first 4 numbers of every innermost row)
a4[:, :, :, :4]

array([[[[2, 4, 7, 7],
         [1, 7, 0, 6],
         [9, 7, 6, 9],
         [0, 1, 8, 8]],

        [[9, 8, 7, 3],
         [5, 1, 9, 3],
         [8, 1, 4, 0],
         [9, 2, 0, 4]],

        [[2, 7, 7, 9],
         [6, 9, 3, 7],
         [4, 5, 9, 3],
         [8, 0, 2, 7]]],


       [[[9, 7, 3, 0],
         [7, 7, 1, 1],
         [0, 8, 6, 4],
         [6, 2, 5, 7]],

        [[4, 4, 7, 7],
         [9, 0, 2, 0],
         [1, 7, 9, 8],
         [0, 1, 9, 8]],

        [[3, 1, 2, 7],
         [6, 0, 9, 2],
         [6, 2, 7, 7],
         [6, 5, 1, 4]]]])

#### np.axis <a name='np.axis'></a>
[see np.argmax(ndarray, axis=)](https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html)<br/>
[back](#numpy_datatypes_attributes)<br/>

In [232]:
# For each column (axis=0 compares values down the rows), return the row
# index that holds the largest value
np.argmax(random_array, axis=0)

array([3, 2, 1], dtype=int64)

## Manipulating & comparing arrays <a name='manipulating_comparing_arrays'></a>
___
* Arithmetic
  * +, -, *, /, //, **, %
  * np.exp()
  * np.log()
  * Dot product - np.dot()
  * Broadcasting
* Aggregation
  * np.sum() - much faster than Python's built-in sum() on NumPy arrays (TODO: add a timing demo)
  * np.mean()
  * np.std()
  * np.var()
  * np.min()
  * np.max()
  * np.argmin() - find index of minimum value
  * np.argmax() - find index of maximum value
  * These work on all ndarray's
      * a4.min(axis=0) -- you can use axis as well
* Reshaping
  * np.reshape()
* Transposing
  * a3.T
* Comparison operators
  * \>
  * \<  
  * <=  
  * \>=  
  * x != 3  
  * x == 3  
  * np.sum(x > 3)
[back_index](#ch_index)

## Sorting arrays <a name='sorting_arrays'></a>
---
* [np.sort()](#np.sort)
* [np.argsort()](#np.argsort)
* [np.argmax()](#np.argmax)
* [np.argmin()](#np.argmin)  
[back_index](#ch_index)

##### np.sort()<a name="np.sort"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html)<br/>
[back](#sorting_arrays)

In [233]:
# Show the unsorted array used in the sorting examples
random_array

array([[3, 1, 7],
       [3, 1, 9],
       [5, 7, 0],
       [9, 6, 0],
       [5, 2, 8]])

In [234]:
# Sort the values within each row in ascending order (np.sort works along
# the last axis by default)
np.sort(random_array)

array([[1, 3, 7],
       [1, 3, 9],
       [0, 5, 7],
       [0, 6, 9],
       [2, 5, 8]])

##### np.argsort()<a name="np.argsort"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html)<br/>
[back](#sorting_arrays)

In [235]:
# Seed the generator so the example is reproducible
np.random.seed(5)
# 2x3x5 array of random integers in [0, 100)
# NOTE(review): this rebinds `a1`, which earlier held the 1-D vector [1, 2, 3]
a1 = np.random.randint(100, size=[2, 3, 5])
a1

array([[[99, 78, 61, 16, 73],
        [ 8, 62, 27, 30, 80],
        [ 7, 76, 15, 53, 80]],

       [[27, 44, 77, 75, 65],
        [47, 30, 84, 86, 18],
        [ 9, 41, 62,  1, 82]]])

In [236]:
# Return the indices that would sort the values in ascending order
# (computed independently for each innermost row)
np.argsort(a1)

array([[[3, 2, 4, 1, 0],
        [0, 2, 3, 1, 4],
        [0, 2, 3, 1, 4]],

       [[0, 1, 4, 3, 2],
        [4, 1, 0, 2, 3],
        [3, 0, 1, 2, 4]]], dtype=int64)

##### np.argmax()<a name="np.argmax"></a>
[see also](https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html)<br/>
[back](#sorting_arrays)

## Use cases (examples of turning things into numbers)<a name='use_cases'></a>
---
* Turning an image of a panda into numbers.<br/>
![](../source/panda.png)
[back_index](#ch_index)

In [242]:
# NOTE(review): this import lives mid-notebook; consider moving it to the
# import cell at the top
from matplotlib.image import imread

# Read the image file into an array and confirm it is a NumPy ndarray
panda = imread('../source/panda.png')
print(type(panda))

<class 'numpy.ndarray'>


In [243]:
# Shape of the image array -- presumably (height, width, colour channels)
panda.shape

(2330, 3500, 3)

In [244]:
# Display the raw pixel values (NumPy abbreviates the repr of large arrays)
panda

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       ...,

       [[0.13333334, 0.07450981, 0.05490196],
        [0.12156863, 0.0627451 , 0.04313726],
        [0.10980392, 0

## Dimensions and Axes <a name='diemension_and_axis'></a>
![](../source/numpy-anatomy-of-a-numpy-array.png)
[back_index](#ch_index)

In [267]:
# Seed once so d1..d4 come out the same each time this cell is run.
np.random.seed(100)

# One example array per rank (1-D to 4-D); each adds a leading axis to the
# shape of the previous one. Values are integers in [0, 10).
d1 = np.random.randint(0, 10, size=5)
d2 = np.random.randint(0, 10, size=(4, 5))
d3 = np.random.randint(0, 10, size=(3, 4, 5))
d4 = np.random.randint(0, 10, size=(2, 3, 4, 5))

In [247]:
# 1-D array: a single row of 5 values
d1

array([8, 8, 3, 7, 7])

In [253]:
# Show d1's shape tuple and its number of dimensions (ndim).
print(f'd1.shape={d1.shape}, d1.ndim={d1.ndim}')

d1.size=(5,),di.dim=1


In [260]:
# A 1-D array has only axis 0, so this adds up every element
d1.sum(axis=0)

33

In [262]:
# 2-D array: 4 rows x 5 columns
d2

array([[0, 4, 2, 5, 2],
       [2, 2, 1, 0, 8],
       [4, 0, 9, 6, 2],
       [4, 1, 5, 3, 4]])

In [254]:
# Show d2's shape tuple and its number of dimensions (ndim).
print(f'd2.shape={d2.shape}, d2.ndim={d2.ndim}')

d1.size=(4, 5),di.dimension=2


In [266]:
# sum(axis=1) adds across each row (one total per row);
# sum(axis=0) adds down each column (one total per column)

d2.sum(axis=1), d2.sum(axis=0)

(array([13, 13, 21, 17]), array([10,  7, 17, 14, 16]))

In [249]:
# 3-D array: 3 blocks, each 4 rows x 5 columns
d3

array([[[4, 3, 7, 1, 1],
        [7, 7, 0, 2, 9],
        [9, 3, 2, 5, 8],
        [1, 0, 7, 6, 2]],

       [[0, 8, 2, 5, 1],
        [8, 1, 5, 4, 2],
        [8, 3, 5, 0, 9],
        [3, 6, 3, 4, 7]],

       [[6, 3, 9, 0, 4],
        [4, 5, 7, 6, 6],
        [2, 4, 2, 7, 1],
        [6, 6, 0, 7, 2]]])

In [None]:
# Show d3's shape tuple and its number of dimensions (ndim).
print(f'd3.shape={d3.shape}, d3.ndim={d3.ndim}')

In [268]:
# 4-D array: 2 groups of 3 blocks, each block 4 rows x 5 columns
d4

array([[[[3, 5, 4, 2, 4],
         [3, 7, 9, 0, 0],
         [5, 9, 6, 6, 5],
         [6, 4, 7, 3, 9]],

        [[2, 3, 8, 7, 1],
         [5, 9, 3, 0, 6],
         [2, 3, 4, 8, 9],
         [8, 5, 2, 7, 5]],

        [[9, 0, 9, 8, 6],
         [2, 0, 5, 3, 2],
         [3, 6, 4, 1, 3],
         [1, 4, 8, 8, 2]]],


       [[[2, 7, 2, 1, 2],
         [7, 1, 0, 5, 3],
         [5, 2, 6, 1, 1],
         [5, 9, 2, 5, 6]],

        [[4, 6, 7, 9, 7],
         [3, 0, 2, 5, 1],
         [1, 0, 3, 6, 7],
         [3, 6, 4, 8, 6]],

        [[5, 0, 0, 5, 1],
         [3, 2, 3, 1, 0],
         [6, 5, 2, 0, 0],
         [9, 1, 5, 2, 3]]]])

In [286]:
# Summing over axis 0 adds the two (3, 4, 5) groups element-wise,
# leaving an array of shape (3, 4, 5)
d4.sum(axis=0)

array([[[ 5, 12,  6,  3,  6],
        [10,  8,  9,  5,  3],
        [10, 11, 12,  7,  6],
        [11, 13,  9,  8, 15]],

       [[ 6,  9, 15, 16,  8],
        [ 8,  9,  5,  5,  7],
        [ 3,  3,  7, 14, 16],
        [11, 11,  6, 15, 11]],

       [[14,  0,  9, 13,  7],
        [ 5,  2,  8,  4,  2],
        [ 9, 11,  6,  1,  3],
        [10,  5, 13, 10,  5]]])

In [288]:
# Exponential function: e raised to the power 81
np.exp(81)

1.5060973145850306e+35