In [378]:
import pandas as pd
import numpy as np
from jupyterthemes import jtplot
from IPython.core.display import display, HTML

display(HTML("<style>.container { width:80% !important; }</style>"))
jtplot.style()

## DataTypes & Attributes

In [379]:
# NumPy's main datatype is ndarray
# n dimensional array
# 所有的 Data 都會轉換為數字來進行機器學習, 
# 所以需要極為複雜的維度來組織 Data (數值)
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [380]:
type(a1)

numpy.ndarray

In [381]:
a2 = np.array([[1, 2.0, 3.3], [4, 5, 6.5]])
a3 = np.array([[[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]],
               [[10, 11, 12],
                [13, 14, 15],
                [16, 17, 18]]])
a4 = np.array([[[[2, 3],
                 [2, 3],
                 [2, 3]],
                [[2, 3],
                 [2, 3],
                 [2, 3]],
                [[2, 3],
                 [2, 3],
                 [2, 3]]],
               [[[2, 3],
                 [2, 3],
                 [2, 3]],
                [[2, 3],
                 [2, 13],
                 [2, 3]],
                [[2, 3],
                 [2, 3],
                 [2, 3]]]]

              )

In [382]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [383]:
a3.shape

(2, 3, 3)

In [384]:
a4.shape

(2, 3, 3, 2)

In [385]:
a1.ndim, a2.ndim, a3.ndim, a4.ndim

(1, 2, 3, 4)

In [386]:
a1.size, a2.size, a3.size, a4.size

(3, 6, 18, 36)

In [387]:
# Create a DataFrame from a NumPy array
pd.DataFrame(a2)

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## 2. Creating Arrays

In [388]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [389]:
sample_array.dtype

dtype('int32')

In [390]:
ones = np.ones([1, 2])
ones

array([[1., 1.]])

In [391]:
zeros = np.zeros([1, 2])
zeros

array([[0., 0.]])

In [392]:
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [393]:
random_array = np.random.randint(1, 10, size=(3, 3, 3))
random_array

array([[[8, 9, 2],
        [6, 9, 5],
        [4, 1, 4]],

       [[6, 1, 3],
        [4, 9, 2],
        [4, 4, 4]],

       [[8, 1, 2],
        [1, 5, 8],
        [4, 3, 8]]])

In [394]:
random_array.size

27

In [395]:
random_array.shape

(3, 3, 3)

In [396]:
random_array2 = np.random.random((5, 3))
random_array2

array([[0.21655035, 0.13521817, 0.32414101],
       [0.14967487, 0.22232139, 0.38648898],
       [0.90259848, 0.44994999, 0.61306346],
       [0.90234858, 0.09928035, 0.96980907],
       [0.65314004, 0.17090959, 0.35815217]])

In [397]:
random_array_3 = np.random.rand(5, 3)
random_array_3

array([[0.75068614, 0.60783067, 0.32504723],
       [0.03842543, 0.63427406, 0.95894927],
       [0.65279032, 0.63505887, 0.99529957],
       [0.58185033, 0.41436859, 0.4746975 ],
       [0.6235101 , 0.33800761, 0.67475232]])

In [398]:
# Pseudo-random number
# 隨機的只有種子編碼, 所以種子編碼固定後, 得到的亂數都會相同
np.random.seed(1)
random_array_4 = np.random.randint(10, size=(5, 3))
random_array_4

array([[5, 8, 9],
       [5, 0, 0],
       [1, 7, 6],
       [9, 2, 4],
       [5, 2, 4]])

## 3. Viewing arrays and 

In [399]:
np.unique(random_array_4)

array([0, 1, 2, 4, 5, 6, 7, 8, 9])

In [400]:
random_array

array([[[8, 9, 2],
        [6, 9, 5],
        [4, 1, 4]],

       [[6, 1, 3],
        [4, 9, 2],
        [4, 4, 4]],

       [[8, 1, 2],
        [1, 5, 8],
        [4, 3, 8]]])

In [401]:
random_array[:2, :2, :2]

array([[[8, 9],
        [6, 9]],

       [[6, 1],
        [4, 9]]])

In [402]:
a5 = np.random.randint(10, size=(2, 3, 4, 5))
a5.shape, a5.ndim

((2, 3, 4, 5), 4)

In [403]:
# Get the first 4 numbers of the inner most arrays
a5

array([[[[2, 4, 7, 7, 9],
         [1, 7, 0, 6, 9],
         [9, 7, 6, 9, 1],
         [0, 1, 8, 8, 3]],

        [[9, 8, 7, 3, 6],
         [5, 1, 9, 3, 4],
         [8, 1, 4, 0, 3],
         [9, 2, 0, 4, 9]],

        [[2, 7, 7, 9, 8],
         [6, 9, 3, 7, 7],
         [4, 5, 9, 3, 6],
         [8, 0, 2, 7, 7]]],


       [[[9, 7, 3, 0, 8],
         [7, 7, 1, 1, 3],
         [0, 8, 6, 4, 5],
         [6, 2, 5, 7, 8]],

        [[4, 4, 7, 7, 4],
         [9, 0, 2, 0, 7],
         [1, 7, 9, 8, 4],
         [0, 1, 9, 8, 2]],

        [[3, 1, 2, 7, 2],
         [6, 0, 9, 2, 6],
         [6, 2, 7, 7, 0],
         [6, 5, 1, 4, 6]]]])

In [404]:
a5[:1, :2, :3, :4]

array([[[[2, 4, 7, 7],
         [1, 7, 0, 6],
         [9, 7, 6, 9]],

        [[9, 8, 7, 3],
         [5, 1, 9, 3],
         [8, 1, 4, 0]]]])

## 4. Manipulating & comparing arrays

### Arithmetic

In [405]:
a1

array([1, 2, 3])

In [406]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [407]:
a1 + ones

array([2., 3., 4.])

In [408]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [409]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [410]:
# 同一維度必須相等或其中一方為1
a3[:, :2, :1]

array([[[ 1],
        [ 4]],

       [[10],
        [13]]])

In [411]:
a3.reshape(3, 2, 3)
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [412]:
a1 / ones

array([1., 2., 3.])

In [413]:
# Floor division removes the decimals (rounds down)
a2 / a1

array([[1.        , 1.        , 1.1       ],
       [4.        , 2.5       , 2.16666667]])

In [414]:
a2 // a1

array([[1., 1., 1.],
       [4., 2., 2.]])

In [415]:
a2 ** 2

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [416]:
np.square(a2)

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [417]:
a1 + ones

array([2., 3., 4.])

In [418]:
np.add(a1, ones)

array([2., 3., 4.])

In [419]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [420]:
a2 % 2

array([[1. , 0. , 1.3],
       [0. , 1. , 0.5]])

In [421]:
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

## Aggregation
Aggregation = performing the same operation on a number of things

In [422]:
listy_list = [1, 2, 3]
type(listy_list)

list

In [423]:
sum(listy_list)

6

In [424]:
a1

array([1, 2, 3])

In [425]:
type(a1)

numpy.ndarray

In [426]:
np.sum(a1)

6

User Python's method( `sum()` ) on Python datatypes and use NumPy's methods on NumPy arrays( `np.sum()` )

In [427]:
# Creative a massive NumPy array
massive_array = np.random.random(100000)
massive_array.size
massive_array[:10]

array([0.5270581 , 0.8859421 , 0.35726976, 0.90853515, 0.62336012,
       0.01582124, 0.92943723, 0.69089692, 0.99732285, 0.17234051])

In [428]:
%timeit sum(massive_array) # Python's sum() 
%timeit np.sum(massive_array) # NumPy's sum()

15.4 ms ± 315 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
42.8 µs ± 481 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [429]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [430]:
np.mean(a2)

3.6333333333333333

In [431]:
np.max(a2) ,np.min(a2), np.std(a2)

(6.5, 1.0, 1.8226964152656422)

## Reshaping & transposing

In [432]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [433]:
a2.shape

(2, 3)

In [434]:
a2.reshape(2, 3, 1)

array([[[1. ],
        [2. ],
        [3.3]],

       [[4. ],
        [5. ],
        [6.5]]])

In [435]:
a2.shape

(2, 3)

In [436]:
a2.T.shape

(3, 2)

![](./source/dot_product_element_wise.png)
![](./source/dot_product_element_wise_2.png)

## Element-wise multiplication (Hadamard product)
* follow broadcast rule, size必須完全相等, 或其中一方為1  

 
arrayA | arrayB | math | describe
--- | --- | --- | ---
size(1, 2, 3)|size(1, 2, 3)| O | size 相等
size(1, 2, 3)|size(3, 2, 3)| X | size 不全等
size(1, 2, 9)|size(9, 1, 1)| O | size 不相等但該維度其中為1
size(3, 6, 2)|size(3, 6)| X | 維度不對 (維度數由右而左升冪)
size(3, 6, 2)|size(6, 2)| O | 

In [437]:
np.random.seed(1)
e_array_1 = np.random.randint(1, 10, size=(3, 6, 1))
e_array_2 = np.random.randint(1, 10, size=(6, 1))
e_array_1 * e_array_2

array([[[48],
        [ 9],
        [42],
        [ 8],
        [ 7],
        [ 4]],

       [[64],
        [ 7],
        [21],
        [40],
        [42],
        [ 6]],

       [[40],
        [ 3],
        [35],
        [64],
        [56],
        [ 4]]])

## Dot product
* 
* 兩元素的


In [438]:
np.random.seed(1)
dot_1 = np.random.randint(1, 10, size=(3, 2))
dot_2 = np.random.randint(1, 10, size=(2, 3))
np.dot(dot_1, dot_2)

array([[93, 96, 45],
       [53, 48, 21],
       [18, 19,  9]])

## Dot product exmaple (nut butter sales)

In [439]:
np.random.seed(0)
# Number of jars sold
sales_amounts = np.random.randint(20, size=(5, 3))
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [440]:
weekly_sales = pd.DataFrame(sales_amounts,
              index=['Mon', 'Tues', 'Wed', 'Thurs', 'Fri'],
              columns=['Almond butter', 'Peanut butter', 'Cashew butter'])
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [441]:
# Create prices array
prices = np.array([10, 8, 12])
prices

array([10,  8, 12])

In [442]:
# Create butter_price DataFrame
# 陣列轉換為pd.DataFrame時, 若為一維陣列須明確轉換形狀(reshape)為 1 row n column
butter_prices = pd.DataFrame(prices.reshape(1, 3),
                           index=['Price'],
                           columns=['Almond butter', 'Peanut butter', 'Cashew butter'])
butter_prices

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Price,10,8,12


In [443]:
# Shaoe aren't aligned, let's transpose
total_sales = prices.dot(sales_amounts.T)
total_sales

array([240, 138, 458, 232, 142])

In [445]:
sales_amounts_T = sales_amounts.T
sales_amounts_T

array([[12,  3,  9,  4,  1],
       [15,  3, 19,  6,  6],
       [ 0,  7, 18, 12,  7]])

In [446]:
# Create daily_sales
butter_prices


Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Price,10,8,12


In [455]:
np.random.seed(0)
# Number of jars sold
sales = np.random.randint(20, size=(5, 3))
sales.shape


(5, 3)

In [459]:
# Create prices array
price = np.array([10, 8, 12])
price = price.reshape(1, 3)
price.shape

(1, 3)

In [464]:
sales.dot(price.T)

array([[240],
       [138],
       [458],
       [232],
       [142]])

In [465]:
price.dot(sales.T)

array([[240, 138, 458, 232, 142]])

In [474]:
# Create daliy_sales
butter_prices.shape, weekly_sales.shape

((1, 3), (5, 3))

In [476]:
weekly_sales.T.shape

(3, 5)

In [479]:
daliy_sales = butter_prices.dot(weekly_sales.T)
daliy_sales

Unnamed: 0,Mon,Tues,Wed,Thurs,Fri
Price,240,138,458,232,142


In [482]:
weekly_sales['Total ($)'] = daliy_sales.T
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter,Total ($)
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142


## Comparison Operators

In [466]:
a1

array([1, 2, 3])

In [467]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [468]:
a1 > a2

array([[False, False, False],
       [False, False, False]])

In [483]:
a1 >= a2

array([[ True,  True, False],
       [False, False, False]])

In [486]:
bool_array = a1 >= a2
type(bool_array), bool_array.dtype

(numpy.ndarray, dtype('bool'))

## Sorting arrays

In [505]:
np.random.seed(1)
random_array = np.random.randint(10, size=(3, 5))
random_array

array([[5, 8, 9, 5, 0],
       [0, 1, 7, 6, 9],
       [2, 4, 5, 2, 4]])

In [506]:
np.sort(random_array)

array([[0, 5, 5, 8, 9],
       [0, 1, 6, 7, 9],
       [2, 2, 4, 4, 5]])

In [509]:
# 回傳各維度index, 以'值'由小而大排序
np.argsort(random_array)

array([[4, 0, 3, 1, 2],
       [0, 1, 3, 2, 4],
       [0, 3, 1, 4, 2]], dtype=int64)

In [510]:
# 返回最小、返回最大
np.argmin(a1), np.argmax(a1)

(0, 2)

In [514]:
random_array

array([[5, 8, 9, 5, 0],
       [0, 1, 7, 6, 9],
       [2, 4, 5, 2, 4]])

In [519]:
np.argmax(random_array, axis=0)

array([0, 0, 0, 1, 1], dtype=int64)

In [631]:
np.random.seed(0)
r = np.random.randint(100, size=(7, 6, 5, 4, 3))
r

array([[[[[44, 47, 64],
          [67, 67,  9],
          [83, 21, 36],
          [87, 70, 88]],

         [[88, 12, 58],
          [65, 39, 87],
          [46, 88, 81],
          [37, 25, 77]],

         [[72,  9, 20],
          [80, 69, 79],
          [47, 64, 82],
          [99, 88, 49]],

         [[29, 19, 19],
          [14, 39, 32],
          [65,  9, 57],
          [32, 31, 74]],

         [[23, 35, 75],
          [55, 28, 34],
          [ 0,  0, 36],
          [53,  5, 38]]],


        [[[17, 79,  4],
          [42, 58, 31],
          [ 1, 65, 41],
          [57, 35, 11]],

         [[46, 82, 91],
          [ 0, 14, 99],
          [53, 12, 42],
          [84, 75, 68]],

         [[ 6, 68, 47],
          [ 3, 76, 52],
          [78, 15, 20],
          [99, 58, 23]],

         [[79, 13, 85],
          [48, 49, 69],
          [41, 35, 64],
          [95, 69, 94]],

         [[ 0, 50, 36],
          [34, 48, 93],
          [ 3, 98, 42],
          [77, 21, 73]]],


        [[[ 0, 1

In [632]:
# axis0 = 1d (row)
# axis1 = 2d (cloumn)
# axis2 = 3d ()
# axisn = nd -1
np.argmax(r, axis=4)

array([[[[2, 0, 0, 2],
         [0, 2, 1, 2],
         [0, 0, 2, 0],
         [0, 1, 0, 2],
         [2, 0, 2, 0]],

        [[1, 1, 1, 0],
         [2, 2, 0, 0],
         [1, 1, 0, 0],
         [2, 2, 2, 0],
         [1, 2, 1, 0]],

        [[2, 2, 1, 1],
         [2, 1, 2, 1],
         [1, 2, 0, 1],
         [2, 1, 0, 1],
         [1, 0, 2, 2]],

        [[1, 0, 0, 0],
         [0, 0, 1, 1],
         [1, 2, 0, 2],
         [1, 1, 2, 2],
         [0, 0, 0, 2]],

        [[1, 2, 1, 2],
         [0, 1, 1, 2],
         [0, 1, 2, 2],
         [1, 2, 0, 1],
         [0, 2, 0, 0]],

        [[0, 0, 1, 2],
         [1, 1, 0, 2],
         [2, 1, 2, 2],
         [0, 0, 0, 2],
         [0, 2, 2, 2]]],


       [[[0, 1, 0, 0],
         [0, 0, 2, 0],
         [2, 0, 0, 2],
         [2, 2, 1, 1],
         [0, 0, 2, 1]],

        [[1, 0, 1, 1],
         [0, 2, 0, 0],
         [2, 1, 1, 2],
         [1, 0, 2, 1],
         [2, 2, 0, 0]],

        [[0, 0, 0, 0],
         [0, 2, 2, 1],
         [0, 1, 