<a href="https://colab.research.google.com/github/Madelinelai/Basic-Coding-Learn/blob/master/1_2_numpy_pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Numpy 與 Pandas 介紹**
NumPy 跟 pandas 是機器學習當中兩個常用的套件。其中 NumPy 提供許多數學函式的實作以及高效率的陣列、矩陣運算；pandas 則提供方便的資料處理與資料分析功能。

# **Numpy 簡介**

[參考資料](https://machine-learning-notes.gitlab.io/docs/python/numpy/)

In [None]:
import numpy as np

# --- Creating arrays ---------------------------------------------------
a = np.array([0, 1, 2])              # one-dimensional array
print(type(a))                       # prints "<class 'numpy.ndarray'>"
print(a.shape)                       # prints "(3,)"
print(a[0], a[1], a[2])              # prints "0 1 2"

b = np.array([[1, 2, 3],
              [4, 5, 6]])            # two-dimensional array: 2 rows, 3 columns
print(b.shape)                       # prints "(2, 3)"
print(b[0, 0], b[0, 1], b[1, 0])     # prints "1 2 4"

c = np.zeros(shape=(2, 2))           # 2x2 array filled with zeros
print(c)                             # prints "[[0. 0.]
                                     #          [0. 0.]]"
d = np.ones(shape=(1, 2))            # 1x2 array filled with ones

# --- Slicing (reuses the 2x3 array `b` created above) ------------------
print(b[:, 1])    # prints "[2 5]": every row, column 1 only
print(b[:, 1:])   # prints "[[2 3]
                  #          [5 6]]": every row, columns 1 and onward

# --- Boolean indexing (reuses the 1-D array `a` created above) ---------
is_greater_than_one = a > 1          # element-wise comparison yields a boolean array
print(is_greater_than_one)           # prints "[False False  True]"
print(a[is_greater_than_one])        # prints "[2]": keeps only elements where the mask is True

<class 'numpy.ndarray'>
(3,)
0 1 2
(2, 3)
1 2 4
[[0. 0.]
 [0. 0.]]
[2 5]
[[2 3]
 [5 6]]
[False False  True]
[2]


In [None]:
import numpy as np

x = np.array([1.0, 2.0])
print(x.dtype)                           # prints "float64"
x = np.array([1, 2])
print(x.dtype)                           # prints the platform default integer type:
                                         # "int64" on most 64-bit platforms, "int32" on Windows with NumPy 1.x
x = np.array([1.0, 2.0], dtype=np.int8)  # the dtype can be specified explicitly
print(x.dtype)                           # prints "int8"

# int8 can only hold -128..127, so 1 + 1000 = 1001 does not fit and wraps around.
# The sum is computed as a plain Python int and then cast back to int8 explicitly:
# writing `x[0] += 1000` raises OverflowError on NumPy >= 2 because the Python
# integer 1000 cannot be converted to int8, whereas an explicit astype() performs
# the wrapping cast on every NumPy version.
x[0] = np.array(int(x[0]) + 1000).astype(np.int8)
print(x)                                 # prints "[-23   2]": 1001 wrapped into the int8 range

float64
int64
int8
[-23   2]


In [None]:
# Three samples drawn from the standard normal distribution (different on every run).
samples = np.random.randn(3)
print(samples)

# Inner product of two vectors: 1*4 + 2*5 + 3*6 = 32.
print(np.dot([1, 2, 3], [4, 5, 6]))

# Matrix product of two 2x2 matrices; `a @ b` is the operator form of np.matmul(a, b).
a = np.array([[1, 2],
              [2, 1]])
b = np.array([[1, 2],
              [2, 4]])
print(a @ b)   # prints "[[ 5 10]
               #          [ 4  8]]"

[-0.23222358  0.76552081  0.20760801]
32
[[ 5 10]
 [ 4  8]]


# **Pandas 簡介**

[參考資料](https://machine-learning-notes.gitlab.io/docs/python/pandas/)

In [None]:
import pandas as pd

# A Series is a one-dimensional labelled array: here three standard-normal
# samples are labelled 'a', 'b' and 'c'.
labels = ['a', 'b', 'c']
s = pd.Series(np.random.randn(len(labels)), index=labels)
print(s)

a   -0.522425
b   -0.768493
c    1.940314
dtype: float64


In [None]:
# Build a DataFrame from a dict of Series: each key becomes a column and the
# Series are aligned on their index labels. Column 'A' has no value for label
# 'd', so that cell is filled with NaN.
row_labels = ['a', 'b', 'c', 'd']
d = {
    'A': pd.Series(np.random.randn(3), index=row_labels[:3]),
    'B': pd.Series(np.random.randn(4), index=row_labels),
}

df = pd.DataFrame(d)
print(df)

          A         B
a -0.745534 -0.008578
b -0.886710  0.281486
c -0.470272  0.453517
d       NaN -0.963699


In [None]:
# Build a DataFrame from a nested list: each inner list is one row, and the
# column names are supplied separately. Rows get the default 0, 1, ... index.
data = [[1, 2, 3], [3, 4, 5]]
column_names = ['A', 'B', 'C']

df = pd.DataFrame(data=data, columns=column_names)
print(df)

   A  B  C
0  1  2  3
1  3  4  5


In [None]:
# Six rows of standard-normal samples in four columns named A, B, C and D.
df = pd.DataFrame(np.random.randn(6, 4), columns=['A', 'B', 'C', 'D'])

# Print the whole frame, then its first rows (head() returns 5 rows by default),
# then only its last row, with a dashed line between the three views.
separator = '--------------'
print(df, separator, df.head(), separator, df.tail(1), sep='\n')

          A         B         C         D
0 -0.503559 -0.505120  1.762016 -0.733032
1 -0.341494  0.274674 -0.112428  1.039485
2 -1.202137  0.467725 -0.628200 -0.074857
3  0.648787  0.947257  0.828798 -0.143545
4 -0.669693 -1.571197  1.154229  0.088604
5  0.515418 -1.965141  1.849047  2.739605
--------------
          A         B         C         D
0 -0.503559 -0.505120  1.762016 -0.733032
1 -0.341494  0.274674 -0.112428  1.039485
2 -1.202137  0.467725 -0.628200 -0.074857
3  0.648787  0.947257  0.828798 -0.143545
4 -0.669693 -1.571197  1.154229  0.088604
--------------
          A         B         C         D
5  0.515418 -1.965141  1.849047  2.739605


In [None]:
# Two ways to obtain the data of `df` (the frame built in an earlier cell) as a
# NumPy array; both print the same matrix. The pandas documentation recommends
# DataFrame.to_numpy() over the older .values attribute.
print(df.values, '--------------', df.to_numpy(), sep='\n')

[[-0.50355911 -0.50512033  1.76201632 -0.73303227]
 [-0.34149351  0.27467382 -0.11242813  1.03948533]
 [-1.20213742  0.46772454 -0.62820012 -0.07485729]
 [ 0.64878736  0.94725737  0.82879772 -0.14354526]
 [-0.6696928  -1.57119651  1.1542291   0.0886042 ]
 [ 0.51541817 -1.96514119  1.84904702  2.73960521]]
--------------
[[-0.50355911 -0.50512033  1.76201632 -0.73303227]
 [-0.34149351  0.27467382 -0.11242813  1.03948533]
 [-1.20213742  0.46772454 -0.62820012 -0.07485729]
 [ 0.64878736  0.94725737  0.82879772 -0.14354526]
 [-0.6696928  -1.57119651  1.1542291   0.0886042 ]
 [ 0.51541817 -1.96514119  1.84904702  2.73960521]]


In [None]:
# Per-column summary statistics of `df` (the frame built in an earlier cell):
# count, mean, std, min, the 25%/50%/75% quantiles and max.
summary = df.describe()
print(summary)

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean  -0.258780 -0.391967  0.808910  0.486043
std    0.713946  1.170920  1.002406  1.244181
min   -1.202137 -1.965141 -0.628200 -0.733032
25%   -0.628159 -1.304677  0.122878 -0.126373
50%   -0.422526 -0.115223  0.991513  0.006873
75%    0.301190  0.419462  1.610070  0.801765
max    0.648787  0.947257  1.849047  2.739605
