# Ch01 Create NumPy Array

In [3]:
import numpy as np

## NumPy 陣列創建 via numpy.array
NumPy arrays can be created via the numpy.array constructor directly

In [10]:
# Look up documentation: in IPython/Jupyter a trailing `?` shows the object's help.
np.array?
# Roughly equivalent to:
# help(np.array)
# np.array.__doc__




In [11]:
# Pressing Tab after a partially typed name opens auto-completion (suggestions),
# e.g. np.ar -> np.arange
np.arange

<function numpy.arange>

## NumPy array vs Python list performance

In [12]:
# 計時用，比較 python list 與 numpy array 效能
import time
import numpy as np

# Number of elements in each benchmark vector (one million).
size_of_vec = 1_000_000

# Python list function
def python_list(size=None):
    """Time element-wise addition of two plain Python lists.

    Args:
        size: Number of elements per list. When omitted, the module-level
            ``size_of_vec`` is used, so existing ``python_list()`` calls
            behave as before.

    Returns:
        Elapsed time in seconds (float) for building both lists and adding
        them element by element.
    """
    n = size_of_vec if size is None else size
    # perf_counter() is the high-resolution clock intended for measuring durations.
    start = time.perf_counter()
    # Materialise real lists holding 0 .. n-1. In Python 3 a bare range() is a
    # lazy sequence, so without list() no list would ever be built or measured.
    X = list(range(n))
    Y = list(range(n))
    # Build Z from X and Y, one element at a time (this is the work being timed).
    Z = [x + y for x, y in zip(X, Y)]
    # End time - start time => time spent on the computation.
    return time.perf_counter() - start

# 對 numpy array 做一樣的事
def numpy_array(size=None):
    """Time element-wise addition of two NumPy arrays.

    Args:
        size: Number of elements per array. When omitted, the module-level
            ``size_of_vec`` is used, so existing ``numpy_array()`` calls
            behave as before.

    Returns:
        Elapsed time in seconds (float) for creating both arrays and adding them.
    """
    n = size_of_vec if size is None else size
    # perf_counter() has much finer resolution than time.time(); a coarse clock
    # can report 0.0 s for a run this fast, which breaks any later ratio.
    start = time.perf_counter()
    X = np.arange(n)
    Y = np.arange(n)
    # Arrays support vectorised arithmetic directly (this is the work being timed).
    Z = X + Y
    return time.perf_counter() - start

# 印出兩者時間差
# Run both benchmarks and print the timings and their ratio.
count1 = python_list()
count2 = numpy_array()
print("****** Test run with {} elements ******".format(size_of_vec))
print("Python list: {}".format(count1))
print("NumPy array: {}".format(count2))
# A coarse clock may report 0.0 s for the NumPy run; guard the division so the
# cell does not fail with ZeroDivisionError.
speedup = count1 / count2 if count2 > 0 else float("inf")
print("NumPy array is " + str(speedup) + " faster!")

****** Test run with 100000000 elements ******
Python list: 7.449740171432495
NumPy array: 0.4636554718017578
NumPy array is 16.067404839379815 faster!


In [3]:
# Confirm the type of the object returned by np.array.
arr = np.array([3, 5, 7, 9])
arr_type = type(arr)
print(arr_type)

<class 'numpy.ndarray'>


In [4]:
# To preview a variable right after assigning it, separate the two statements
# with ";" and keep them on the same line:
a = np.array([3, 5, 7, 9]); a

array([3, 5, 7, 9])

#### np.arange(): 從數值範圍來建立陣列

In [5]:
# np.arange is used the same way as Python's built-in range().
# Signature: numpy.arange([start, ]stop, [step, ]dtype=None, *, like=None)
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
# Build the array [1, 2, 3, 4, 5, 6, 7, 8, 9]
a = np.arange(1, 10)
a

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
# Between 1 and 10, take every 3rd value: [1, 4, 7]. Arguments are (start, stop, step).
a = np.arange(1, 10, 3)
a

array([1, 4, 7])

#### np.linspace(): 從數值範圍透過線性切割建立陣列

In [18]:
# Linear spacing.
# Usage: np.linspace(start, stop, num) -> `num` evenly spaced samples, with both
# endpoints included.
# The step between samples is (stop - start) / (num - 1), not (stop - start) / num:
# e.g. np.linspace(0, 10, 4) has step (10 - 0) / (4 - 1) = 3.333...
a = np.linspace(0, 10, 4)
b = np.linspace(1, 11, 4)
print(a, b)

[ 0.          3.33333333  6.66666667 10.        ] [ 1.          4.33333333  7.66666667 11.        ]


## NumPy 資料型態

In [13]:
# The dtype is inferred automatically (all-integer input -> an integer dtype).
# NOTE: the default integer width (int32 vs int64) varies between environments.
a = np.array([1, 2, 3, 4])
a.dtype

dtype('int32')

In [15]:
# The dtype is inferred automatically: a single float element promotes the
# whole array to a float dtype.
a = np.array([1.3, 2, 3, 4])
a.dtype

dtype('float64')

In [18]:
# Set the dtype of every element explicitly ('float' defaults to float64).
a = np.array([1, 2, 3, 4], dtype='float')
a.dtype

dtype('float64')

In [20]:
# Unicode strings of at most 9 characters (the longest element, 'Goodnight', has 9).
arr = np.array(['Goodbye', 'Welcome', 'Tata', 'Goodnight'])
arr.dtype

dtype('<U9')

In [21]:
# Each element holds at most 9 characters; anything beyond that is truncated on assignment.
arr[2] = "this is a book"
arr[2]

np.str_('this is a')

In [27]:
# NumPy has a boolean dtype, which is well suited to masking.
a = np.array([True, False, True, False])
b = np.array([1, 2, 3, 4])

print(a.dtype)
# Indexing with the mask drops the positions that are False -> [1, 3]
print(b[a])

bool
[1 3]


In [28]:
# astype() returns a brand-new integer array; a_float and a_int are distinct objects.
a_float = np.array([1, 2, 3, 4], dtype='float')
a_int = a_float.astype(int)

# Show each array together with its id() so the two objects can be told apart.
print("a_float: ", a_float)
print(f"a_float 記憶體位址: {id(a_float)}")

print("a_int: ", a_int)
print(f"a_int 記憶體位址: {id(a_int)}")


a_float:  [1. 2. 3. 4.]
a_float 記憶體位址: 2138879114576
a_int:  [1 2 3 4]
a_int 記憶體位址: 2138879003984


## Numpy 陣列的屬性
- shape, ndim, dtype, itemsize, size
### Dimensions or Axes in NumPy
- Scalars (0D Arrays)
- Vectors (1D Arrays)
- Matrices (2D Arrays)
- Tensors (3D Arrays)


In [30]:
# Scalars (0D arrays)
s = np.array(21)

print(s)
for label, value in [
    ("Number of axes:", s.ndim),   # how many dimensions
    ("data type:", s.dtype),       # dtype of the stored value
    ("data size:", s.size),        # how many data points
    ("Shape:", s.shape),           # a 0-D array has the empty shape ()
]:
    print(label, value)

21
Number of axes: 0
data type: int64
data size: 1
Shape: ()


In [48]:
# Vectors (1D arrays)
vec = np.array([-1, 2, 7, 9, 2])

print(vec)
for label, value in [
    ("Number of axes:", vec.ndim),
    ("data type:", vec.dtype),
    ("data size:", vec.size),      # the array holds 5 items
    ("Shape:", vec.shape),         # (5,): one dimension with five items
]:
    print(label, value)

[-1  2  7  9  2]
Number of axes: 1
data type: int32
data size: 5
Shape: (5,)


In [49]:
# Matrices (2D arrays)
mat = np.array([[1.2, 2.3, 3.3],
                [5.5, 6.3, 7.5]])

print(mat)
for label, value in [
    ("Number of axes:", mat.ndim),
    ("data type:", mat.dtype),
    ("data size:", mat.size),      # 6 items in total
    ("Shape:", mat.shape),         # (2 rows, 3 columns)
]:
    print(label, value)

[[1.2 2.3 3.3]
 [5.5 6.3 7.5]]
Number of axes: 2
data type: float64
data size: 6
Shape: (2, 3)


In [50]:
# 3D arrays
# 4 layers, each one a (2 rows, 3 columns) matrix
t = np.array([
    [[1, 3, 9],
     [7, -6, 2]],

    [[2, 3, 5],
     [0, -2, -2]],

    [[9, 6, 2],
     [-7, -3, -12]],

    [[2, 4, 5],
     [-1, 9, 8]],
])

print(t)
for label, value in [
    ("Number of axes:", t.ndim),
    ("data type:", t.dtype),
    ("data size:", t.size),        # 24 data points in total
    ("Shape:", t.shape),           # 4 layers, each (2 rows, 3 columns)
]:
    print(label, value)

[[[  1   3   9]
  [  7  -6   2]]

 [[  2   3   5]
  [  0  -2  -2]]

 [[  9   6   2]
  [ -7  -3 -12]]

 [[  2   4   5]
  [ -1   9   8]]]
Number of axes: 3
data type: int32
data size: 24
Shape: (4, 2, 3)


## NumPy 陣列創建函式
These functions include 
- numpy.zeros, 
- numpy.ones, 
- numpy.eye, 
- numpy.random.rand,
- numpy.random.randn,
- numpy.empty

#### numpy.zeros

In [8]:
# Produce a 4x2 array of zeros:
# memory is allocated and its contents are cleared to 0.
arr_zeros = np.zeros((4, 2))
arr_zeros



array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

#### numpy.ones

In [32]:
# Produces a 2x3x4 array of ones (2 blocks, each with 3 rows and 4 columns).
# pandas only supports two-dimensional data structures;
# three-dimensional structures show up as tensors in deep learning.
arr_ones = np.ones((2, 3, 4))
arr_ones

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

#### numpy.eye

In [55]:
# Produces the 3x3 identity matrix.
arr_eye = np.eye(3)
arr_eye

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

#### numpy.empty

In [33]:
# np.empty(): creates an array WITHOUT initialising it. The elements are whatever
# bytes already happened to be in that memory - arbitrary leftovers, not values
# produced by a random number generator.
# Note: when the elements need a defined starting value, use np.zeros, np.ones
# or np.full instead.
# Signature: numpy.empty(shape, dtype=float, order='C', *, like=None)
arr_empty = np.empty((3, 3))
arr_empty

array([[6.23042070e-307, 4.67296746e-307, 1.69121096e-306],
       [9.34609111e-307, 1.42413555e-306, 1.78019082e-306],
       [1.37959740e-306, 2.29178686e-312, 0.00000000e+000]])

#### numpy.full

In [34]:
# np.full(): creates an array filled with fill_value; `shape` sets the dimensions.
# Signature: numpy.full(shape, fill_value, dtype=None, order='C', *, like=None)
# Here: a (3 rows, 4 columns) matrix with every element set to 10.
arr_full = np.full((3, 4), 10)
arr_full

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

#### numpy.random.rand

In [9]:
# Using the rand / randn functions.
# rand(m) produces an array of the given shape with uniformly distributed
# random numbers over [0, 1).
# The seed controls the randomness: with the same seed, the generated values
# are identical no matter how many times this is run.
np.random.seed(200)
arr_random = np.random.rand(10)
arr_random

array([0.94763226, 0.22654742, 0.59442014, 0.42830868, 0.76414069,
       0.00286059, 0.35742368, 0.90969489, 0.45608099, 0.98180271])

In [7]:
# randn(m) produces m normally distributed (Gaussian) random numbers.
arr_random_num = np.random.randn(3)
arr_random_num


array([-0.21375427, -0.65118362,  0.4757528 ])