# Intro to **NDarray**

## 一維陣列

In [None]:
# Import NumPy under its conventional alias so later calls need less typing.
import numpy as np

# The elements can be supplied as either a list or a tuple.
array_1 = np.array([1, 2, 3])

# Show the contents, the element type, the element count and the shape,
# one per line.
print(array_1, array_1.dtype, array_1.size, array_1.shape, sep="\n")

In [None]:
# Build 1-D arrays with the basic constructors and print each one in turn.
# np.empty does not initialise its memory, so the values it prints are arbitrary.
for _make, _length in ((np.empty, 3), (np.zeros, 6), (np.ones, 10), (np.arange, 9)):
    data = _make(_length)
    print(data)

In [None]:
# Import NumPy under its conventional alias so later calls need less typing.
import numpy as np

# The elements can be supplied as either a list or a tuple.
array_1 = np.array([1, 2, 3])

# A single element, then the slice from index 1 up to (not including) index 3.
print(array_1[0], array_1[1:3], sep="\n")

# A boolean mask picks out the elements that satisfy a condition.
# Only index 2 (the value 3) is divisible by 3.
mask = np.mod(array_1, 3) == 0
print(mask, array_1[mask], sep="\n")

## 二維陣列

In [2]:
import numpy as np

# Ask for 32-bit integers explicitly; when no dtype is given NumPy picks the
# platform's default integer type, which is not int32 everywhere.
x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)

# Contents, Python type, shape, element type and total element count.
print(x, type(x), x.shape, x.dtype, x.size, sep="\n")

[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>
(2, 3)
int32
6


In [3]:
# The same four numbers, first wrapped in an extra pair of brackets (a 2-D
# array of shape (1, 4)) and then as a flat list (a 1-D array of shape (4,)).
for _literal in ([[1, 2, 3, 4]], [1, 2, 3, 4]):
    data = np.array(_literal)
    print(data, data.shape)

[[1 2 3 4]] (1, 4)
[1 2 3 4] (4,)


In [None]:
# The constructors accept a shape to build multi-dimensional arrays.
# np.empty does not initialise its memory, so the values it prints are arbitrary.
for _make, _shape in ((np.ones, (2, 3)), (np.zeros, (5, 2)), (np.empty, (2, 5))):
    data = _make(_shape)
    print(data)

In [None]:
# Index a single element of x: row index 1, column index 2.
x[1,2]

In [None]:
# Take the column at index 1 of x with a basic slice.
# NOTE(review): NumPy documents basic slices as views rather than copies;
# the cells that follow rely on that.
y = x[:,1]
y

In [None]:
# Overwrite the first element of y, then show y.
y[0] = 9
y

In [None]:
x  # x has been changed as well, following the assignment made through y

## 三維陣列

In [None]:
# Read the shape from the outside in: count the items at the first level,
# then the second, then the third.
data = np.array([[[1, 2], [3, 4]],
                 [[5, 6], [7, 8]]])
print(data, data.shape, sep="\n")
data = np.zeros((3, 1, 3))
print(data)

## 高維陣列

In [None]:
# Four levels of nesting give a four-dimensional array.
data = np.array([[[[1, 2, 3]], [[4, 5, 6]]]])
print(data, data.shape)
print('=' * 88)
data = np.ones((2, 1, 1, 2))
print(data)

# Numpy Basic Operations

## 逐元運算 elementwise

In [None]:
data1 = np.array([3, 2, 10])
data2 = np.array([1, 3, 6])

# Each operation is applied position by position; the function forms used here
# are what the +, *, > and == operators call on arrays.
for _label, _operation in (
    ('加法:', np.add),
    ('乘法:', np.multiply),
    ('大於:', np.greater),
    ('是否等於:', np.equal),
):
    result = _operation(data1, data2)
    print(_label, result)

In [None]:
A = np.array([[3, 2], [0, 1]])
B = np.array([[3, 1], [2, 1]])

# Sum, difference, element-wise product (not the matrix product) and quotient,
# written with the function forms of the +, -, * and / operators.
print(np.add(A, B), np.subtract(A, B), np.multiply(A, B), np.divide(A, B), sep="\n")

## 矩陣運算 matrix

In [None]:
data1 = np.array([[1, 3]])                    # 1x2
data2 = np.array([[2, -1, 3], [-2, 4, 1]])    # 2x3

# Matrix product of a 1x2 and a 2x3 matrix; the @ operator is the recommended spelling.
result = data1 @ data2
print(result)
# The same product through the dot function.
result = np.dot(data1, data2)
print(result)

In [None]:
result = np.outer(data1, data2) # outer product; np.outer flattens its inputs first
print(result) # 2x6  # the rows are 1 * data2 and 3 * data2, each flattened

In [None]:
print(data2.transpose()) # transpose() can also take an explicit axis order, which matters for higher-dimensional arrays
print(data2.T)  # .T is the attribute shorthand; it reverses the axes

In [None]:
A = np.array([[3, 2], [0, 1]])
# B is the inverse of A, so A @ B should print the identity matrix
# (up to floating-point rounding).
B = np.linalg.inv(A)
print(B, A @ B, sep="\n")

## 統計運算 statistics

In [None]:
data = np.array([[2, 1, 7], [-5, 3, 8]])  # 2x3

# Whole-array reductions: total, maximum, mean and standard deviation.
for _label, _statistic in (
    ('總和', data.sum),
    ('最大值', data.max),
    ('平均數', data.mean),
    ('標準差', data.std),
):
    result = _statistic()
    print(_label, result)

In [None]:
result = data.sum(axis=0) # sum along the first axis: one total per column of the 2-D data
print(result)
result = data.sum(axis=1) # sum along the second axis: one total per row of the 2-D data
print(result)
result = data.max(axis=0) # maximum along the first axis
print(result)
result = data.mean(axis=1) # mean along the second axis
print(result)

In [None]:
result = data.cumsum() # running total; with no axis given it runs over the flattened array
print(result)
result = data.cumsum(axis=0) # running total along the first axis
print(result)

In [None]:
# Draw 1000 samples from a normal distribution with mean 5 and standard
# deviation 0.5 (signature: np.random.normal(loc=0.0, scale=1.0, size=None)).
result = np.random.normal(loc=5, scale=0.5, size=1000)
print('=' * 88)
# Summary statistics; each of these functions also accepts an axis argument
# for multi-dimensional input.
for _statistic in (np.min, np.max, np.mean, np.median, np.std):
    print(_statistic(result))

In [None]:
data = np.array([[1, 2, 3], [4, 5, 6], [-2, 3, -1]])

# Function-style reductions: the overall sum, the sum along the first axis,
# and the mean along the second axis.
for _reduce, _options in ((np.sum, {}), (np.sum, {'axis': 0}), (np.mean, {'axis': 1})):
    result = _reduce(data, **_options)
    print(result)

# 多維陣列的維度與形狀操作
## 形狀、轉置、扁平化與重塑 (shape / T / ravel / reshape)

In [None]:
data = np.array([[3, 1, 5], [1, 2, 3]])
# The shape of the data (2x3), followed by its transpose (3x2).
print(data.shape, data.T, sep="\n")


In [None]:
data = np.array([[[2, 1, 3], [1, 2, 3]],
                 [[0, 2, 1], [8, 9, 10]]])  # shape (2, 2, 3)

# Flatten to one dimension; note the order in which the values are laid out.
newData = data.ravel()
print(newData, newData.shape, sep="\n")


In [None]:
data = np.array([[[2, 1, 3], [1, 2, 3]],
                 [[0, 2, 1], [8, 9, 10]]])  # 2x2x3 = 12 elements

# Reshape the same 12 values three ways; note the order in which the values
# are laid out. In the last target, -1 lets NumPy work out the column count
# for the requested 4 rows.
for _target in ((3, 4), (1, 2, 6), (4, -1)):
    newData = data.reshape(*_target)
    print(newData)

In [None]:
# A constructor call chained with reshape builds a shaped array in one step.
data = np.zeros(18).reshape((3, 2, 3))
print(data)
data = np.arange(9).reshape((3, 3))
print(data, data.T, sep="\n")

In [None]:
data = np.arange(10)
# The shape attribute of data can be assigned to directly.
# NOTE(review): NumPy's documentation discourages setting .shape and
# recommends reshape() instead; confirm before reusing this pattern.
data.shape = (2,5)
print(data)

# 多維陣列的索引、切片操作

## 多維度陣列的索引 indexing

In [None]:
# 1-D: a single integer index.
data = np.array([1, 5, 2, 10])
print(data[1])
print('=' * 88)

# 2-D: one index per dimension, e.g. (0, 1) is first-axis index 0 and
# second-axis index 1.
data = np.array([[0, 1], [10, -5], [2, 6]])
for _position in ((0, 1), (1, 0), (2, 0)):
    print(data[_position])
print('=' * 88)

# 3-D (2x2x3): three indices; the two lookups below give 10 and 6.
data = np.array([[[1, 2, 3], [4, 5, 6]],
                 [[7, 8, 9], [10, 11, 12]]])
print(data[1, 1, 0], data[0, 1, 2], sep="\n")

## 多維陣列的切片 slicing

In [None]:
# 1-D slices, written as slice objects: [0:3], [:2], [2:], [::-2] and [:].
data = np.array([-1, -5, 2, 3])
for _window in (
    slice(0, 3),
    slice(None, 2),
    slice(2, None),
    slice(None, None, -2),
    slice(None),
):
    print(data[_window])
print('=' * 88)

data = np.array([[0, 1, 2],
                 [3, 4, 5],
                 [5, 4, 3],
                 [2, 1, 0]])
print(data[1:3, 0:2])  # 2x2
print(data[0:2, 1])    # careful: an integer index drops that axis, leaving shape (2,)
print('=' * 88)

data = np.array([[[8, 1, 3], [-5, 5, 2]],
                 [[0, 1, 6], [4, 4, -3]]])
print(data[0, ...])     # ... stands for "everything in the remaining axes"; 2x3
print(data[..., 1:3])   # 2x2x2


# 多維陣列的合併 stacking

## 合併第一個維度 vstack

In [None]:
array2D_1 = np.arange(9).reshape((3, 3))
array2D_2 = np.arange(10, 19).reshape((3, 3))
# concatenate takes the arrays to join as a single sequence (a tuple here).
_pair = (array2D_1, array2D_2)
print(*_pair, sep="\n")
# Joined along the first axis (the default), then along the second axis.
print(np.concatenate(_pair), np.concatenate(_pair, axis=1), sep="\n")

In [None]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])      # 2x3
arr2 = np.array([[7, 8, 9], [10, 11, 12]])   # 2x3
# Both functions take the arrays to join as a single sequence (a tuple here).
_blocks = (arr1, arr2)

result = np.vstack(_blocks)  # option 1
print(result)  # 4x3
print("=" * 88)
result = np.concatenate(_blocks, axis=0)  # option 2; axis=0 is also the default
print(result)  # 4x3

In [None]:
# Four separate 3x1 nested lists; vstack treats each one as an array.
a, b, c, d = ([[1], [2], [3]] for _ in range(4))
print(np.vstack((a, b, c, d)))

## 合併第二個維度 hstack

In [None]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])      # 2x3
arr2 = np.array([[7, 8, 9], [10, 11, 12]])   # 2x3
# Both functions take the arrays to join as a single sequence (a tuple here).
_blocks = (arr1, arr2)

result = np.hstack(_blocks)
print(result)  # 2x6
print('=' * 88)
result = np.concatenate(_blocks, axis=1)
print(result)  # 2x6

In [None]:
# Four separate 3x1 nested lists; hstack treats each one as an array and
# joins them side by side into a 3x4 result.
a, b, c, d = ([[1], [2], [3]] for _ in range(4))
print(np.hstack((a, b, c, d)))

## stack

In [None]:
data = [[1, 2, 3], [4, 5, 6]]
print(data)  # a plain Python list

# stack adds one new dimension, placed at the given axis position:
# axis=0 gives 2x3, axis=1 gives 3x2.
for _axis in (0, 1):
    print('=' * 88)
    newData = np.stack(data, axis=_axis)
    print(newData)


In [None]:
data = [[1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12]]
print(data)  # a plain Python list

# stack adds one new dimension, placed at the given axis position:
# axis=0 gives 3x4, axis=1 gives 4x3.
for _axis in (0, 1):
    print('=' * 88)
    newData = np.stack(data, axis=_axis)
    print(newData)

In [None]:
a = [1, 2, 3, 4]
b = [5, 6, 7, 8]
c = [9, 10, 11, 12]
# The inputs may also be passed as a tuple.
_rows = (a, b, c)

newData = np.stack(_rows, axis=0)
print(newData)  # 3x4

print('=' * 88)
newData = np.stack(_rows, axis=1)
print(newData)  # 4x3


In [None]:
# Four separate 2x3 nested lists to stack.
a, b, c, d = ([[1, 2, 3], [4, 5, 6]] for _ in range(4))

# stack adds one new dimension of length 4, placed at the given axis position:
# axis=0 gives 4x2x3, axis=1 gives 2x4x3, axis=2 gives 2x3x4.
for _axis in (0, 1, 2):
    if _axis:
        print('=' * 88)
    e = np.stack((a, b, c, d), axis=_axis)
    print(e)


In [None]:
# A list of ten independent 3x4 random arrays.
arrays = [np.random.randn(3, 4) for _ in range(10)]

# Stacking puts the new length-10 dimension at position 0, 1 or 2, giving
# shapes 10x3x4, 3x10x4 and 3x4x10.
A, B, C = (np.stack(arrays, axis=_axis) for _axis in range(3))
print(A.shape, B.shape, C.shape, sep="\n")

# 多維陣列的切割操作

## 第一維度的切割

In [None]:
arr = np.array([[2, 4, 6, 8, 10, 12],
                [1, 3, 5, 7, 9, 11]])  # 2x6

# Split along the first axis into two equal pieces: a list of two 1x6 arrays.
result = np.vsplit(arr, 2)
print(result)

## 第二維度的切割

In [None]:
arr = np.array([[2, 4, 6, 8, 10, 12],
                [1, 3, 5, 7, 9, 11]])  # 2x6

# Split along the second axis into two 2x3 pieces ...
result = np.hsplit(arr, 2)
print(result)
print('=' * 88)
# ... and then into three 2x2 pieces.
result = np.hsplit(arr, 3)
print(result)

# Broadcasting

In [None]:
# Operands of assorted shapes for the broadcasting examples.
x = np.arange(4)           # shape (4,)
xx = x.reshape((4, 1))     # shape (4, 1)
xxx = x.reshape((1, 4))    # shape (1, 4)
y = np.ones(5)             # shape (5,)
z = np.ones((3, 4))        # shape (3, 4); the shape may be given as a list or a tuple
print(x, xx, xxx, y, z, sep="\n")

In [None]:
# x has shape (4,) and y has shape (5,): the trailing dimensions differ and
# neither is 1, so NumPy cannot broadcast them together. Catch the error and
# print its message so that the rest of the notebook still runs under
# "Run All".
try:
    print(x + y)
except ValueError as err:
    print(err)  # operands could not be broadcast together with shapes (4,) (5,)

In [None]:
print(xx.shape) # (4, 1)
print(y.shape) # (5, )
print((xx + y).shape) # broadcasting gives (4, 5)

"""
np.array([                                                      
     [
       [0, 0, 0, 0, 0]
     ],
     [
       [1, 1, 1, 1, 1]
     ],
     [
       [2, 2, 2, 2, 2]
     ],
     [
       [3, 3, 3, 3, 3]
     ]
]) +

np.array([                                                      
     [
       [1, 1, 1, 1, 1]
     ],
     [
       [1, 1, 1, 1, 1]
     ],
     [
       [1, 1, 1, 1, 1]
     ],
     [
       [1, 1, 1, 1, 1]
     ]
])

"""
print("="*88)

In [None]:
# Add the (4, 1) column xx to the (5,) vector y; the result is broadcast to (4, 5).
print(xx + y) 

In [None]:
# x is 1-D with length 4, which matches the last dimension of z, so x is
# repeated across z's first dimension.
print(x.shape) # (4, )
print(z.shape) # (3, 4)
print((x + z).shape) # (3, 4)

"""
np.array([
   [0, 1, 2, 3],
   [0, 1, 2, 3],   
   [0, 1, 2, 3]
]) +
np.array([
   [1, 1, 1, 1],
   [1, 1, 1, 1],   
   [1, 1, 1, 1]
]) 

"""
print('='*88)

In [None]:
print(x + z) # both operands are expanded to (3, 4) before the element-wise arithmetic
print(x - z)
print(x*z)
print(x/z)

In [None]:
# A (4, 1) column combined with a (1, 4) row: each length-1 dimension is
# stretched to match the other operand.
print(xx.shape) # (4, 1)
print(xxx.shape) # (1, 4)
print((xx + xxx).shape) # (4, 4)

"""
np.array([
   [0, 0, 0, 0],
   [1, 1, 1, 1],
   [2, 2, 2, 2],
   [3, 3, 3, 3]
])+
np.array([
   [0, 1, 2, 3],
   [0, 1, 2, 3],
   [0, 1, 2, 3],
   [0, 1, 2, 3]
])

"""
print("="*88)

In [None]:
# Element-wise sum and product of the (4, 1) column and the (1, 4) row.
print(xx + xxx)
print(xx * xxx)

# Mask and Fancy indexing

In [None]:
import numpy.ma as ma

x = np.array([1, 2, 3, -1, 5])
# Masked array: a true mask entry hides that element (here the -1), so it is
# left out of the printed values and of the mean.
mx1 = ma.masked_array(x, mask=[False, False, False, True, False])
print(mx1, mx1.mean(), sep="\n")

In [None]:
mx2 = ma.masked_array(x, mask = x>2) # mask every element greater than 2
print(mx2)
print(mx2.mean()) # the mean is taken over the unmasked elements only

In [None]:
x = np.array([1, 2, 3, -1, 5])
# Boolean (mask) indexing: first with an explicit list of flags, then with a
# mask computed from a comparison.
print(x[[True, True, False, False, False]], x[x > 2], sep="\n")

## 一維的 fancy indexing

In [None]:
# Fancy indexing: index with an array (or list) of positions rather than a
# single number.
x = np.arange(10, 1, -1)
print(x)
for _positions in ([3, 3, 1, 8], [2, 2, 3, 0]):
    print(x[_positions])
# The result takes the shape of the index array, here 2x2.
y = x[np.array([[1, 1], [2, 3]])]
print(y)

In [None]:
# A seeded generator, so the ten random integers are reproducible.
rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
print(x)
print("-" * 100)
# Picking elements one index at a time ...
print([x[_i] for _i in (3, 7, 2)])
# ... versus passing a list of indices in one go.
ind = [3, 7, 4]
print(x[ind])
# With a 2-D index array the result takes the index array's shape.
ind = np.array([[3, 7], [4, 5]])
print(x[ind])

## 二維的 fancy indexing

In [None]:
# Reshape to 4 rows; -1 lets NumPy work out the number of columns.
X = np.arange(16).reshape((4, -1))
print(
    X,
    X[1, 3],              # plain index
    X[[0, 3], [1, 3]],    # fancy index: the values at (0, 1) and (3, 3)
    X[1:3, 2:4],          # slicing
    X[[1, 3], 0:2],       # fancy index on rows combined with a column slice
    sep="\n",
)

In [None]:
# Row and column index arrays used by the fancy-indexing examples.
row = np.arange(3)       # [0, 1, 2]
col = np.arange(1, 4)    # [1, 2, 3]

In [None]:
print(X[row, col]) # important: row and col are paired element by element, which is different from slicing

In [None]:
# First-axis index 0, with the second-axis positions listed in col.
X[0, col]

In [None]:
# A slice of the first two rows, with the second-axis positions listed in col.
X[:2, col]

# 矩陣數值種類轉換

In [None]:
# Starts out as a floating-point array; show its dtype.
data = np.array([1, 2, 3.5])
print(data.dtype)
# Convert to integers, then show the new dtype and the converted values.
data = data.astype(int)
print(data.dtype, data, sep="\n")


In [None]:
data = np.array([[['11', '22']],
                 [['33', '44']]])  # 2x1x2
# In the printed dtype, U stands for a Unicode string and the number after it
# is the string length in characters (2 for these values).
print(data, data.dtype, sep="\n")
# Convert the numeric strings to integers.
data = data.astype(int)
print(data.dtype, data, sep="\n")

# **Numpy Missing Data**

In [None]:
# np.nan is NumPy's floating-point "not a number" value, used here to mark a
# missing entry. The capitalised alias np.NaN was removed in NumPy 2.0, so the
# lowercase name is used.
data = np.array([1.0, 2.0, np.nan, 7.0])
print(data)

In [None]:
x = np.array([1.0, 2.5, np.nan, 1.3, np.inf, 7.2])
print("input array with bad values:", x, sep="\n")

# masked_invalid hides the NaN and infinite entries behind a mask.
xm = np.ma.masked_invalid(x)
print("masked version:", xm, sep="\n")

In [None]:
x = np.array([1, 2, 3])
y = np.array([1, 0, 1])
# Plain arrays: dividing by the zero in y gives inf (NumPy emits a
# RuntimeWarning) rather than raising.
print(np.divide(x, y))

In [None]:
x = np.ma.array([1, 2, 3])
y = np.ma.array([1, 0, 1])
# Masked arrays: the position divided by zero is masked in the result.
print(np.ma.true_divide(x, y))

In [None]:
x = np.ma.array([1, 2, 3], mask=[False, False, False])  # a True entry would hide that element
y = np.ma.array([1, 0, 1])
print(x * y)
print(x / y)  # the position divided by zero is masked in the result

# Build z as a masked array with the NaN entry hidden. The earlier form passed
# a leftover `mask` variable as the second positional argument of np.array,
# which is the dtype parameter, so the call failed.
z = np.ma.array([1, np.nan, 2], mask=[False, True, False])
print(x * z)
print(x / z)

In [None]:
# Boolean-mask assignment needs an ndarray (it does not work on a plain list),
# so the values go straight into an array.
data = np.array([1, np.nan, np.nan])
mask = np.isnan(data)  # True where a value is missing
data[mask] = 0         # replace the missing values with 0
print(data)

In [None]:
data = np.array([1, np.nan, 2])
mask = np.isnan(data)  # True where a value is missing
# Keep only the entries that are not missing.
data = data[np.logical_not(mask)]
print(data)

In [None]:
# 比較 pandas 的偵測缺失值
import pandas as pd
dict = {'First Score':[100, 90, np.nan, 95], 
        'Second Score': [30, 45, 56, np.nan], 
        'Third Score':[np.nan, 40, 80, 98]} 
df = pd.DataFrame(dict) 
print(df)

df.isnull() # using isnull() function   

# **Module 10 Duplicated Data**

In [None]:
import pandas as pd 
# Load the employee table from a CSV file given by a relative path.
data = pd.read_csv("./employees.csv") 
# NOTE(review): display is not defined in this file; presumably the
# IPython/Jupyter rich-display helper — confirm when running outside a notebook.
display(data)

In [None]:
data.sort_values("First Name", inplace = True) # sort by first name, modifying data in place
display(data)

In [None]:
# making a bool series 
bool_series = data["First Name"].duplicated() 
bool_series # 預設 第一次出現的為 False, 之後的為 True

In [None]:
data[bool_series] # show the rows flagged as duplicates

In [None]:
data = data[~bool_series] # negate the boolean series to keep only the rows not flagged as duplicates
print(data)

