# **從簡單的資料集開始訓練模型**
此份程式碼會從簡單的二維資料集介紹完整深度學習模型的訓練流程，從模型建置、模型訓練、模型評估，至模型儲存、載入重現結果。

## 本章節內容大綱
* ### [創建資料集／載入資料集（Dataset Creating/ Loading）](#DatasetCreating/Loading)
* ### [模型建置（Model Building）](#ModelBuilding)
* ### [模型訓練（Model Training）](#ModelTraining)
* ### [模型評估（Model Evaluation）](#ModelEvaluation)
* ### [模型儲存／載入（Model Saving/ Loading）](#ModelSaving/Loading)
-----------------

## 匯入套件

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm # 進度條

# PyTorch 相關套件
import torch
import torch.nn as nn

<a name="DatasetCreating/Loading"></a>
## 創建資料集／載入資料集（Dataset Creating / Loading）

In [None]:
# Fix NumPy's global seed so the sampled points are reproducible.
np.random.seed(5566)
num_samples_per_class = 1000

# Both classes are drawn from 2-D Gaussians that share this covariance
# matrix; only their means differ.
class_cov = [[1, 0.5], [0.5, 1]]

# Negative class (x1, x2), centred at (0, 3). It is drawn first so the
# random stream is consumed in a fixed order.
negative_samples = np.random.multivariate_normal(
    mean=[0, 3], cov=class_cov, size=num_samples_per_class)

# Positive class (x1, x2), centred at (3, 0).
positive_samples = np.random.multivariate_normal(
    mean=[3, 0], cov=class_cov, size=num_samples_per_class)

# Each array has one row per sample and one column per feature.
print('shape of neg samples:', negative_samples.shape)
print('shape of pos samples:', positive_samples.shape)

In [None]:
# Feature matrix: negative rows first, then positive rows, cast to float32.
inputs = np.concatenate([negative_samples, positive_samples],
                        axis=0).astype(np.float32)

# Matching float32 label column: 0 for every negative sample followed by
# 1 for every positive sample, shaped (2 * num_samples_per_class, 1).
targets = np.repeat(np.array([0, 1], dtype='float32'),
                    num_samples_per_class).reshape(-1, 1)

inputs.shape, targets.shape

In [None]:
# Side-by-side figure: a 2-D scatter of the features (left) and a 3-D
# scatter that uses the class label as the height (right).
plt.figure(figsize=(10, 4))
ax1 = plt.subplot(121)
ax2 = plt.subplot(122, projection='3d')

# --- 2-D view: the x1-x2 plane ---
ax1.scatter(negative_samples[:, 0],
            negative_samples[:, 1],
            label='negative samples')
ax1.scatter(positive_samples[:, 0],
            positive_samples[:, 1],
            label='positive samples')

ax1.set_xlabel('x1')
ax1.set_ylabel('x2')
ax1.set_title('x1-x2 plane')
ax1.legend()  # without this call the scatter labels are never displayed

# --- 3-D view: x1-x2-y space ---
# Negative samples sit at height y = 0, positive samples at y = 1.
ax2.scatter(negative_samples[:, 0],
            negative_samples[:, 1],
            np.zeros((num_samples_per_class, 1), dtype='float32'),
            label='negative samples')

ax2.scatter(positive_samples[:, 0],
            positive_samples[:, 1],
            np.ones((num_samples_per_class, 1), dtype='float32'),
            label='positive samples')

ax2.set_xlabel('x1')
ax2.set_ylabel('x2')
ax2.set_zlabel('y')
ax2.set_title('x1-x2-y space')
ax2.view_init(45, 285)  # (elevation, azimuth) of the camera, in degrees
plt.show()

#### 建立資料集

In [None]:
# Hold out part of the data for validation.
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    inputs,
    targets,
    test_size=0.2,      # fraction of samples reserved for validation
    random_state=5566,  # fixed seed so the split is reproducible
)

# Wrap the NumPy arrays as tensors; indexing a TensorDataset yields a single
# (features, label) pair.
train_dataset = torch.utils.data.TensorDataset(
    *(torch.from_numpy(array) for array in (X_train, y_train)))
val_dataset = torch.utils.data.TensorDataset(
    *(torch.from_numpy(array) for array in (X_val, y_val)))

# DataLoaders serve the datasets in mini-batches. Only the training data is
# shuffled; validation keeps its original order.
BATCH_SIZE = 4
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=BATCH_SIZE, shuffle=False)

<a name="ModelBuilding"></a>
## 模型建置（Model Building）

目標：找到一個平面可以擬合這兩群資料點，假設此平面方程式為 $y = w_1 x_1 + w_2 x_2 + b$（如下圖所示）：

![](https://hackmd.io/_uploads/rJiUtK8-T.png)


* ### Sequential model（序列模型）
單輸入單輸出的模型，依順序堆疊網路層。

In [None]:
torch.manual_seed(17)  # fix the PyTorch random seed

# Form 1: start from an empty Sequential container and register the layer
# under an explicit name ('dense').
model = torch.nn.Sequential()
model.add_module('dense', torch.nn.Linear(inputs.shape[1], 1))  # (input features, output units)

# Form 2: the same architecture, built by passing every layer, in order, as a
# list unpacked into Sequential. This assignment replaces the model created
# above, and the layer's parameters are the ones read later through the keys
# '0.weight' / '0.bias'.
layers = [torch.nn.Linear(inputs.shape[1], 1)]
model = torch.nn.Sequential(*layers)

In [None]:
# Print the model's textual summary.
print(model)

<a name="ModelTraining"></a>
## 模型訓練（Model Training）

In [None]:
# Pick the compute device: the CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('using device: ', device)

# Move the model onto the selected device.
model.to(device)

* ### 模型編譯（model compile）
設定模型訓練時，所需的優化器 (optimizer)、損失函數 (loss function)

In [None]:
# Optimizer: RMSprop over all of the model's parameters, learning rate 0.001.
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=0.001)

In [None]:
# Loss function: mean squared error, averaged over the elements it is given.
loss_fn = nn.MSELoss(reduction='mean')

In [None]:
def train_epoch(model, optimizer, loss_fn, train_dataloader, val_dataloader):
    """Run one training pass followed by one validation pass.

    Batches are moved to the module-level ``device`` before being fed to the
    model.

    Each batch loss is weighted by the number of samples in the batch, so the
    returned values are per-sample averages even when the last batch is
    smaller than the others. When every batch has the same size this equals
    the plain mean of the batch losses.
    NOTE(review): the weighting assumes ``loss_fn`` returns the mean over the
    batch (as ``MSELoss`` does by default) -- confirm if another loss is used.

    Args:
        model: module being trained; it is left in eval mode on return.
        optimizer: optimizer that updates ``model``'s parameters.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.
        train_dataloader: iterable of ``(x, y)`` tensor batches for training.
        val_dataloader: iterable of ``(x, y)`` tensor batches for validation.

    Returns:
        Tuple ``(train_loss, val_loss)`` of average losses for this epoch.
        The training loss is accumulated while the parameters are still being
        updated, so it is a running average over the epoch.

    Raises:
        ZeroDivisionError: if either dataloader yields no samples.
    """
    # ---- training pass ----
    model.train()
    train_loss_sum = 0.0
    train_samples = 0
    for x, y in tqdm(train_dataloader, leave=False):
        x, y = x.to(device), y.to(device)  # move the batch to the compute device
        y_pred = model(x)                  # forward pass
        loss = loss_fn(y_pred, y)          # batch loss
        optimizer.zero_grad()              # clear gradients from the previous step
        loss.backward()                    # back-propagate
        optimizer.step()                   # update the parameters

        batch_samples = x.size(0)
        train_loss_sum += loss.item() * batch_samples
        train_samples += batch_samples

    # ---- validation pass ----
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    # Gradients are not needed for validation.
    with torch.no_grad():
        for x, y in val_dataloader:
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            loss = loss_fn(y_pred, y)

            batch_samples = x.size(0)
            val_loss_sum += loss.item() * batch_samples
            val_samples += batch_samples

    return train_loss_sum / train_samples, val_loss_sum / val_samples

In [None]:
# Per-epoch loss history for the training and validation sets.
train_loss_log, val_loss_log = [], []

# Train for 20 epochs, recording and printing both losses after each one.
for epoch in tqdm(range(20)):
    train_loss, val_loss = train_epoch(
        model,
        optimizer,
        loss_fn,
        train_dataloader,
        val_dataloader,
    )
    train_loss_log += [train_loss]
    val_loss_log += [val_loss]
    print(f'Epoch: {epoch}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}')


<a name="ModelEvaluation"></a>
## 模型評估（Model Evaluation）

* ### 視覺化訓練過程的評估指標 （Visualization）

In [None]:
# Plot the loss history: epoch index on the x-axis, MSE on the y-axis.
plt.figure(figsize=(15, 4))
# With only y-values given, the x-values default to 0 .. len(y) - 1.
plt.plot(train_loss_log, label='train_loss')
plt.plot(val_loss_log, label='valid_loss')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()

* ### 模型預測（Model predictions）

In [None]:
def evaluate(model, val_dataloader):
    """Print the loss of ``model`` over every sample served by ``val_dataloader``.

    Predictions from all batches are gathered on the CPU, concatenated, and
    scored with a single call to the module-level ``loss_fn``, so the value is
    computed over the whole dataset at once rather than averaged per batch.
    Inputs are moved to the module-level ``device`` before the forward pass.

    Args:
        model: module to evaluate; it is switched to eval mode and left there.
        val_dataloader: iterable of ``(x, y)`` tensor batches.

    Returns:
        None. The shapes of the gathered predictions/targets and the loss are
        printed.
    """
    # Evaluate in inference mode regardless of the state the model arrives in
    # (for example a model that was just loaded from disk).
    model.eval()

    y_pred = []
    y_true = []
    with torch.no_grad():
        for x, y in val_dataloader:
            pred = model(x.to(device))
            # Keep predictions and targets on the CPU so they can be compared
            # after concatenation.
            y_pred.append(pred.cpu())
            y_true.append(y.cpu())

    y_pred = torch.cat(y_pred)
    y_true = torch.cat(y_true)

    print(y_pred.shape, y_true.shape)

    loss = loss_fn(y_pred, y_true)
    print(f'MSE: {loss}')

In [None]:
# Report the trained model's loss on the validation set.
evaluate(model, val_dataloader)

* ### 視覺化結果

In [None]:
model.state_dict() # the model's parameters, keyed by name

In [None]:
# Pull the learned weights and bias out of the model as NumPy arrays.
model_parameters = model.state_dict()
w = model_parameters['0.weight'].cpu().numpy().ravel()  # flattened to 1-D
b = model_parameters['0.bias'].cpu().numpy()

print(w, b)

In [None]:
# Side-by-side figure: the decision boundary on the x1-x2 plane (left) and
# the fitted plane in x1-x2-y space (right).
plt.figure(figsize=(10, 4))
ax1 = plt.subplot(121)
ax2 = plt.subplot(122, projection='3d')

# --- 2-D view: the x1-x2 plane ---
# The decision boundary is where the prediction equals 0.5:
#   w1*x1 + w2*x2 + b = 0.5   =>   x2 = -(w1/w2)*x1 + (0.5 - b)/w2
x = np.linspace(-3, 6, 100)  # 100 evenly spaced x1 values in [-3, 6]
boundary = - w[0] / w[1] * x + (0.5 - b) / w[1]

# Draw the decision boundary.
ax1.plot(x, boundary, '-r', label='Decision Boundary')

# Draw the samples of both classes.
ax1.scatter(negative_samples[:, 0],
            negative_samples[:, 1],
            label='negative samples')
ax1.scatter(positive_samples[:, 0],
            positive_samples[:, 1],
            label='positive samples')

ax1.set_xlabel('x1')
ax1.set_ylabel('x2')
ax1.set_title('x1-x2 plane')
ax1.legend()  # without this call the labels above are never displayed

# --- 3-D view: x1-x2-y space ---
x1 = np.linspace(-3, 5, 100)
x2 = np.linspace(-3, 5, 100)
x1, x2 = np.meshgrid(x1, x2)  # 100 x 100 grid covering [-3, 5] x [-3, 5]
y = w[0] * x1 + w[1] * x2 + b  # model output at every grid point

ax2.contour3D(x1, x2, y, 100, alpha=0.5, cmap='viridis')  # fitted plane
ax2.plot3D(x, boundary, 0.5, '-r', label='Decision Boundary')  # boundary at y = 0.5

# Draw the samples: negatives at height y = 0, positives at y = 1.
ax2.scatter(negative_samples[:, 0],
            negative_samples[:, 1],
            np.zeros((num_samples_per_class, 1), dtype='float32'),
            label='negative samples',
            depthshade=False)
ax2.scatter(positive_samples[:, 0],
            positive_samples[:, 1],
            np.ones((num_samples_per_class, 1), dtype='float32'),
            label='positive samples',
            depthshade=False)

ax2.set_zlim(0, 1)
ax2.set_xlabel('x1')
ax2.set_ylabel('x2')
ax2.set_zlabel('y')
ax2.set_title('x1-x2-y space')
ax2.view_init(45, 285)  # (elevation, azimuth) of the camera, in degrees
ax2.legend(bbox_to_anchor=(1.05, 1))

plt.show()

<a name="ModelSaving/Loading"></a>
## 模型儲存／載入（Model Saving/ Loading）

In [None]:
# Serialize the model object itself (not just its state_dict) to disk.
torch.save(model, 'model.pt') # destination path

In [None]:
# Load the model object written by torch.save(model, 'model.pt').
# weights_only=False is needed because the file holds a whole pickled module
# rather than a plain state_dict; recent PyTorch releases default to
# weights_only=True and refuse to unpickle such files. Unpickling can run
# arbitrary code, so only load files from a trusted source.
# map_location places the parameters on the device chosen earlier, even if
# the file was saved from a different one.
new_model = torch.load('model.pt',  # path to read from
                       map_location=device,
                       weights_only=False)

In [None]:
# Print the reloaded model's textual summary.
print(new_model)

In [None]:
# Evaluate the reloaded model on the same validation set.
evaluate(new_model, val_dataloader)

----------------
## 動手試試看：
1. 嘗試改動 random seed，觀察訓練的結果（收斂速度以及 MSE 表現等等）
2. 嘗試改動 batch_size，觀察訓練的結果（收斂速度以及 MSE 表現等等）