# **CNN 入門**
此份程式碼會透過一個簡單的公開資料集（CIFAR-10），介紹如何建置與訓練模型，並比較 DNN 與 CNN 模型在處理影像型資料上的差異。

## 匯入所需套件

In [None]:
# import package
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [None]:
# PyTorch 相關套件
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T

In [None]:
# Number of output classes used by the classifier heads.
NUM_CLASS = 10

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

## CIFAR-10 資料讀入及前處理

![image](https://hackmd.io/_uploads/B1VQW0L8T.png)


In [None]:
# Load the CIFAR10 dataset shipped with torchvision. Both splits are
# downloaded into ./data on first use and converted to tensors on access.
batch_size = 128

train_ds = torchvision.datasets.CIFAR10(
    root='data', train=True, download=True, transform=T.ToTensor(),
)
test_ds = torchvision.datasets.CIFAR10(
    root='data', train=False, download=True, transform=T.ToTensor(),
)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_ds, batch_size=batch_size, shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_ds, batch_size=batch_size, shuffle=False,
)

# Expected: 50000 training samples and 10000 test samples.
print(f'{len(train_ds)} train samples')
print(f'{len(test_ds)} test samples')

In [None]:
# Inspect the first (image, label) pair of the training set.
x, y = train_ds[0]
print(type(x), x.shape)
print(type(y), y)
# Dims 1 and 2 are the 32*32 image size; dim 0 holds the 3 RGB channels.

In [None]:
# Collect every label of both splits.
# CIFAR10 stores its labels in the `targets` attribute, so read them directly.
# Iterating the dataset instead would load and transform every image only to
# throw it away. No target_transform is configured on these datasets, so the
# values are the same as the ones returned by indexing.
y_train = list(train_ds.targets)
y_test = list(test_ds.targets)

In [None]:
# Count the training samples per class and show the counts as a bar chart.
uniques, counts = np.unique(y_train, return_counts=True)
print(uniques, counts)

fig, ax = plt.subplots()
ax.bar(uniques, counts)
ax.set_xticks(uniques)  # one tick per class id
plt.show()

In [None]:
# Count the test samples per class and show the counts as a bar chart.
uniques, counts = np.unique(y_test, return_counts=True)
print(uniques, counts)

fig, ax = plt.subplots()
ax.bar(uniques, counts)
ax.set_xticks(uniques)  # one tick per class id
plt.show()

In [None]:
# Display the first training image with its class index as the title.
img, label = train_ds[0]
# Move the channel axis last: (C, H, W) -> (H, W, C) before plotting.
plt.imshow(img.permute(1, 2, 0))
plt.title(f"label: {label}", fontsize=15)
plt.axis("off")
plt.show()

In [None]:
# Histogram of the pixel values of `img` (values lie in 0~1).
plt.hist(img.flatten(), bins=100)
plt.title('Distribution')
plt.xlabel('pixel value')
plt.ylabel('count')
plt.show()

## 模型定義

In [None]:
# Fully connected baseline: the image is flattened into a 3*32*32 vector and
# passed through four hidden Linear+ReLU layers and a NUM_CLASS-way output.
dnn_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=3 * 32 * 32, out_features=32),
    nn.ReLU(),
    nn.Linear(in_features=32, out_features=32),
    nn.ReLU(),
    nn.Linear(in_features=32, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=NUM_CLASS),
)
print(dnn_model)

### CNN Model
![image](https://hackmd.io/_uploads/r19BZCLUT.png)

In [None]:
cnn_model = nn.Sequential(
    # Block 1: convolution with 32 filters of size 3*3, ReLU activation.
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding='same'),
    nn.ReLU(),
    # Block 2: convolution + max pooling.
    nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding='same'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),  # img_size // 2
    # Block 3: convolution.
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding='same'),
    nn.ReLU(),
    # Block 4: convolution + max pooling.
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding='same'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),  # img_size // 2
    # Classifier head: flatten the 64 feature maps of size 8*8, then a
    # 10-way output layer.
    nn.Flatten(),
    nn.Linear(in_features=64 * 8 * 8, out_features=10),
)
print(cnn_model)

In [None]:
# Sanity check: feed one random 3x32x32 input and print the output shape.
inputs = torch.randn(1, 3, 32, 32)
print(cnn_model(inputs).shape)

In [None]:
def train_epoch(model, optimizer, loss_fn, train_dataloader, val_dataloader=None):
    """Train `model` for one pass over `train_dataloader`.

    Args:
        model: Network to optimise; it is switched to training mode.
        optimizer: Optimizer used to update the model parameters.
        loss_fn: Callable mapping (predictions, targets) to a scalar loss.
        train_dataloader: Iterable yielding (inputs, targets) batches.
        val_dataloader: Unused. Kept (now optional) so that existing callers
            which pass it keep working.

    Returns:
        Tuple (avg_train_loss, avg_train_acc): the mean of the per-batch
        losses and the fraction of correctly classified samples.
    """
    model.train()
    total_train_loss = 0
    total_train_correct = 0
    # Count what is actually iterated instead of relying on
    # len(dataloader) / len(dataloader.dataset): with drop_last=True or a
    # subset sampler the loader visits fewer samples than the dataset holds,
    # which would silently deflate the reported accuracy.
    num_batches = 0
    num_samples = 0
    for x, y in tqdm(train_dataloader, leave=False):
        optimizer.zero_grad()  # reset gradients from the previous step
        x, y = x.to(device), y.to(device)  # move the batch to the module-level `device`
        y_pred = model(x)  # forward pass
        loss = loss_fn(y_pred, y)
        loss.backward()  # back-propagate to compute gradients
        optimizer.step()  # update the model parameters
        total_train_loss += loss.item()
        # argmax picks the predicted class index; compare it with the target.
        matches = y_pred.argmax(dim=1) == y
        total_train_correct += matches.sum().item()
        num_batches += 1
        num_samples += matches.numel()

    avg_train_loss = total_train_loss / num_batches
    avg_train_acc = total_train_correct / num_samples

    return avg_train_loss, avg_train_acc

def test_epoch(model, loss_fn, val_dataloader):
    """Evaluate `model` for one pass over `val_dataloader`.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loss_fn: Callable mapping (predictions, targets) to a scalar loss.
        val_dataloader: Iterable yielding (inputs, targets) batches.

    Returns:
        Tuple (avg_val_loss, avg_val_acc): the mean of the per-batch losses
        and the fraction of correctly classified samples.
    """
    model.eval()
    total_val_loss = 0
    total_val_correct = 0
    # Count what is actually iterated instead of relying on
    # len(dataloader) / len(dataloader.dataset): with drop_last=True or a
    # subset sampler the loader visits fewer samples than the dataset holds,
    # which would silently deflate the reported accuracy.
    num_batches = 0
    num_samples = 0
    # Gradients are not needed for evaluation; disabling them saves work.
    with torch.no_grad():
        for x, y in val_dataloader:
            x, y = x.to(device), y.to(device)  # move the batch to the module-level `device`
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            total_val_loss += loss.item()
            # argmax picks the predicted class index; compare it with the target.
            matches = y_pred.argmax(dim=1) == y
            total_val_correct += matches.sum().item()
            num_batches += 1
            num_samples += matches.numel()

    avg_val_loss = total_val_loss / num_batches
    avg_val_acc = total_val_correct / num_samples

    return avg_val_loss, avg_val_acc

def run(epochs, model, optimizer, loss_fn, train_loader, valid_loader, verbose=1):
    """Train `model` for `epochs` epochs, evaluating after every epoch.

    Args:
        epochs: Number of training epochs.
        model: Network to train.
        optimizer: Optimizer used to update the model parameters.
        loss_fn: Callable mapping (predictions, targets) to a scalar loss.
        train_loader: Loader passed to `train_epoch`.
        valid_loader: Loader passed to `test_epoch`.
        verbose: When equal to 1, print the metrics after each epoch.

    Returns:
        Tuple (train_loss_log, train_acc_log, val_loss_log, val_acc_log);
        each element is a list with one entry per epoch.
    """
    train_loss_log = []
    val_loss_log = []
    train_acc_log = []
    val_acc_log = []

    for epoch in tqdm(range(epochs)):
        avg_train_loss, avg_train_acc = train_epoch(model, optimizer, loss_fn, train_loader, valid_loader)
        avg_val_loss, avg_val_acc = test_epoch(model, loss_fn, valid_loader)
        train_loss_log.append(avg_train_loss)
        val_loss_log.append(avg_val_loss)
        train_acc_log.append(avg_train_acc)
        val_acc_log.append(avg_val_acc)
        if verbose == 1:
            # Adjacent f-strings are joined without extra characters. A
            # backslash continuation inside a single literal would leak the
            # source indentation into the printed line.
            print(
                f'Epoch: {epoch}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f} '
                f'| Train Acc: {avg_train_acc:.3f}, Val Acc: {avg_val_acc:.3f}'
            )
    return train_loss_log, train_acc_log, val_loss_log, val_acc_log

## 開始訓練模型

In [None]:
# Both models are trained with the same learning rate, loss and epoch count.
# NOTE: the test loader is also used as the validation loader here.
learning_rate = 1e-4
loss_fn = nn.CrossEntropyLoss()

print('Training DNN model')
dnn_model = dnn_model.to(device)
optimizer = torch.optim.Adam(params=dnn_model.parameters(), lr=learning_rate)
dnn_history = run(
    epochs=20,
    model=dnn_model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    valid_loader=test_loader,
)

print('Training CNN model')
cnn_model = cnn_model.to(device)
optimizer = torch.optim.Adam(params=cnn_model.parameters(), lr=learning_rate)
cnn_history = run(
    epochs=20,
    model=cnn_model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    valid_loader=test_loader,
)

## 測試資料

In [None]:
# Run the CNN on the first test sample and compare against its label.
x, y = test_ds[0]
x = x.to(device)
with torch.no_grad():
    # unsqueeze(0) adds a leading batch dimension of size 1.
    y_pred = cnn_model(x.unsqueeze(0))
print('y_pred.      : ', y_pred)
print('y_pred.argmax: ', y_pred.argmax(dim=1))
print('y            : ', y)

In [None]:
# Loss and accuracy of both models on the test loader.
# The results are stored only; nothing is printed in this cell.
dnn_loss, dnn_acc = test_epoch(dnn_model, loss_fn, test_loader)
cnn_loss, cnn_acc = test_epoch(cnn_model, loss_fn, test_loader)

## 訓練結果視覺化

In [None]:
# Histories to plot and the legend label of every curve; the label lists
# follow the same model order as `history_list`.
history_list = [cnn_history, dnn_history]
model_names = ("cnn", "dnn")
history_train_acc = [f"{name}_train_acc" for name in model_names]
history_valid_acc = [f"{name}_valid_acc" for name in model_names]
history_train_loss = [f"{name}_train_loss" for name in model_names]
history_valid_loss = [f"{name}_valid_loss" for name in model_names]

In [None]:
plt.figure(figsize=(20, 6))

# Each history is indexed as [0] train loss, [1] train accuracy,
# [2] validation loss, [3] validation accuracy (one value per epoch).

# Left panel: loss curves of every model.
plt.subplot(1, 2, 1)
plt.title('Loss')
for each_his, each_train, each_valid in zip(
    history_list, history_train_loss, history_valid_loss
):
    epoch_axis = np.arange(len(each_his[0]))
    plt.plot(epoch_axis, each_his[0], label=each_train)
    plt.plot(epoch_axis, each_his[2], label=each_valid)
plt.legend(loc='best')

# Right panel: accuracy curves of every model.
plt.subplot(1, 2, 2)
plt.title('Accuracy')
for each_his, each_train, each_valid in zip(
    history_list, history_train_acc, history_valid_acc
):
    epoch_axis = np.arange(len(each_his[0]))
    plt.plot(epoch_axis, each_his[1], label=each_train)
    plt.plot(epoch_axis, each_his[3], label=each_valid)
plt.legend(loc='best')
plt.show()