## Ch8. 基於影像的深度學習案例

### 於 MNIST 資料集上訓練（微調）VGG 網路 (FinetuneVGG.ipynb)

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import random
import numpy as np
from torchvision import models
import torch.nn.functional as F

In [None]:
# Hyperparameters and runtime switches shared by the cells below.
EPOCH = 50        # epoch-count setting; NOTE(review): the training cell loops over range(10) and does not read this value
BATCH_SIZE = 50   # number of samples per training mini-batch
LR = 0.001        # learning rate passed to the Adam optimizer
if_use_gpu = 1    # truthy -> the model and the batches are moved to the GPU

In [None]:
class VGGNet(nn.Module):
    """VGG16-based image classifier with a small MLP head.

    The convolutional stack of a pretrained torchvision VGG16 is used as the
    feature extractor. Its output is adaptively pooled to 7 x 7, flattened
    and passed through a three-layer MLP that produces ``num_classes`` logits.

    Args:
        num_classes: Number of output classes (10 for the MNIST digits).
    """

    def __init__(self, num_classes=10):
        super(VGGNet, self).__init__()
        # NOTE(review): `pretrained=True` is kept as in the original; newer
        # torchvision releases prefer the `weights=` argument -- confirm
        # against the installed version.
        net = models.vgg16(pretrained=True)
        # Keep ONLY the convolutional stack. Storing the whole `net` here
        # would also run VGG's own pooling and 1000-way classifier, giving an
        # (N, 1000) tensor that cannot feed the 512 * 7 * 7 input of the
        # head defined below.
        self.features = net.features
        # Pool to a fixed 7 x 7 map so the head's input size does not depend
        # on the spatial size of the input image.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(        # MLP head: 25088 -> 512 -> 128 -> num_classes
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(128, num_classes),        # one logit per class
        )

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for a batch of 3-channel images."""
        x = self.features(x)                    # convolutional features, 512 channels
        x = self.avgpool(x)                     # -> (N, 512, 7, 7)
        x = x.view(x.size(0), -1)               # flatten to (N, 512 * 7 * 7)
        x = self.classifier(x)                  # MLP head -> logits
        return x



In [None]:
# Build the MNIST train/test datasets and wrap them in DataLoaders.
DOWNLOAD_MNIST = True   # passed as `download=` to both datasets

# Training split; images are converted to tensors by ToTensor().
train_data = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# Test split, converted the same way.
test_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# Shuffled mini-batches of BATCH_SIZE samples for training.
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# The test loader yields one (shuffled) sample per step.
test_loader = Data.DataLoader(
    dataset=test_data,
    batch_size=1,
    shuffle=True,
)

In [None]:
# Train VGGNet on MNIST and save the learned weights to ./vgg.pt.

# Honour the `if_use_gpu` switch everywhere (the batch used to be moved to
# CUDA unconditionally) and fall back to CPU when CUDA is not available.
device = torch.device("cuda" if if_use_gpu and torch.cuda.is_available() else "cpu")

vgg = VGGNet().to(device)                                   # instantiate the network on the target device
optimizer = torch.optim.Adam(vgg.parameters(), lr=LR)       # Adam optimizer over all model parameters
loss_function = nn.CrossEntropyLoss()                       # cross-entropy loss on the class logits

vgg.train()                                                 # keep Dropout active while training

# NOTE(review): the loop runs for 10 epochs; the EPOCH constant from the
# config cell is not read here.
for epoch in range(10):
    for step, (x, y) in enumerate(train_loader):
        b_y = y.to(device)

        # Replicate the single grey channel three times: (N, 1, 28, 28) -> (N, 3, 28, 28).
        # The batch dimension comes from `x` itself, so a final batch smaller
        # than BATCH_SIZE is handled correctly (no zero-padded rows).
        b_c = x.to(device).repeat(1, 3, 1, 1)
        # Upsample by 2 with bilinear interpolation -> (N, 3, 56, 56).
        b_c = F.interpolate(b_c, scale_factor=2, mode="bilinear", align_corners=True)

        output = vgg(b_c)                                   # forward pass -> class logits
        loss = loss_function(output, b_y)                   # loss against the ground-truth labels
        optimizer.zero_grad()                               # clear gradients from the previous step
        loss.backward()                                     # back-propagate
        optimizer.step()                                    # update the parameters

        if step % 1000 == 0:                                # log the training loss every 1000 steps
            print('Epoch:', epoch, '|step:', step, '|train loss:%.4f' % loss.item())

torch.save(vgg.state_dict(), "./vgg.pt")                    # persist the trained parameters



Epoch: 0 |step: 0 |train loss:2.3017
Epoch: 0 |step: 1000 |train loss:2.2883
Epoch: 1 |step: 0 |train loss:2.2879
Epoch: 1 |step: 1000 |train loss:2.2923
Epoch: 2 |step: 0 |train loss:2.3080
Epoch: 2 |step: 1000 |train loss:2.2896
Epoch: 3 |step: 0 |train loss:2.3106
Epoch: 3 |step: 1000 |train loss:2.3126
Epoch: 4 |step: 0 |train loss:2.3013
Epoch: 4 |step: 1000 |train loss:2.2948
Epoch: 5 |step: 0 |train loss:2.2947
Epoch: 5 |step: 1000 |train loss:2.3157
Epoch: 6 |step: 0 |train loss:2.3085
Epoch: 6 |step: 1000 |train loss:2.3182
Epoch: 7 |step: 0 |train loss:2.3056
Epoch: 7 |step: 1000 |train loss:2.3021
Epoch: 8 |step: 0 |train loss:2.2914
Epoch: 8 |step: 1000 |train loss:2.2987
Epoch: 9 |step: 0 |train loss:2.2921
Epoch: 9 |step: 1000 |train loss:2.3061


In [None]:
# Evaluate the saved VGGNet weights on the MNIST test set.

# Honour the `if_use_gpu` switch (the checkpoint and the batch used to be
# mapped to CUDA unconditionally) and fall back to CPU when CUDA is missing.
device = torch.device("cuda" if if_use_gpu and torch.cuda.is_available() else "cpu")

vgg = VGGNet()
vgg.load_state_dict(torch.load("./vgg.pt", map_location=device))   # load the previously trained weights
vgg = vgg.to(device)
vgg.eval()                                                          # disable Dropout for inference

error = 0
with torch.no_grad():                                               # no gradients are needed for testing
    for x, y in test_loader:
        b_y = y.to(device)

        # Replicate the grey channel: (N, 1, 28, 28) -> (N, 3, 28, 28),
        # then upsample by 2 exactly as in the training cell.
        b_c = x.to(device).repeat(1, 3, 1, 1)
        b_c = F.interpolate(b_c, scale_factor=2, mode="bilinear", align_corners=True)

        output = vgg(b_c)                                           # class logits
        result = torch.argmax(output, dim=1)                        # predicted class per sample

        # Count every wrong prediction in the batch, so any batch size works.
        error += (result != b_y).sum().item()

# Divide by the real dataset size instead of a hard-coded 10000.
error_rate = error / len(test_loader.dataset)
print("The error rate is ", error_rate*100,"%")
print("The accuracy rate is ", (1-error_rate)*100,"%")

The error rate is  88.64999999999999 %
The accuracy rate is  11.350000000000005 %
