# **Homework 3 - Convolutional Neural Network**

若有任何問題，歡迎來信至助教信箱 ntu-ml-2020spring-ta@googlegroups.com

In [1]:
# !gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集
# !unzip food-11.zip # 解壓縮

In [2]:
# Import需要的套件
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset, ConcatDataset
from apex import amp
import time

#Read image
利用 OpenCV (cv2) 讀入照片並存放在 numpy array 中

In [3]:
def readfile(path, label):
    # label 是一個 boolean variable，代表需不需要回傳 y 值
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        # resize to 128 x ? or ? x 128
        height = img.shape[0]
        width = img.shape[1]
        rate = 128 / max(height, width)
        height = int(height * rate)
        width = int(width * rate)
        img = cv2.resize(img, (width, height))
        # pad black
        # from https://blog.csdn.net/qq_20622615/article/details/80929746
        W, H = 128, 128
        top = (H - height) // 2
        bottom = (H - height) // 2
        if top + bottom + height < H:
            bottom += 1
        left = (W - width) // 2
        right = (W - width) // 2
        if left + right + width < W:
            right += 1
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        # to np array
        x[i, :, :] = img
        if label:
          y[i] = int(file.split("_")[0])
    if label:
      return x, y
    else:
      return x

In [4]:
# 分別將 training set、validation set、testing set 用 readfile 函式讀進來
workspace_dir = './food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))

Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347


# Dataset
在 PyTorch 中，我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來"包裝" data，使後續的 training 及 testing 更為方便。

Dataset 需要 overload 兩個函數：\_\_len\_\_ 及 \_\_getitem\_\_

\_\_len\_\_ 必須要回傳 dataset 的大小，而 \_\_getitem\_\_ 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。

實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實做的話會在程式運行階段出現 error。


In [6]:
# training 時做 data augmentation
train_transform1 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomChoice([
        transforms.RandomHorizontalFlip(),
        transforms.RandomPerspective()
    ]),
    transforms.RandomChoice([
        transforms.RandomAffine(10), # 隨機線性轉換
        transforms.RandomRotation(40)
    ]),
    transforms.ColorJitter(), # 隨機色溫等
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
])
train_transform2 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomOrder([
        transforms.RandomChoice([
            transforms.RandomHorizontalFlip(),
            transforms.RandomPerspective()
        ]),
        transforms.RandomAffine(10), # 隨機線性轉換
        transforms.RandomResizedCrop((128, 128), scale=(0.8, 1.0)), # 隨機子圖
    ]),
    transforms.RandomChoice([
        transforms.ColorJitter(), # 隨機色溫等
        transforms.RandomGrayscale(),
    ]),
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
    transforms.RandomErasing(0.2)
])
# testing 時不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [7]:
batch_size = 64
train_set = ConcatDataset([
    ImgDataset(train_x, train_y, train_transform1),
    ImgDataset(train_x, train_y, train_transform2)
])
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# Model

In [8]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
#             nn.Conv2d(3, 128, 5, 1, 3),  # [3, 128, 128]
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
            
            nn.Conv2d(3, 128, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]
            
            nn.Dropout2d(0.5),

            nn.Conv2d(128, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.PReLU(1),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]
            
            nn.Dropout2d(0.1),

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 512),
            nn.ReLU(),
            
            nn.Linear(512, 256),
            nn.ReLU(),
            
            nn.Linear(256, 128),
            nn.Dropout(0.3),
            nn.ReLU(),
            
            nn.Linear(128, 100),
            nn.ReLU(),
            
            nn.Linear(100, 30),
            nn.PReLU(1),

            nn.Linear(30, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# Training

使用 training set 訓練，並使用 validation set 尋找好的參數

In [9]:
model = Classifier().cuda()
# model = Classifier().cpu()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 300

# # use apex to optimize
# model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
        train_pred = model(data[0].cuda()) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
        batch_loss = loss(train_pred, data[1].cuda()) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
#         train_pred = model(data[0].cpu())
#         batch_loss = loss(train_pred, data[1].cpu())
#         with amp.scale_loss(batch_loss, optimizer) as scaled_loss:
#             scaled_loss.backward() # 利用 back propagation 算出每個參數的 gradient
        batch_loss.backward() 
        optimizer.step() # 以 optimizer 用 gradient 更新參數值

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
#             val_pred = model(data[0].cpu())
#             batch_loss = loss(val_pred, data[1].cpu())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

  res = torch.gels(B, A)[0]


[001/300] 85.26 sec(s) Train Acc: 0.231603 Loss: 0.033751 | Val Acc: 0.283965 loss: 0.031214
[002/300] 84.91 sec(s) Train Acc: 0.279749 Loss: 0.032084 | Val Acc: 0.332070 loss: 0.030581
[003/300] 84.92 sec(s) Train Acc: 0.317505 Loss: 0.030593 | Val Acc: 0.328863 loss: 0.029614
[004/300] 84.98 sec(s) Train Acc: 0.340817 Loss: 0.029438 | Val Acc: 0.363557 loss: 0.027770
[005/300] 84.75 sec(s) Train Acc: 0.369451 Loss: 0.028267 | Val Acc: 0.428280 loss: 0.026063
[006/300] 84.85 sec(s) Train Acc: 0.404825 Loss: 0.026986 | Val Acc: 0.444606 loss: 0.024675
[007/300] 85.04 sec(s) Train Acc: 0.433306 Loss: 0.025772 | Val Acc: 0.478717 loss: 0.023667
[008/300] 84.94 sec(s) Train Acc: 0.456416 Loss: 0.024808 | Val Acc: 0.475219 loss: 0.024637
[009/300] 84.92 sec(s) Train Acc: 0.476637 Loss: 0.023954 | Val Acc: 0.519825 loss: 0.022081
[010/300] 84.82 sec(s) Train Acc: 0.498682 Loss: 0.022874 | Val Acc: 0.509913 loss: 0.022766
[011/300] 84.96 sec(s) Train Acc: 0.516825 Loss: 0.022174 | Val Acc: 0

得到好的參數後，我們使用 training set 和 validation set 共同訓練（資料量變多，模型效果較好）

In [11]:
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ConcatDataset([
    ImgDataset(train_val_x, train_val_y, train_transform1),
    ImgDataset(train_val_x, train_val_y, train_transform2),
])
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [12]:
model_best = Classifier().cuda()
# model_best = Classifier().cpu()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 250

# use apex to optimize
# model_best, optimizer = amp.initialize(model_best, optimizer, opt_level="O3")

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
#         train_pred = model_best(data[0].cpu())
#         batch_loss = loss(train_pred, data[1].cpu())
#         with amp.scale_loss(batch_loss, optimizer) as scaled_loss:
#             scaled_loss.backward()
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #將結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/250] 106.97 sec(s) Train Acc: 0.230520 Loss: 0.033811
[002/250] 107.50 sec(s) Train Acc: 0.288658 Loss: 0.031306
[003/250] 107.75 sec(s) Train Acc: 0.335214 Loss: 0.029508
[004/250] 107.67 sec(s) Train Acc: 0.377820 Loss: 0.027924
[005/250] 108.67 sec(s) Train Acc: 0.419073 Loss: 0.026260
[006/250] 107.79 sec(s) Train Acc: 0.449835 Loss: 0.025025
[007/250] 107.91 sec(s) Train Acc: 0.476722 Loss: 0.023769
[008/250] 108.00 sec(s) Train Acc: 0.507483 Loss: 0.022662
[009/250] 107.82 sec(s) Train Acc: 0.524519 Loss: 0.021819
[010/250] 107.86 sec(s) Train Acc: 0.542080 Loss: 0.021008
[011/250] 107.98 sec(s) Train Acc: 0.561221 Loss: 0.020157
[012/250] 107.77 sec(s) Train Acc: 0.573368 Loss: 0.019602
[013/250] 107.92 sec(s) Train Acc: 0.593487 Loss: 0.018683
[014/250] 107.71 sec(s) Train Acc: 0.613906 Loss: 0.018037
[015/250] 107.70 sec(s) Train Acc: 0.623383 Loss: 0.017454
[016/250] 107.55 sec(s) Train Acc: 0.639516 Loss: 0.016814
[017/250] 107.73 sec(s) Train Acc: 0.648090 Loss: 0.0163

In [13]:
# save model
torch.save(model_best, 'model.torch')

  "type " + obj.__name__ + ". It won't be checked "


# Testing
利用剛剛 train 好的 model 進行 prediction

In [14]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [15]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
#         test_pred = model_best(data.cpu())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

In [16]:
#將結果寫入 csv 檔
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))

In [17]:
# 釋放記憶體
torch.cuda.empty_cache()