# 下载数据集&导入包

In [None]:
!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集
!unzip food-11.zip # 解壓縮

In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import time

## Read Image
利用OpenCV(cv2)读入照片并存放在numpy array中

In [3]:
def readfile(path, label):
  # label是一个布尔值，代表需不需要回传y值
  image_dir = sorted(os.listdir(path))
  x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
  y = np.zeros((len(image_dir)), dtype=np.uint8)
  for i, file in enumerate(image_dir):
    img = cv2.imread(os.path.join(path, file))
    x[i, :, :] = cv2.resize(img, (128, 128))
    if label:
      y[i] = int(file.split("_")[0])
  if label:
    return x, y
  else:
    return x

In [4]:
# 分别将training set、validation set、testing set用readfile函数读入
workspace_dir = './food-11'
print('reading data')
train_x, train_y = readfile(os.path.join(workspace_dir, 'training'), True)
print('size of training data = {}'.format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, 'validation'), True)
print('size of validation data = {}'.format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, 'testing'), False)
print('size of testing data = {}'.format(len(test_x)))

reading data
size of training data = 9866
size of validation data = 3430
size of testing data = 3347


# Dataset
利用pytorch的Dataset来“包装”data，是后续的training和testing更为方便

In [5]:
# training 時做 data augmentation
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), # 隨機將圖片水平翻轉
    transforms.RandomRotation(15), # 隨機旋轉圖片
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
])
# testing 時不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])

class ImgDataset(Dataset):
  def __init__(self, x, y=None, transform=None):
    self.x = x
    self.y = y
    if y is not None:
      self.y = torch.LongTensor(y)
    self.transform = transform
  
  def __len__(self):
    return len(self.x)

  def __getitem__(self, index):
    X = self.x[index]
    if self.transform is not None:
      X = self.transform(X)
    if self.y is not None:
      Y = self.y[index]
      return X, Y
    else:
      return X

In [6]:
batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# Model

In [7]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# Training
使用training set训练，并使用validation set寻找好的参数

In [8]:
model = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
        train_pred = model(data[0].cuda()) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
        batch_loss = loss(train_pred, data[1].cuda()) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
        batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
        optimizer.step() # 以 optimizer 用 gradient 更新參數值

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[001/030] 48.31 sec(s) Train Acc: 0.238901 Loss: 0.018177 | Val Acc: 0.232362 loss: 0.016908
[002/030] 47.81 sec(s) Train Acc: 0.330833 Loss: 0.014988 | Val Acc: 0.322449 loss: 0.015778
[003/030] 47.67 sec(s) Train Acc: 0.391141 Loss: 0.013847 | Val Acc: 0.352770 loss: 0.014747
[004/030] 47.74 sec(s) Train Acc: 0.425603 Loss: 0.012892 | Val Acc: 0.418950 loss: 0.012953
[005/030] 47.77 sec(s) Train Acc: 0.472735 Loss: 0.011952 | Val Acc: 0.437609 loss: 0.012704
[006/030] 47.58 sec(s) Train Acc: 0.500912 Loss: 0.011399 | Val Acc: 0.372303 loss: 0.014520
[007/030] 47.69 sec(s) Train Acc: 0.527772 Loss: 0.010691 | Val Acc: 0.370554 loss: 0.018394
[008/030] 47.70 sec(s) Train Acc: 0.550071 Loss: 0.010082 | Val Acc: 0.462099 loss: 0.012944
[009/030] 47.61 sec(s) Train Acc: 0.573485 Loss: 0.009646 | Val Acc: 0.512536 loss: 0.011806
[010/030] 47.67 sec(s) Train Acc: 0.600041 Loss: 0.009040 | Val Acc: 0.477843 loss: 0.013142
[011/030] 47.59 sec(s) Train Acc: 0.618488 Loss: 0.008685 | Val Acc: 0

得到好的参数后，使用training set和validation set共同训练，data量变多，模型效果较好

In [9]:
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [10]:
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #將結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/030] 56.77 sec(s) Train Acc: 0.257972 Loss: 0.016756
[002/030] 56.79 sec(s) Train Acc: 0.385229 Loss: 0.013763
[003/030] 57.04 sec(s) Train Acc: 0.449985 Loss: 0.012375
[004/030] 57.05 sec(s) Train Acc: 0.503986 Loss: 0.011245
[005/030] 56.95 sec(s) Train Acc: 0.552046 Loss: 0.010172
[006/030] 57.00 sec(s) Train Acc: 0.582732 Loss: 0.009434
[007/030] 56.85 sec(s) Train Acc: 0.621089 Loss: 0.008568
[008/030] 56.66 sec(s) Train Acc: 0.643577 Loss: 0.008027
[009/030] 56.79 sec(s) Train Acc: 0.665012 Loss: 0.007540
[010/030] 56.96 sec(s) Train Acc: 0.683890 Loss: 0.007177
[011/030] 56.84 sec(s) Train Acc: 0.701940 Loss: 0.006722
[012/030] 56.96 sec(s) Train Acc: 0.714801 Loss: 0.006457
[013/030] 57.07 sec(s) Train Acc: 0.734356 Loss: 0.005925
[014/030] 56.92 sec(s) Train Acc: 0.748721 Loss: 0.005672
[015/030] 56.96 sec(s) Train Acc: 0.771134 Loss: 0.005200
[016/030] 56.86 sec(s) Train Acc: 0.779332 Loss: 0.004935
[017/030] 56.80 sec(s) Train Acc: 0.787229 Loss: 0.004739
[018/030] 56.7

# Testing

In [11]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [13]:
model_best.eval()
prediction = []
with torch.no_grad():
  for i, data in enumerate(test_loader):
    test_pred = model_best(data.cuda())
    test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
    for y in test_label:
      prediction.append(y)

In [14]:
# 结果写入csv
with open('prediction.csv', 'w') as f:
  f.write('Id,Category\n')
  for i, y in enumerate(prediction):
    f.write('{},{}\n'.format(i, y)) 