In [1]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

ImageFolder way of loading data.
Load faster but train slower.

In [None]:
# 将图像RGB三个通道的像素值分别减去0.5再除以0.5，从而将所有像素值
# 固定到[-1.0, 1.0]范围内
#加上transforms
ROOT_TRAIN = r'D:\university\大四前进（1）\机器学习\NTU_HYLee_ML20\4.CNN\food-11\training'
ROOT_VAL=r'D:\university\大四前进（1）\机器学习\NTU_HYLee_ML20\4.CNN\food-11\validation'
normalize=transforms.Normalize(mean=[.5,.5,.5],std=[.5,.5,.5])
train_transform = transforms.Compose([
    transforms.Resize((128, 128)), # 裁剪为128 * 128
    transforms.RandomVerticalFlip(), # 随机垂直翻转
    transforms.ToTensor(), # 将0-127范围的像素转为0-1.0范围的tensor
    normalize])
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
train_dataset = ImageFolder(ROOT_TRAIN, transform=train_transform)
val_dataset = ImageFolder(ROOT_VAL, transform=test_transform)
batch_size=8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

#### 读取数据

In [2]:
def readfile(path, label):
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x
workspace_dir = './food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))

Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347


In [15]:
#data augumemtation,need for training
train_transform=transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
])
test_transform=transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
# rewrite __len__ and __getitem__ method
# encapsulate dataset
# makes it easier for training
class ImgDataset(Dataset):
    def __init__(self,x,y=None,transform=None):
        self.x=x
        self.y=y
        if y is not None:
            self.y=torch.LongTensor(y)
        self.transform=transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self,index):
        X=self.x[index]
        if self.transform is not None:
            X=self.transform(X)
        if self.y is not None:
            Y=self.y[index]
            return X,Y
        else:
            return X

In [16]:
batch_size=8
#Dataset
train_set=ImgDataset(train_x,train_y,train_transform)
val_set=ImgDataset(val_x,val_y,test_transform)
#DataLoader
train_loader=DataLoader(train_set,batch_size=batch_size,shuffle=True)
val_loader=DataLoader(val_set,batch_size=batch_size,shuffle=False)

#### 模型

In [6]:
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
# feature map:(n+2p-f)/s+1,n-original size;p-padding;f-filter size;s-stride
# torch.nn.MaxPool2d(kernel_size, stride, padding)
# initial dimension[3,128,128]

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)# flatten the conv layer
        return self.fc(out)


#### 训练

In [7]:
# instance the model, loss, optimizer
model=Classifier().cuda()
loss=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch=10 # the optimum value is 43

In [25]:
# the training process
for epoch in range(num_epoch):
    epoch_start_time=time.time()
    train_acc=0.0
    train_loss=0.0
    val_acc=0.0
    val_loss=0.0
    
    model.train()
    for i,data in enumerate(train_loader):
        optimizer.zero_grad()
        train_pred=model(data[0].cuda())
        batch_loss=loss(train_pred,data[1].cuda())
        batch_loss.backward()
        optimizer.step()
        
        train_acc+=np.sum(np.argmax(train_pred.cpu().data.numpy(),axis=1)==data[1].numpy())
        train_loss+=batch_loss.item()
        
    model.eval()
    with torch.no_grad():
        for i,data in enumerate(val_loader):
            val_pred=model(data[0].cuda())
            batch_loss=loss(val_pred,data[1].cuda())
            val_acc+=np.sum(np.argmax(val_pred.cpu().data.numpy(),axis=1)==data[1].numpy())
            val_loss+=batch_loss.item()
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 41, num_epoch+20, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[041/050] 88.82 sec(s) Train Acc: 0.783296 Loss: 0.076865 | Val Acc: 0.583673 loss: 0.189175
[042/050] 89.79 sec(s) Train Acc: 0.792824 Loss: 0.074142 | Val Acc: 0.601458 loss: 0.182420
[043/050] 90.21 sec(s) Train Acc: 0.807217 Loss: 0.070327 | Val Acc: 0.585714 loss: 0.180024
[044/050] 90.77 sec(s) Train Acc: 0.807521 Loss: 0.068512 | Val Acc: 0.589213 loss: 0.186801
[045/050] 90.80 sec(s) Train Acc: 0.821407 Loss: 0.063707 | Val Acc: 0.560058 loss: 0.213665
[046/050] 90.98 sec(s) Train Acc: 0.829820 Loss: 0.061475 | Val Acc: 0.590962 loss: 0.196705
[047/050] 92.11 sec(s) Train Acc: 0.839246 Loss: 0.057448 | Val Acc: 0.604665 loss: 0.207144
[048/050] 91.91 sec(s) Train Acc: 0.846037 Loss: 0.055345 | Val Acc: 0.587464 loss: 0.209641
[049/050] 91.56 sec(s) Train Acc: 0.860328 Loss: 0.052074 | Val Acc: 0.616618 loss: 0.196142
[050/050] 91.79 sec(s) Train Acc: 0.861038 Loss: 0.050401 | Val Acc: 0.607872 loss: 0.208604


#### 再次训练

In [26]:
# 同时使用train_set和val_set的数据
train_val_x=np.concatenate((train_x,val_x),axis=0)
train_val_y=np.concatenate((train_y,val_y),axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [27]:
model_best=Classifier().cuda()
loss=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch=10

In [None]:
# the training process
for epoch in range(num_epoch):
    epoch_start_time=time.time()
    train_acc=0.0
    train_loss=0.0
    
    model_best.train()
    for i,data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_val_pred=model_best(data[0].cuda())
        batch_loss=loss(train_val_pred,data[1].cuda())
        batch_loss.backward()
        optimizer.step()
        
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

#### 测试

In [31]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [32]:
model.eval()
predictions=[]
with torch.no_grad():
    for i,data in enumerate(test_loader):
        test_pred=model(data.cuda())
        test_label=np.argmax(test_pred.cpu().data.numpy(),axis=1)
        for y in test_label:
            predictions.append(y)

In [35]:
with open("predict.csv",'w') as f:
    f.write('Id,Category\n')
    for i,y in enumerate(predictions):
        f.write('{},{}\n'.format(i,y))