In [1]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader,Dataset
import time

In [2]:
# 定义一个读取图片的函数readfile()
def readfile(path,label):
    # bool label 代表我们是否需要返回y值
    image_dir = sorted(os.listdir(path))#sorted防止乱序
    x = np.zeros((len(image_dir),128,128,3),dtype=np.uint8)
    #x存储图片，每张图片都是128*128*3（三通道） 
    #y存储标签，每个标签大小为1
    y = np.zeros((len(image_dir)),dtype=np.uint8)
    for i,file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path,file))
        #利用cv2.resize（）将不同大小的图片统一为128*128
        x[i,:,:]=cv2.resize(img,(128,128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x,y
    else:
        return x
            

In [3]:
#分别将training set,validation set,testing set读入
workspace_dir='./food-11'
print("Reading Data")
train_x,train_y = readfile(os.path.join(workspace_dir,"training"), True)
print("Size of training set ={}".format(len(train_x)))
val_x,val_y= readfile(os.path.join(workspace_dir,"validation"), True)
print("Size of validation set ={}".format(len(val_x)))
test_x= readfile(os.path.join(workspace_dir,"testing"), False)
print("Size of testing set ={}".format(len(test_x)))


Reading Data
Size of training set =9866
Size of validation set =3430
Size of testing set =3347


In [4]:
#training 通过随机旋转水平翻转来进行 data augmentation
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),#随机翻转图片
    transforms.RandomRotation(15),#旋转图片
    transforms.ToTensor(),# 将图片变成张量，并把数值normalize到[0,1]
    
])
#testing 不需要数据增强
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    
])
class ImgDataset(Dataset):
    def __init__(self,x,y=None,transform=None):
        self.x = x
        #label 是LongTensor型
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self,index):
        X =self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X,Y
        else:
            return X
            

In [5]:
batch_size = 8
train_set = ImgDataset(train_x,train_y,train_transform)
val_set = ImgDataset(val_x,val_y,test_transform)
train_loader = DataLoader(train_set,batch_size = batch_size,shuffle = True)#set to True to have the data reshuffled at every epoch (default: False).
val_loader = DataLoader(val_set,batch_size = batch_size,shuffle=False)

In [6]:
#一个卷积神经网络 再是一个全连接的前向传播神经网络
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier,self).__init__()
        #torch.nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding)
        #torch.nn.Maxpool2d(kernel_size,stride,padding)
        #input 维度[3,128,128]
        self.cnn = nn.Sequential(
             nn.Conv2d(3,64,3,1,1),#输出[64,128,128]
             nn.BatchNorm2d(64),#(防止梯度消失或爆炸) x-mean(x)/(Var(x)^0.5+eps )*gamma +beta       
             nn.ReLU(),#ReLU是将所有的负值都设为零'
             nn.MaxPool2d(2,2,0),#输出[64,64,64] class torch.nn.MaxPool2d (kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
             
             nn.Conv2d(64,128,3,1,1),#padding(int or tuple, optional) - 输入的每一条边补充0的层数，主要用于边缘处填充输出[128,64,64]
             nn.BatchNorm2d(128),
             nn.ReLU(),
             nn.MaxPool2d(2,2,0),#输出[128,32,32]
             
             nn.Conv2d(128,256,3,1,1),#输出[256,32,32]
             nn.BatchNorm2d(256),
             nn.ReLU(),
             nn.MaxPool2d(2,2,0),#输出[256,16,16]
             
             nn.Conv2d(256,512,3,1,1),#输出[512,16,16]
             nn.BatchNorm2d(512),
             nn.ReLU(),
             nn.MaxPool2d(2,2,0),#输出[512,8,8]
             
             nn.Conv2d(512,512,3,1,1),#输出[512,8,8]
             nn.BatchNorm2d(512),
             nn.ReLU(),
             nn.MaxPool2d(2,2,0),#输出[512,4,4]
             
             
        
         )#全连接的前向传播网络
        self.fc = nn.Sequential(
            nn.Linear(512*4*4,1024),
            nn.ReLU(),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,11)#11个分类
        
        )
    def forward(self,x):
        out = self.cnn(x)
        out = out.view(out.size()[0],-1)
        return self.fc(out)



        
        
    

In [7]:


model = Classifier().cuda() #用cuda加速
loss = nn.CrossEntropyLoss() # 因为是分类任务，所以使用交叉熵损失 
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # 使用Adam优化器
num_epoch = 30 #迭代次数

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 确保 model 是在 训练 model (开启 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 将模型参数的梯度 gradient 归零
        train_pred = model(data[0].cuda()) # 利用 model 得到预测的概率分布，这边实际上是调用模型的 forward 函数
        batch_loss = loss(train_pred, data[1].cuda()) # 计算 loss （注意 prediction 跟 label 必须同时在 CPU 或是 GPU 上）
        batch_loss.backward() # 利用 back propagation 算出每个参数的 gradient
        optimizer.step() # 以 optimizer 用 gradient 更新参数

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())#numpy.argmax(array, axis) 用于返回一个numpy数组中最大值的索引值。当一组中同时出现几个最大值时，返回第一个最大值的索引值。
        train_loss += batch_loss.item()
    
    #验证集val
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #将结果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))


[001/030] 39.21 sec(s) Train Acc: 0.237077 Loss: 0.270621 | Val Acc: 0.248688 loss: 0.261966
[002/030] 30.88 sec(s) Train Acc: 0.285627 Loss: 0.251641 | Val Acc: 0.263265 loss: 0.267512
[003/030] 31.22 sec(s) Train Acc: 0.322522 Loss: 0.239698 | Val Acc: 0.330904 loss: 0.233009
[004/030] 30.64 sec(s) Train Acc: 0.355869 Loss: 0.228165 | Val Acc: 0.311079 loss: 0.266576
[005/030] 29.77 sec(s) Train Acc: 0.390939 Loss: 0.216164 | Val Acc: 0.400583 loss: 0.212695
[006/030] 30.23 sec(s) Train Acc: 0.422765 Loss: 0.204593 | Val Acc: 0.400292 loss: 0.217368
[007/030] 30.06 sec(s) Train Acc: 0.455909 Loss: 0.193595 | Val Acc: 0.466764 loss: 0.190233
[008/030] 30.19 sec(s) Train Acc: 0.472836 Loss: 0.186033 | Val Acc: 0.474636 loss: 0.190676
[009/030] 30.21 sec(s) Train Acc: 0.500608 Loss: 0.177481 | Val Acc: 0.488338 loss: 0.192815
[010/030] 30.06 sec(s) Train Acc: 0.522502 Loss: 0.169804 | Val Acc: 0.534694 loss: 0.173239
[011/030] 30.18 sec(s) Train Acc: 0.546422 Loss: 0.161342 | Val Acc: 0

In [8]:
train_val_x = np.concatenate((train_x, val_x), axis=0) # 将train_x和val_x拼接起来 二维数组 axis=0，进行列的拼接
train_val_y = np.concatenate((train_y, val_y), axis=0) # 将train_y和val_y拼接起来
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)


In [9]:
model_best = Classifier().cuda() # cuda加速
loss = nn.CrossEntropyLoss() # 因为是分类任务，所以使用交叉熵损失 
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #将结果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))


[001/030] 36.40 sec(s) Train Acc: 0.244209 Loss: 0.266457
[002/030] 36.51 sec(s) Train Acc: 0.292870 Loss: 0.247876
[003/030] 37.08 sec(s) Train Acc: 0.331077 Loss: 0.234951
[004/030] 37.54 sec(s) Train Acc: 0.364170 Loss: 0.223731
[005/030] 41.52 sec(s) Train Acc: 0.404708 Loss: 0.209303
[006/030] 37.66 sec(s) Train Acc: 0.449985 Loss: 0.196587
[007/030] 37.20 sec(s) Train Acc: 0.473375 Loss: 0.185052
[008/030] 37.23 sec(s) Train Acc: 0.505490 Loss: 0.174797
[009/030] 37.25 sec(s) Train Acc: 0.531363 Loss: 0.165697
[010/030] 36.99 sec(s) Train Acc: 0.557010 Loss: 0.156604
[011/030] 37.57 sec(s) Train Acc: 0.586417 Loss: 0.148423
[012/030] 37.03 sec(s) Train Acc: 0.608830 Loss: 0.141157
[013/030] 36.88 sec(s) Train Acc: 0.631844 Loss: 0.132772
[014/030] 37.80 sec(s) Train Acc: 0.649970 Loss: 0.126077
[015/030] 37.59 sec(s) Train Acc: 0.667870 Loss: 0.118754
[016/030] 36.56 sec(s) Train Acc: 0.692238 Loss: 0.111874
[017/030] 36.48 sec(s) Train Acc: 0.702241 Loss: 0.106975
[018/030] 37.3

In [10]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


In [13]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        # 预测值中概率最大的下标即为模型预测的食物标签
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)


In [14]:
#将预测结果写入 csv 
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))
