In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data
import time

In [2]:
'''Initialize Params'''
epochs = 100
learning_rate = 0.03
momentum = 0.5
log_interval = 10
batch_size = 128

In [3]:
!unzip "/content/drive/MyDrive/food.zip"

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  inflating: food/testing/3315.jpg   
  inflating: food/testing/1502.jpg   
  inflating: food/testing/1264.jpg   
  inflating: food/testing/0152.jpg   
  inflating: food/testing/2745.jpg   
  inflating: food/testing/2023.jpg   
  inflating: food/testing/0634.jpg   
  inflating: food/testing/0620.jpg   
  inflating: food/testing/2037.jpg   
  inflating: food/testing/3329.jpg   
  inflating: food/testing/2989.jpg   
  inflating: food/testing/2751.jpg   
  inflating: food/testing/1258.jpg   
  inflating: food/testing/0146.jpg   
  inflating: food/testing/1933.jpg   
  inflating: food/testing/0393.jpg   
  inflating: food/testing/2584.jpg   
  inflating: food/testing/2590.jpg   
  inflating: food/testing/1099.jpg   
  inflating: food/testing/0387.jpg   
  inflating: food/testing/1927.jpg   
  inflating: food/testing/3103.jpg   
  inflating: food/testing/1714.jpg   
  inflating: food/testing/1072.jpg   
  inflating: food/testing/0378.jpg   
  inflati

In [4]:
'''Load Data'''
def readFile(path,label):
    image_dir = sorted(os.listdir(path))
    # x stores photos
    x = np.zeros((len(image_dir),128,128,3),dtype=np.uint8)
    # y stores labels
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x,y
    else:
        return x

train_x, train_y = readFile('/content/food/training',True)
val_x, val_y = readFile('/content/food/validation',True)
test_x = readFile('/content/food/testing',False)
print("Reading data: ")
print("Size of training data = {}".format(len(train_x)))
print("Size of validation data = {}".format(len(val_x)))
print("Size of Testing data = {}".format(len(test_x)))

Reading data: 
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347


In [5]:
train_x = torch.tensor(train_x)
# print(train_x.shape)  -> torch.Size([9866, 3, 128, 128])
train_x = train_x.transpose(1,3).float()
# print(train_x.shape) -> torch.Size([9866, 128, 128, 3])
train_y = torch.tensor(train_y)
val_x = torch.tensor(val_x)
val_x = val_x.transpose(1, 3).float()
val_y = torch.tensor(val_y)

train_dataset = Data.TensorDataset(train_x,train_y)
val_dataset = Data.TensorDataset(val_x,val_y)

train_loader = Data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=True) 

In [6]:
'''Create Model'''
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #torch.nn.MaxPool2d(kernel_size, stride, padding)
        #input 维度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # 输出[64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 输出[64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # 输出[128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 输出[128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # 输出[256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 输出[256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # 输出[512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # 输出[512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # 输出[512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # 输出[512, 4, 4]
        )
        # 全连接的前向传播神经网络
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)   # 最后是11个分类
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # 摊平成1维
        return self.fc(out)

In [7]:
'''Initialize the network'''
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [8]:
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print("CUDA is available! Training on GPU...")
else:
    print("CUDA is not available. Training on CPU...")

CUDA is available! Training on GPU...


In [12]:
model = Net().cuda() #用cuda加速
loss = nn.CrossEntropyLoss() # 因为是分类任务，所以使用交叉熵损失 
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # 使用Adam优化器
num_epoch = 30 #迭代次数

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 确保 model 是在 训练 model (开启 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 将模型参数的梯度 gradient 归零
        train_pred = model(data[0].cuda()) # 利用 model 得到预测的概率分布，这边实际上是调用模型的 forward 函数
        batch_loss = loss(train_pred, data[1].cuda()) # 计算 loss （注意 prediction 跟 label 必须同时在 CPU 或是 GPU 上）
        batch_loss.backward() # 利用 back propagation 算出每个参数的 gradient
        optimizer.step() # 以 optimizer 用 gradient 更新参数

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    #验证集val
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #将结果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_dataset.__len__(), train_loss/train_dataset.__len__(), val_acc/val_dataset.__len__(), val_loss/val_dataset.__len__()))

[001/030] 70.39 sec(s) Train Acc: 0.353132 Loss: 0.014584 | Val Acc: 0.376676 loss: 0.014564
[002/030] 70.11 sec(s) Train Acc: 0.504561 Loss: 0.011548 | Val Acc: 0.490671 loss: 0.011945
[003/030] 70.10 sec(s) Train Acc: 0.567403 Loss: 0.009944 | Val Acc: 0.499708 loss: 0.011667
[004/030] 70.07 sec(s) Train Acc: 0.632475 Loss: 0.008503 | Val Acc: 0.557434 loss: 0.010314
[005/030] 69.95 sec(s) Train Acc: 0.691871 Loss: 0.007173 | Val Acc: 0.547813 loss: 0.011203
[006/030] 69.98 sec(s) Train Acc: 0.741334 Loss: 0.005942 | Val Acc: 0.531195 loss: 0.012083
[007/030] 69.99 sec(s) Train Acc: 0.800628 Loss: 0.004848 | Val Acc: 0.542274 loss: 0.011988
[008/030] 69.98 sec(s) Train Acc: 0.838334 Loss: 0.003791 | Val Acc: 0.585423 loss: 0.010934
[009/030] 69.94 sec(s) Train Acc: 0.930266 Loss: 0.001962 | Val Acc: 0.540233 loss: 0.013987
[010/030] 70.03 sec(s) Train Acc: 0.956923 Loss: 0.001330 | Val Acc: 0.589213 loss: 0.012238
[011/030] 70.01 sec(s) Train Acc: 0.974255 Loss: 0.000980 | Val Acc: 0

KeyboardInterrupt: ignored