# 用CNN实现图片的分类

In [0]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

## 读取图像

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
def readfile(path, label):
    '''
    读取图像
    parameter:
        @path: 文件的路径
        @label: 布尔值，表示是否需要返回标签
    return:
        读取图像后的array
    '''
    img_dir = sorted((os.listdir(path)))
    x = np.zeros((len(img_dir), 128, 128, 3), dtype=np.uint8) # 将每张图像转换成128 × 128 × 3的矩阵
    y = np.zeros((len(img_dir)), dtype=np.uint8) # 图像输出的标签
    for i, file in enumerate(img_dir):
        img = cv2.imread(os.path.join(path, file)) # cv2.imread()返回的是[height, width, channel]
        x[i, :, :] = cv2.resize(img, (128, 128)) # 将图像的高度和宽度的像素值指定为128
        if label:
          y[i] = int(file.split("_")[0])

    if label:
        return x, y
    else:
        return x
        

In [0]:
# !unzip /content/drive/My\ Drive/data/hw3/food-11.zip -d /content/drive/My\ Drive/data/hw3/food-11

In [5]:
# 读取图像
data_dir = '/content/drive/My Drive/data/hw3/food-11/food-11'
train_x, train_y = readfile(os.path.join(data_dir, "training"), True) # 训练数据
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(data_dir, "validation"), True) # 验证集
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(data_dir, "testing"), False) # 测试集
print("Size of testing data = {}".format(len(test_x)))

Size of training data = 9866
Size of validation data = 3430
Size of testing data = 3347


## 处理数据
主要利用pytorch的torch.utils.data来处理数据。
这里需要用到data augment技术来增强源数据

In [0]:
# 对trianing data做数据增强
train_transform = transforms.Compose([
    transforms.ToPILImage(), # 将tensor或者array的数据转换成PIL Image类型数据
    transforms.RandomHorizontalFlip(), # 随机对PIL图片进行水平翻转
    transforms.RandomRotation(15), # 将图片旋转15 degree
    transforms.ToTensor(), # 将PIL Image或者narray转换成tensor
])

# 测试数据不需要做数据增强，只需转换格式
test_transform = transforms.Compose([
    transforms.ToPILImage(), 
    transforms.ToTensor(),
])

In [0]:
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y) # pytorch tensor默认的类型是double，涉及到图像故保存更高精度
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X


In [0]:
# 分割数据集
batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

## 定义Model (CNN)

In [0]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
        
        self.cnn = nn.Sequential(
            # 第一层网络
            # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            # input的图像维度[3, 128, 128]
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64), # 在cnn的卷积层之后基本都要连接一个BatchNorm函数用来对数据进行数据的归一化处理，使得数据在进行ReLu之前不会因为数据过大而导致网络性能的不稳定
            nn.ReLU(),
            # torch.nn.MaxPool2d(kernel_size, stride, padding)
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            # 第二层网络
            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            # 第三层网络
            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            # 第四层网络
            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            # 第五层网络
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        # 全连接层
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

### CNN Model Training

In [18]:
model = Classifier().cuda()

loss = nn.CrossEntropyLoss() # loss采用crossEntropy是因为该问题是个分类问题
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # cnn model
    model.train() 

    for i, data in enumerate(train_loader):
        optimizer.zero_grad() 
        # data[0]是x，data[1]是计算出的y
        train_pred = model(data[0].cuda()) # 利用model预测概率分布，实际上调用model的forward
        batch_loss = loss(train_pred, data[1].cuda()) # 计算loss
        batch_loss.backward() # 利用back propagation计算每个参数的gradient
        optimizer.step() # 以optimizer 用gradient更新参数值

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    # torch.no_grad()表示不需要进行梯度计算
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/030] 16.58 sec(s) Train Acc: 0.234036 Loss: 0.017736 | Val Acc: 0.250729 loss: 0.016891
[002/030] 16.61 sec(s) Train Acc: 0.324853 Loss: 0.015006 | Val Acc: 0.226822 loss: 0.019867
[003/030] 16.58 sec(s) Train Acc: 0.393371 Loss: 0.013730 | Val Acc: 0.289796 loss: 0.017929
[004/030] 16.56 sec(s) Train Acc: 0.434928 Loss: 0.012776 | Val Acc: 0.332945 loss: 0.016147
[005/030] 16.62 sec(s) Train Acc: 0.466755 Loss: 0.012065 | Val Acc: 0.425948 loss: 0.013626
[006/030] 16.57 sec(s) Train Acc: 0.497669 Loss: 0.011514 | Val Acc: 0.476676 loss: 0.012291
[007/030] 16.63 sec(s) Train Acc: 0.528482 Loss: 0.010779 | Val Acc: 0.418076 loss: 0.015247
[008/030] 16.53 sec(s) Train Acc: 0.555950 Loss: 0.010212 | Val Acc: 0.527697 loss: 0.011207
[009/030] 16.57 sec(s) Train Acc: 0.577640 Loss: 0.009604 | Val Acc: 0.291545 loss: 0.028342
[010/030] 16.59 sec(s) Train Acc: 0.607440 Loss: 0.008927 | Val Acc: 0.469679 loss: 0.012972
[011/030] 16.58 sec(s) Train Acc: 0.622846 Loss: 0.008544 | Val Acc: 0

## 定义Model (DNN)

In [0]:
class DNN(nn.Module):
  def __init__(self):
    super(DNN, self).__init__()

    self.dnn = nn.Sequential(
        nn.Linear(3*128*128, 1024),
        nn.ELU(),

        nn.Linear(1024, 1024), 
        nn.ELU(),

        nn.Linear(1024, 1024), 
        nn.ELU(),

        nn.Linear(1024, 512),
        nn.ELU(),

        nn.Linear(512, 256),
        nn.ELU(),


        nn.Linear(256, 128),
        nn.ELU(),

        nn.Linear(128, 11),
    )

  def forward(self, x):
    x = x.view(-1, 3 * 128 * 128)
    out = self.dnn(x)
    #print(out.shape)
    #out = out.view(out.size()[0], -1)
    return out

## DNN Model Training

In [23]:
model_dnn = DNN().cuda()

loss = nn.CrossEntropyLoss() # loss采用crossEntropy是因为该问题是个分类问题
optimizer = torch.optim.Adam(model_dnn.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # dnn model
    model_dnn.train() 

    for i, data in enumerate(train_loader):
        optimizer.zero_grad() 
        # data[0]是x，data[1]是计算出的y
        train_pred = model_dnn(data[0].cuda()) # 利用model预测概率分布，实际上调用model的forward
        batch_loss = loss(train_pred, data[1].cuda()) # 计算loss
        batch_loss.backward() # 利用back propagation计算每个参数的gradient
        optimizer.step() # 以optimizer 用gradient更新参数值

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model_dnn.eval()
    # torch.no_grad()表示不需要进行梯度计算
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model_dnn(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/030] 6.31 sec(s) Train Acc: 0.161666 Loss: 0.018364 | Val Acc: 0.165889 loss: 0.018173
[002/030] 6.29 sec(s) Train Acc: 0.192682 Loss: 0.017531 | Val Acc: 0.219242 loss: 0.017161
[003/030] 6.29 sec(s) Train Acc: 0.215082 Loss: 0.017174 | Val Acc: 0.223907 loss: 0.016746
[004/030] 6.27 sec(s) Train Acc: 0.215589 Loss: 0.017100 | Val Acc: 0.215452 loss: 0.016883
[005/030] 6.31 sec(s) Train Acc: 0.227549 Loss: 0.016887 | Val Acc: 0.227405 loss: 0.016696
[006/030] 6.23 sec(s) Train Acc: 0.228360 Loss: 0.016817 | Val Acc: 0.216327 loss: 0.017023
[007/030] 6.34 sec(s) Train Acc: 0.222380 Loss: 0.016878 | Val Acc: 0.216327 loss: 0.016904
[008/030] 6.32 sec(s) Train Acc: 0.235354 Loss: 0.016735 | Val Acc: 0.219242 loss: 0.016653
[009/030] 6.24 sec(s) Train Acc: 0.233935 Loss: 0.016702 | Val Acc: 0.225948 loss: 0.016513
[010/030] 6.30 sec(s) Train Acc: 0.234239 Loss: 0.016577 | Val Acc: 0.228280 loss: 0.016450
[011/030] 6.28 sec(s) Train Acc: 0.240016 Loss: 0.016558 | Val Acc: 0.249271 los

将validation set和training set共同训练

In [0]:
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [0]:
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_best.parameters(), lr = 0.001)

for epoch in range(num_epoch):
  epoch_start_time = time.time()
  train_acc = 0.0
  train_loss = 0.0

  model_best.train()

  for i, data in enumerate(train_val_loader):
    optimizer.zero_grad()
    train_pred = model_best(data[0].cuda())
    batch_loss = loss(train_pred, data[1].cuda())
    batch_loss.backward()
    optimizer.step()

    train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
    train_loss += batch_loss.item()

  print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

## 测试

In [0]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

model_best.eval()
prediction = []

with torch.no_grad():
  for i, data in enumerate(test_loader):
    test_pred = model_best(data.cuda())
    test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
    for y in test_label:
      prediction.append(y)



## 保存结果

In [0]:
with open('predict_cnn.csv', 'w') as f:
  f.write('Id,Category\n')
  for i, y in enumerate(prediction):
    f.write('{},{}\n'.format(i,y))

In [0]:
!ls