## 任务描述：

### ✓代码跑通
### 请在 MyCNN类中补全代码，构造卷积神经网络，保证程序跑通。
### ✓调优
### 思考并动手进行调优，以在验证集上的准确率为评价指标，验证集上准确率越高，得分越高！


### *本实践旨在通过一个美食分类的案列，让大家理解和掌握如何使用飞桨动态图搭建一个卷积神经网络。*



In [64]:

!pip install Augmentor
!pip install torch

Looking in indexes: http://mirrors.aliyun.com/pypi/simple/
You should consider upgrading via the '/Users/vincent/.pyenv/versions/3.6.3/envs/torch_3.6.3/bin/python3.6 -m pip install --upgrade pip' command.[0m
Looking in indexes: http://mirrors.aliyun.com/pypi/simple/
You should consider upgrading via the '/Users/vincent/.pyenv/versions/3.6.3/envs/torch_3.6.3/bin/python3.6 -m pip install --upgrade pip' command.[0m


In [65]:
import os
import zipfile
import random
import json
import sys
import numpy as np
from PIL import Image
from PIL import ImageEnhance
import matplotlib.pyplot as plt
import shutil
import Augmentor
import glob
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
%matplotlib notebook

In [66]:
'''
参数配置
'''
train_parameters = {
    "input_size": [3, 128, 128],                                #输入图片的shape
    "class_dim": -1,                                          #分类数
    "src_path":"data/data42610/foods.zip",                    #原始数据集路径
    "target_path":"data/",                     #要解压的路径
    "train_list_path": "data/train.txt",       #train.txt路径
    "eval_list_path": "data/eval.txt",         #eval.txt路径
    "readme_path": "data/readme.json",         #readme.json路径
    "label_dict":{},                                          #标签字典
    "num_epochs": 50,                                          #训练轮数
    "train_batch_size": 64,                                   #训练时每个批次的大小
    "learning_strategy": {                                    #优化函数相关的配置
        "lr": 0.0001                                          #超参数学习率
    } 
}

# **一、数据准备**

### （1）解压原始数据集

### （2）按照比例划分训练集与验证集

### （3）乱序，生成数据列表

### （4）构造训练数据集提供器和验证数据集提供器

In [67]:

def unzip_data(src_path,target_path):
    '''
    解压原始数据集，将src_path路径下的zip包解压至target_path目录下
    '''
    if(not os.path.isdir(target_path + "foods")):     
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()

In [68]:

def get_data_list(target_path, train_list_path, eval_list_path):
    '''
    生成数据列表
    '''
    #存放所有类别的信息
    class_detail = []
    #获取所有类别保存的文件夹名称
    data_list_path=target_path+"foods/"
    class_dirs = os.listdir(data_list_path)  
    #总的图像数量
    all_class_images = 0
    #存放类别标签
    class_label=0
    #存放类别数目
    class_dim = 0
    #存储要写进eval.txt和train.txt中的内容
    trainer_list=[]
    eval_list=[]
    #读取每个类别
    for class_dir in class_dirs:
        if class_dir != ".DS_Store":
            class_dim += 1
            #每个类别的信息
            class_detail_list = {}
            eval_sum = 0
            trainer_sum = 0
            #统计每个类别有多少张图片
            class_sum = 0
            #获取类别路径 
            path = data_list_path  + class_dir
            # 获取所有图片
            img_paths = os.listdir(path)
            for img_path in img_paths:                                  # 遍历文件夹下的每个图片
                name_path = path + '/' + img_path                       # 每张图片的路径
                if class_sum % 8 == 0:                                  # 每8张图片取一个做验证数据
                    eval_sum += 1                                       # test_sum为测试数据的数目
                    eval_list.append(name_path + "\t%d" % class_label + "\n")
                else:
                    trainer_sum += 1 
                    trainer_list.append(name_path + "\t%d" % class_label + "\n")#trainer_sum测试数据的数目
                class_sum += 1                                          #每类图片的数目
                all_class_images += 1                                   #所有类图片的数目
             
            # 说明的json文件的class_detail数据
            class_detail_list['class_name'] = class_dir             #类别名称
            class_detail_list['class_label'] = class_label          #类别标签
            class_detail_list['class_eval_images'] = eval_sum       #该类数据的测试集数目
            class_detail_list['class_trainer_images'] = trainer_sum #该类数据的训练集数目
            class_detail.append(class_detail_list)  
            #初始化标签列表
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1 
            
    #初始化分类数
    train_parameters['class_dim'] = class_dim
    
    #乱序  
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image) 
            
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image) 

    # 说明的json文件信息
    readjson = {}
    readjson['all_class_name'] = data_list_path                  #文件父目录
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'],'w') as f:
        f.write(jsons)
    print ('生成数据列表完成！')

In [69]:
# def custom_reader(file_list):
#     '''
#     自定义reader 
#     '''
#     def reader():
#         with open(file_list, 'r') as f:
#             lines = [line.strip() for line in f]
#             for line in lines:
#                 img_path, lab = line.strip().split('\t')
#                 img = Image.open(img_path) 
#                 if img.mode != 'RGB': 
#                     img = img.convert('RGB') 
#                 img = img.resize((64, 64), Image.BILINEAR)
#                 img = np.array(img).astype('float32') 
#                 img = img.transpose((2, 0, 1))  # HWC to CHW 
#                 img = img/255                   # 像素值归一化 
#                 yield img, int(lab) 
#     return reader


In [70]:
'''
参数初始化
'''
src_path=train_parameters['src_path']
target_path=train_parameters['target_path']
train_list_path=train_parameters['train_list_path']
eval_list_path=train_parameters['eval_list_path']
batch_size=train_parameters['train_batch_size']

'''
解压原始数据到指定路径
'''
unzip_data(src_path, target_path)

'''
划分训练集与验证集，乱序，生成数据列表
'''
#每次生成数据列表前，首先清空train.txt和eval.txt
with open(train_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
with open(eval_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
    
#生成数据列表   
get_data_list(target_path,train_list_path,eval_list_path)


生成数据列表完成！


In [71]:
class Food_dataset(Dataset):
    def __init__(self, path, transform=None):
        with open(path, 'r') as f:
            img_idxs = []
            for line in f:
                line = line.strip()
                img_idx, label = line.split()
                img_idxs.append((img_idx, label))
        self.img_idxs = img_idxs
        self.transform = transform
    def __getitem__(self, idx):
        img_idx, label = self.img_idxs[idx]
        img = Image.open(img_idx).convert('RGB')
        # img = img.resize((64, 64), Image.BILINEAR)
        img = img.resize((128, 128), Image.BILINEAR) # 64 x 64?
        if self.transform:
            img = self.transform(img)
        return img, int(label)
    def __len__(self):
        return len(self.img_idxs)

In [72]:
# prepare data:
train_transform = transforms.Compose([
    # transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), #随机翻转
    transforms.RandomRotation(15), #随机旋转
    transforms.ToTensor(), # to Tensor， normalize到[0,1]
])
# testing，不需要数据增强（data augmentation）
test_transform = transforms.Compose([
    # transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])
train_dataset = Food_dataset(train_list_path,transform=train_transform)
test_dateset = Food_dataset(eval_list_path,transform=test_transform)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dateset, batch_size=batch_size, shuffle=True)

In [73]:
'''
构造数据提供器
'''
# train_reader = paddle.batch(custom_reader(train_list_path),
#                             batch_size=batch_size,
#                             drop_last=True)      #若设置为True，则当最后一个batch不等于batch_size时，丢弃最后一个batch；若设置为False，则不会。默认值为False。
# eval_reader = paddle.batch(custom_reader(eval_list_path),
#                             batch_size=batch_size,
#                             drop_last=True)

'\n构造数据提供器\n'

# **二、模型配置**

### 请完成MyCNN模型

In [74]:
#定义卷积分类网络
class MyCNN(nn.Module):
    def __init__(self, output_num):
        super(MyCNN, self).__init__()
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #torch.nn.MaxPool2d(kernel_size, stride, padding)
        #input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # 輸出[64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 輸出[64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # 輸出[128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 輸出[128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # 輸出[256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # 輸出[256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # 輸出[512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # 輸出[512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # 輸出[512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # 輸出[512, 4, 4]
        )
        # 全連接的前向傳播神經網絡
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, output_num)   # 5分类
        )

    def forward(self, input):
        y = self.cnn(input)
        y = y.view(y.size()[0], -1)  # 攤平成1維
        return self.fc(y)

# **三、模型训练 && 四、模型评估**

In [75]:
def draw_train_process(title, iters, costs, accs, label_cost, lable_acc):
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel("loss/acc", fontsize=20)
    plt.plot(iters, costs, color='red', label=label_cost) 
    plt.plot(iters, accs, color='green', label=lable_acc) 
    plt.legend()
    plt.grid()
    plt.show()

# def draw_process(title, color, iters, data, label):
#     plt.title(title, fontsize=24)
#     plt.xlabel("iter", fontsize=20)
#     plt.ylabel(label, fontsize=20)
#     plt.plot(iters, data, color=color,label=label) 
#     plt.legend()
#     plt.grid()
#     plt.show()

In [76]:
'''
模型训练
'''
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyCNN(train_parameters['class_dim']).to(device)
loss = nn.CrossEntropyLoss() # 因爲是分類任務，所以使用交叉熵損失 
optimizer = torch.optim.Adam(model.parameters(), train_parameters['learning_strategy']['lr']) # 使用Adam優化器
num_epoch = train_parameters['num_epochs'] #迭代次數

train_iter=0
train_iters=[]
train_costs=[]
train_accs=[]

print(train_parameters['class_dim'])
print(train_parameters['label_dict'])

for epoch in range(num_epoch):
    train_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 确保 model 是在 训练 model (开启 Dropout 等...)
    for i, (x, y) in enumerate(train_dataloader):
        # print(type(x), type(y))
        y_pred = model(x.to(device))  # 利用 model 得到 预测的概率分布，这边实际上是 调用模型的 forward 函數
        batch_loss = loss(y_pred, y.to(device))  # 计算loss （注意 prediction 跟 label 必须同时在 CPU 或是 GPU 上）
        optimizer.zero_grad() # 用 optimizer 将模型参数的梯度 gradient 清零
        batch_loss.backward() # 利用 back propagation 算出每个参数的 gradient
        optimizer.step()      # 以 optimizer 用 gradient 更新参数
        batch_acc = np.sum(np.argmax(y_pred.cpu().data.numpy(), axis=1) == y.numpy())
        batch_loss = batch_loss.item()
        train_acc += batch_acc
        train_loss += batch_loss

        train_iter += batch_size
        train_iters.append(train_iter)
        train_costs.append(batch_loss / len(y))
        train_accs.append(batch_acc / len(y))
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f '% 
            (epoch + 1, num_epoch, time.time()- train_start_time, 
            train_acc / len(train_dataset), train_loss/len(train_dataset)))
    draw_train_process('training', train_iters, train_costs, train_accs, 'cost', 'acc')
    
    # 验证集 test
    test_start_time = time.time()
    model.eval()
    with torch.no_grad():
        for i, (x, y) in enumerate(test_dataloader):
            y_pred = model(x.to(device))
            batch_loss = loss(y_pred, y.to(device))
            val_acc += np.sum(np.argmax(y_pred.cpu().data.numpy(), axis=1) == y.numpy())
            val_loss += batch_loss.item()
        val_acc = val_acc / len(test_dateset)
        val_loss = val_loss / len(test_dateset)

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Val Acc: %3.6f loss: %3.6f' % 
            (epoch + 1, num_epoch, time.time()-test_start_time, val_acc, val_loss))

torch.save(model,'model/MyCNN.pkl')


5
{'0': 'beef_tartare', '1': 'apple_pie', '2': 'beef_carpaccio', '3': 'baby_back_ribs', '4': 'baklava'}
[001/050] 574.47 sec(s) Train Acc: 0.494400 Loss: 0.018787 


<IPython.core.display.Javascript object>

[001/050] 30.28 sec(s) Val Acc: 0.588800 loss: 0.017424


### 输出验证集精度

In [None]:
'''
模型校验
'''
model.eval()
prediction = []
with torch.no_grad():
    for i, (x, y) in enumerate(test_dataloader):
        y_pred = model(x.to(device))
        # 預測值中概率最大的下標即爲模型預測的食物標籤
        # test_label = np.argmax(y_pred.cpu().data.numpy(), axis=1)
        # print(i)
        val_acc = np.sum(np.argmax(y_pred.cpu().data.numpy(), axis=1) == y.numpy())
        prediction.append(val_acc / len(y))
print('test acc: ', np.mean(prediction))

0
0.609375
1
0.59375
2
0.59375
3
0.640625
4
0.6875
5
0.59375
6
0.671875
7
0.53125
8
0.671875
9
0.5306122448979592
