# <center>7.3.2 实战: CIFAR数据集分类和猫狗分类</center>

实验说明：本实验代码利用Pytorch框架搭建Resnet模型，并使用CIFAR数据集对模型进行训练，最终能进行猫狗分类。运行该代码需要的环境如下：
Python3.6+
Pytorch1.6.0


## 1.导入需要的python库

首先导入需要的python库，基于Pytorch用于构建和训练模型。以下代码均为Pytorch代码。

In [2]:
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import torch.optim as optim

from tqdm import tqdm
import pandas as pd

## 2.定义图片读取函数

规定从文件夹中读取图片的规范，划分训练与测试用图片并分配标签。

In [3]:
def text_save(filename,data_dir,data_class):
    file = open(filename,'a')
    for i in range(len(data_class)):
        s = str(data_dir[i]+' '+str(data_class[i])) +'\n'
        file.write(s)
    file.close()
    print('文件保存成功')

def get_files(file_dir):    #从路径获得文件
    cat = []
    dog = []
    label_dog = []
    label_cat = []
    for file in os.listdir(file_dir):
        name = file.split(sep = '.')
        if name[0]=='cat':
            cat.append(file_dir + file)
            label_cat.append(0) #标签0对应猫
        else:
            dog.append(file_dir + file)
            label_dog.append(1) #标签1对应狗
    print('There are %d cats and %d dogs' %(len(cat), (len(dog))))

    cat.extend(dog)
    label_cat.extend(label_dog)
    image_list = cat
    label_list = label_cat
    print(type(image_list))
    return image_list,label_list

def data_process():
    image_list, label_list = get_files('train/')
    text_save('data.txt', image_list, label_list)

## 3.图片预处理

将图像进行尺寸缩放，转化为tensor并作归一化。

In [4]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),  # 将图片转换为Tensor,归一化至[0,1]
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
])

## 4.构建数据集

定义训练集和测试集的类，生成list。

In [5]:
class train_Dataset(Dataset):
    def __init__(self, txt_path, transform=None, target_transform=None):
        fh = open(txt_path, 'r')
        imgs = []
        for line in fh:
            line = line.rstrip()
            words = line.split()
            imgs.append((words[0], int(words[1])))

        self.imgs = imgs  # 最主要就是生成该list， 然后DataLoader中给index，通过getitem读取图片数据
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB') # 像素值 0~255，在transfrom.totensor会除以255，使像素值变成 0~1
        if self.transform is not None:
            img = self.transform(img) # 在这里做transform，转为tensor
        #img = torchvision.transforms.functional.to_tensor(img)
        return img, label

    def __len__(self):
        return len(self.imgs)
    
class test_dataset(Dataset):
    def __init__(self, csv_file, test_dir, transform = None, target_transform = None):
            #csv_file是sample_submission.csv的位置，test_dir是test图片的文件夹
            self.test_csv = pd.read_csv(csv_file)
            self.test_dir = test_dir
            self.transform = transform

    def __getitem__(self, index):
        image_name = os.path.join(self.test_dir,str(int(self.test_csv.ix[index,0])))
        image_name = image_name + '.jpg'
        image = Image.open(image_name).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.test_csv)

## 5.定义模型训练与效果评估函数

训练、测试并保存模型。

In [6]:
def save_models(net,epoch):   #保存模型
    torch.save(net.state_dict(),'model/mymodel_epoch_1{}.pth'.format(epoch))
    print('model saved')

def train(dataloader, net, lr = 0.01, momentum = 0.9, epochs = 10 ):  #训练函数
    cirterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),lr,momentum)
    print('开始训练')
    for epoch in range(epochs):
        net.train()
        train_acc = 0.0
        for i,(image,label) in tqdm(enumerate(dataloader)):
            image,label = image.cuda(),label.cuda()
            optimizer.zero_grad()
            output = net(image)
            loss = cirterion(output, label)
            loss.backward()
            optimizer.step()
            _, prediction = torch.max(output.data, 1)
            train_acc += torch.sum(prediction == label.data)
            if i % 100 == 0:
                accuracy = train_acc/1600.0*100
                print("epoch: %d Iteration %d loss: %f accuracy: %f"%(epoch,i,loss,accuracy))
                train_acc = 0
        if epoch % 3 == 0:
            save_models(net, epoch)
    print('训练完成')
    save_models(net, epoch)
    
def train_model():
    dataset = train_Dataset('data.txt', transform = transform)
    train_dataloader = DataLoader(dataset,batch_size= 16, shuffle = True, num_workers= 0)
    #dataloader加载完成
    model = torchvision.models.resnet50(pretrained=True)
    model.fc = nn.Sequential(nn.Linear(2048,2))
    model = model.cuda()
    print('model construct finished')
    #开始训练
    train(net = model, lr = 0.0001, momentum = 0.09, epochs = 19 , dataloader = train_dataloader)
    
def eval():
    csv_file = 'sample_submission.csv'
    test_dir = 'test/'
    test_data = test_dataset(csv_file, test_dir, transform)
    test_dataloader = DataLoader(test_data, batch_size= 1, shuffle = False, num_workers= 0)
    #test_datalaoder加载完成
    model = torchvision.models.resnet50(pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(2048,2),
    )
    model.load_state_dict(torch.load("mymodel_epoch_19.pth"))
    model = model.cuda()
    print('model_load finished')
    result = []
    model.eval()
    for i , image in tqdm(enumerate(test_dataloader)):
        image = image.cuda()
        output = model(image)
        _, prediction = torch.max(output.data, 1)
        result.append(prediction.item())
    #将结果写入文件
    dataframe = pd.DataFrame({'label':result})
    dataframe.to_csv("result.csv",sep=',')

## 6.执行训练

In [7]:
data_process()
train_model()

There are 12500 cats and 12500 dogs
<class 'list'>
文件保存成功


0it [00:00, ?it/s]

model construct finished
开始训练


2it [00:00,  5.50it/s]

epoch: 0 Iteration 0 loss: 0.743521 accuracy: 0.562500


102it [00:14,  7.56it/s]

epoch: 0 Iteration 100 loss: 0.624546 accuracy: 55.187500


202it [00:28,  8.00it/s]

epoch: 0 Iteration 200 loss: 0.560010 accuracy: 80.250000


302it [00:41,  6.82it/s]

epoch: 0 Iteration 300 loss: 0.455680 accuracy: 89.812500


402it [00:53,  6.37it/s]

epoch: 0 Iteration 400 loss: 0.438194 accuracy: 93.125000


502it [01:05,  8.05it/s]

epoch: 0 Iteration 500 loss: 0.389462 accuracy: 93.250000


602it [01:17,  7.96it/s]

epoch: 0 Iteration 600 loss: 0.384843 accuracy: 94.687500


702it [01:30,  7.44it/s]

epoch: 0 Iteration 700 loss: 0.350448 accuracy: 95.062500


802it [01:44,  6.75it/s]

epoch: 0 Iteration 800 loss: 0.280344 accuracy: 95.312500


902it [01:57,  7.75it/s]

epoch: 0 Iteration 900 loss: 0.315880 accuracy: 95.062500


1002it [02:09,  7.80it/s]

epoch: 0 Iteration 1000 loss: 0.280065 accuracy: 95.500000


1102it [02:22,  6.90it/s]

epoch: 0 Iteration 1100 loss: 0.227536 accuracy: 95.562500


1202it [02:34,  7.15it/s]

epoch: 0 Iteration 1200 loss: 0.158288 accuracy: 95.374992


1302it [02:47,  7.19it/s]

epoch: 0 Iteration 1300 loss: 0.225764 accuracy: 95.687500


1402it [03:00,  6.25it/s]

epoch: 0 Iteration 1400 loss: 0.196748 accuracy: 96.000000


1502it [03:14,  6.53it/s]

epoch: 0 Iteration 1500 loss: 0.269232 accuracy: 95.874992


1602it [03:28,  7.36it/s]

epoch: 0 Iteration 1600 loss: 0.124474 accuracy: 96.375000


1702it [03:42,  7.72it/s]

epoch: 0 Iteration 1700 loss: 0.097692 accuracy: 96.125000


1802it [03:55,  7.15it/s]

epoch: 0 Iteration 1800 loss: 0.188617 accuracy: 96.125000


1902it [04:10,  6.23it/s]

epoch: 0 Iteration 1900 loss: 0.175515 accuracy: 96.625000


2002it [04:22,  7.71it/s]

epoch: 0 Iteration 2000 loss: 0.098016 accuracy: 96.375000


2102it [04:34,  8.10it/s]

epoch: 0 Iteration 2100 loss: 0.103188 accuracy: 96.750000


2202it [04:48,  7.06it/s]

epoch: 0 Iteration 2200 loss: 0.179235 accuracy: 96.125000


2302it [05:01,  8.07it/s]

epoch: 0 Iteration 2300 loss: 0.249965 accuracy: 97.187492


2402it [05:12,  8.17it/s]

epoch: 0 Iteration 2400 loss: 0.117296 accuracy: 97.250000


2502it [05:24,  8.14it/s]

epoch: 0 Iteration 2500 loss: 0.147641 accuracy: 96.437492


2602it [05:35,  7.81it/s]

epoch: 0 Iteration 2600 loss: 0.152393 accuracy: 96.500000


2702it [05:48,  7.73it/s]

epoch: 0 Iteration 2700 loss: 0.050487 accuracy: 96.812500


2802it [06:00,  7.45it/s]

epoch: 0 Iteration 2800 loss: 0.062162 accuracy: 96.375000


2902it [06:13,  7.84it/s]

epoch: 0 Iteration 2900 loss: 0.369442 accuracy: 96.437492


3002it [06:25,  7.44it/s]

epoch: 0 Iteration 3000 loss: 0.260396 accuracy: 95.750000


3102it [06:37,  7.63it/s]

epoch: 0 Iteration 3100 loss: 0.056514 accuracy: 97.250000


3125it [06:40,  7.81it/s]


FileNotFoundError: [Errno 2] No such file or directory: '/model/mymodel_epoch_10.pth'