In [1]:
#导入包
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pylab as plt
import torchvision.models as models
from torch.nn import functional as F
from d2l import torch as d2l
#This is for the progress bar.
# from tqdm import tqdm
# import seaborn as sns

In [2]:
# Take a first look at the label file
labels_dataframe = pd.read_csv('./classify-leaves/train.csv')
labels_dataframe.head()

Unnamed: 0,image,label
0,images/0.jpg,maclura_pomifera
1,images/1.jpg,maclura_pomifera
2,images/2.jpg,maclura_pomifera
3,images/3.jpg,maclura_pomifera
4,images/4.jpg,maclura_pomifera


In [3]:
# Summary statistics for the label table
labels_dataframe.describe()

Unnamed: 0,image,label
count,18353,18353
unique,18353,176
top,images/4702.jpg,maclura_pomifera
freq,1,353


In [4]:
# Collect the distinct label names in sorted order
leaves_labels = sorted(set(labels_dataframe['label']))
n_classes = len(leaves_labels)
print(n_classes)
leaves_labels[:10]

176


['abies_concolor',
 'abies_nordmanniana',
 'acer_campestre',
 'acer_ginnala',
 'acer_griseum',
 'acer_negundo',
 'acer_palmatum',
 'acer_pensylvanicum',
 'acer_platanoides',
 'acer_pseudoplatanus']

In [5]:
# Map every label name to its integer class index
class_to_num = {
    name: idx for name, idx in zip(leaves_labels, range(n_classes))
}
class_to_num

{'abies_concolor': 0,
 'abies_nordmanniana': 1,
 'acer_campestre': 2,
 'acer_ginnala': 3,
 'acer_griseum': 4,
 'acer_negundo': 5,
 'acer_palmatum': 6,
 'acer_pensylvanicum': 7,
 'acer_platanoides': 8,
 'acer_pseudoplatanus': 9,
 'acer_rubrum': 10,
 'acer_saccharinum': 11,
 'acer_saccharum': 12,
 'aesculus_flava': 13,
 'aesculus_glabra': 14,
 'aesculus_hippocastamon': 15,
 'aesculus_pavi': 16,
 'ailanthus_altissima': 17,
 'albizia_julibrissin': 18,
 'amelanchier_arborea': 19,
 'amelanchier_canadensis': 20,
 'amelanchier_laevis': 21,
 'asimina_triloba': 22,
 'betula_alleghaniensis': 23,
 'betula_jacqemontii': 24,
 'betula_lenta': 25,
 'betula_nigra': 26,
 'betula_populifolia': 27,
 'broussonettia_papyrifera': 28,
 'carpinus_betulus': 29,
 'carpinus_caroliniana': 30,
 'carya_cordiformis': 31,
 'carya_glabra': 32,
 'carya_ovata': 33,
 'carya_tomentosa': 34,
 'castanea_dentata': 35,
 'catalpa_bignonioides': 36,
 'catalpa_speciosa': 37,
 'cedrus_atlantica': 38,
 'cedrus_deodara': 39,
 'cedru

In [6]:
# Inverse mapping (class index -> label name), handy when decoding predictions
num_to_class = dict((idx, name) for name, idx in class_to_num.items())

In [7]:
# Subclass the PyTorch Dataset to serve the leaf images listed in a csv file
class LeavesData(Dataset):
    """Dataset of leaf images described by a csv file.

    The csv is read with ``header=None``, so row 0 holds the column names and
    the samples start at row 1. Column 0 is the image path (relative to
    ``file_path``) and column 1 is the string label.
    """

    _VALID_MODES = ('train', 'valid', 'test')

    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.2, resize_height=256, resize_width=256):
        """
        Args:
            csv_path (string): path of the csv file
            file_path (string): prefix prepended verbatim to every image name
                from the csv (plain string concatenation, so it must end with
                a path separator)
            mode (string): one of 'train', 'valid' or 'test'
            valid_ratio (float): fraction of the csv rows held out for validation
            resize_height (int): stored on the instance only
            resize_width (int): stored on the instance only

        Raises:
            ValueError: if ``mode`` is not one of the supported modes.
        """
        # Fail early with a clear message instead of an AttributeError on image_arr later
        if mode not in self._VALID_MODES:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._VALID_MODES, mode))

        # NOTE(review): these two values are kept for compatibility but are not
        # used; the transform below resizes to a fixed 224x224.
        self.resize_height = resize_height
        self.resize_width = resize_width

        self.file_path = file_path
        self.mode = mode
        # Built on first __getitem__ call and then reused for every sample
        self._transform = None

        # Read the csv with pandas; header=None keeps the header line as row 0
        self.data_info = pd.read_csv(csv_path, header=None)
        # Number of samples (all rows minus the header row)
        self.data_len = len(self.data_info.index) - 1
        self.train_len = int(self.data_len * (1 - valid_ratio))

        if mode == 'train':
            # Rows 1 .. train_len-1: column 0 is the image name, column 1 the label
            self.train_image = np.asarray(self.data_info.iloc[1:self.train_len, 0])
            self.train_label = np.asarray(self.data_info.iloc[1:self.train_len, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label
        elif mode == 'valid':
            # Rows train_len .. end
            self.valid_image = np.asarray(self.data_info.iloc[self.train_len:, 0])
            self.valid_label = np.asarray(self.data_info.iloc[self.train_len:, 1])
            self.image_arr = self.valid_image
            self.label_arr = self.valid_label
        else:  # mode == 'test': every row after the header, images only
            self.test_image = np.asarray(self.data_info.iloc[1:, 0])
            self.image_arr = self.test_image

        self.real_len = len(self.image_arr)

        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.real_len))

    def _build_transform(self):
        """Return the preprocessing pipeline for the current mode."""
        steps = [transforms.Resize((224, 224))]
        if self.mode == 'train':
            # Augmentation is applied to the training split only
            steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.ToTensor())
        return transforms.Compose(steps)

    def __getitem__(self, index):
        """Return the image tensor for ``index``, plus its numeric label unless in test mode."""
        # File name of the requested sample
        single_image_name = self.image_arr[index]

        # Close the file handle once the pixels are loaded, and force 3 channels
        # so grayscale / RGBA files still produce tensors with 3 channels.
        with Image.open(self.file_path + single_image_name) as raw_img:
            img_as_img = raw_img.convert('RGB')

        if self._transform is None:
            self._transform = self._build_transform()
        img_as_img = self._transform(img_as_img)

        if self.mode == 'test':
            return img_as_img

        # String label -> numeric label via the module-level class_to_num mapping
        label = self.label_arr[index]
        number_label = class_to_num[label]
        return img_as_img, number_label

    def __len__(self):
        """Number of samples in the selected split."""
        return self.real_len

In [8]:
train_path = './classify-leaves/train.csv'
test_path = './classify-leaves/test.csv'
# The csv entries already start with "images/", so point at the parent directory
img_path = './classify-leaves/'

train_dataset = LeavesData(csv_path=train_path, file_path=img_path, mode='train')
val_dataset = LeavesData(csv_path=train_path, file_path=img_path, mode='valid')
test_dataset = LeavesData(csv_path=test_path, file_path=img_path, mode='test')
print(train_dataset, val_dataset, test_dataset, sep='\n')

Finished reading the train set of Leaves Dataset (14681 samples found)
Finished reading the valid set of Leaves Dataset (3672 samples found)
Finished reading the test set of Leaves Dataset (8800 samples found)
<__main__.LeavesData object at 0x0000022818FA4730>
<__main__.LeavesData object at 0x0000022818F787F0>
<__main__.LeavesData object at 0x0000022818F789A0>


In [9]:
# Define the data loaders.
# NOTE(review): num_workers=5 starts worker processes; if loading hangs when run
# from a notebook, try num_workers=0 — confirm on the target platform.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,  # reshuffle every epoch so SGD does not see the samples in fixed csv order
    num_workers=5
)

# Evaluation keeps the original order
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=5
)

# Test predictions must stay aligned with the rows of the test csv
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=5
)

In [10]:
# Test code (known to have problems)
# train_loader = iter(train_loader)
# val_loader=iter(val_loader)
# print(train_loader)
# print(next(train_loader))

In [11]:
# Show what the data looks like
# def im_convert(tensor):
#     """ 展示数据"""
    
#     image = tensor.to("cpu").clone().detach()
#     image = image.numpy().squeeze()
#     image = image.transpose(1,2,0)
#     image = image.clip(0, 1)

#     return image

# fig=plt.figure(figsize=(20, 12))
# columns = 4
# rows = 2

# dataiter = iter(val_loader)
# inputs, classes = dataiter.next()

# for idx in range (columns*rows):
#     ax = fig.add_subplot(rows, columns, idx+1, xticks=[], yticks=[])
#     ax.set_title(num_to_class[int(classes[idx])])
#     plt.imshow(im_convert(inputs[idx]))
# plt.show()

### ResNet

In [12]:
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages with a skip connection.

    When ``use_1x1conv`` is set, the skip path goes through a 1x1 convolution
    (with the same stride as the first conv) so that its output matches the
    main path in channels and spatial size; otherwise the input is added as is.
    """

    def __init__(self,input_channels,num_channels,
                use_1x1conv=False,strides=1):
        super().__init__()
        # Main path: only the first convolution applies the stride
        self.conv1 = nn.Conv2d(
            in_channels=input_channels, out_channels=num_channels,
            kernel_size=3, stride=strides, padding=1)
        self.conv2 = nn.Conv2d(
            in_channels=num_channels, out_channels=num_channels,
            kernel_size=3, padding=1)
        # Optional projection on the skip path
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self,X):
        """Apply the block to ``X`` and return the activated sum of main and skip paths."""
        out = self.conv1(X)
        out = F.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))
        shortcut = X if self.conv3 is None else self.conv3(X)
        out += shortcut
        return F.relu(out)

In [13]:
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 3x3 stride-2 max pooling
b1 = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [14]:
def resnet_block(input_channels,num_channels,num_residuals,
                first_block=False):
    """Return a list of ``num_residuals`` Residual units forming one stage.

    Unless ``first_block`` is set, the first unit of the stage uses a 1x1
    convolution on the skip path together with stride 2; every other unit
    maps ``num_channels`` to ``num_channels`` with the default settings.
    """
    def make_unit(position):
        # Only the leading unit of a non-first stage changes channels / resolution
        if position == 0 and not first_block:
            return Residual(input_channels, num_channels,
                            use_1x1conv=True, strides=2)
        return Residual(num_channels, num_channels)

    return [make_unit(position) for position in range(num_residuals)]

In [15]:
# Four residual stages with two Residual units each
b2 = nn.Sequential(*resnet_block(input_channels=64, num_channels=64, num_residuals=2, first_block=True))
b3 = nn.Sequential(*resnet_block(input_channels=64, num_channels=256, num_residuals=2))
b4 = nn.Sequential(*resnet_block(input_channels=256, num_channels=512, num_residuals=2))
b5 = nn.Sequential(*resnet_block(input_channels=512, num_channels=1024, num_residuals=2))

In [16]:
# Full network: stem, four residual stages, global average pooling, linear classifier
net = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    nn.Flatten(),
    nn.Linear(in_features=1024, out_features=176),
)

In [None]:
# Hyper-parameters for the training run
lr = 0.01
num_epochs = 30
batch_size = 16  # not passed to train_ch6 below
d2l.train_ch6(net, train_loader, val_loader, num_epochs, lr, d2l.try_gpu())

training on cuda:0
