<a href="https://colab.research.google.com/github/JonChanGit/d2l-zh/blob/master/d2l_%E7%AB%9E%E8%B5%9B2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the d2l helper package and the deep-learning frameworks into the notebook runtime.
! pip install d2l mxnet torch torchvision torchaudio
# Print the GPU(s) visible to this runtime.
! nvidia-smi
# Unpack the dataset archive into /content/classify-leaves/.
# NOTE(review): the archive lives under /content/drive, presumably a mounted Google Drive; confirm it is mounted before running.
! unzip /content/drive/MyDrive/sample/classify-leaves.zip  -d /content/classify-leaves/

In [4]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
from PIL import Image
import pandas as pd
import numpy as np

from d2l import torch as d2l
import torch
from torch import nn
from torch.nn import functional as F

from sklearn.preprocessing import LabelEncoder

In [22]:
def load_label_map(csv_file):
    """Build the two-way mapping between class names and integer class ids.

    Class names are taken from the second column of ``csv_file``. Ids are the
    positions of the names in the array returned by ``np.unique`` (sorted
    unique values).

    Args:
        csv_file: Path of the CSV file whose second column holds the labels.

    Returns:
        A ``(class2num_map, num2class_map)`` tuple: class name -> id and
        id -> class name.
    """
    label_column = pd.read_csv(csv_file).iloc[:, 1].values
    class_names = np.unique(label_column)
    class2num_map: dict[str, int] = {
        name: index for index, name in enumerate(class_names)
    }
    num2class_map: dict[int, str] = {
        index: name for index, name in enumerate(class_names)
    }
    return class2num_map, num2class_map

# Label maps built once from the training CSV. getNet and LeavesDataset read
# class2num_map as a module-level global, so this cell must run before them.
class2num_map ,num2class_map = load_label_map('/content/classify-leaves/train.csv')

# Hyperparameters: learning rate, number of epochs, and mini-batch size
# (passed to train() and to the DataLoaders in the training cell).
# NOTE(review): batch_size=1 feeds a single image per step into a network that
# uses BatchNorm2d; consider a larger batch size.
lr, num_epochs, batch_size = 0.05, 10, 1

In [23]:
# Residual block
class Residual(nn.Module):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of output channels.
        use_1x1conv: When true, the shortcut goes through a 1x1 convolution so
            that its shape matches the output of the main branch.
        strides: Stride of the first 3x3 convolution and of the 1x1 shortcut
            convolution.
    """

    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        # Main branch: a (possibly strided) 3x3 convolution, then a 3x3 convolution.
        self.conv1 = nn.Conv2d(
            input_channels, num_channels,
            kernel_size=3, padding=1, stride=strides,
        )
        self.conv2 = nn.Conv2d(
            num_channels, num_channels,
            kernel_size=3, padding=1,
        )
        # Shortcut branch: optional 1x1 projection, otherwise the identity.
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels,
                      kernel_size=1, stride=strides)
            if use_1x1conv
            else None
        )
        # Batch normalization, one layer per 3x3 convolution.
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_branch(X) + shortcut(X))``."""
        main = self.conv1(X)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        # Reshape the input with the 1x1 convolution when one is configured,
        # so it can be added to the main-branch output.
        shortcut = X if self.conv3 is None else self.conv3(X)
        # Residual connection: output + input.
        return F.relu(main + shortcut)

def getNet(num_classes=None, in_channels=3):
    """Build the ResNet-style image classifier.

    Layout: a stem (strided 7x7 convolution, batch norm, ReLU, max pooling),
    four stages of two ``Residual`` blocks each (64, 128, 256 and 512
    channels), global average pooling, and a linear classification head.

    Args:
        num_classes: Size of the output layer. When ``None`` (the default) the
            number of entries in the module-level ``class2num_map`` is used.
        in_channels: Number of channels of the input images (3 by default).

    Returns:
        The model as an ``nn.Sequential``.
    """
    if num_classes is None:
        num_classes = len(class2num_map)

    # b1: convolution (lifts the channel count to 64) + batch norm + ReLU
    # + max pooling.
    b1 = nn.Sequential(
        nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    def resnet_block(input_channels, num_channels, num_residuals,
                     first_block=False):
        """Return the list of ``num_residuals`` residual blocks of one stage.

        Unless ``first_block`` is set, the first block of the stage uses a
        stride-2 convolution with a 1x1 projection shortcut. ``first_block``
        marks the stage that directly follows b1, which has already reduced
        the resolution, so that stage keeps its resolution and channel count.
        """
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(Residual(input_channels, num_channels,
                                    use_1x1conv=True, strides=2))
            else:
                blk.append(Residual(num_channels, num_channels))
        return blk

    # b2..b5: the residual stages.
    b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 2))
    b4 = nn.Sequential(*resnet_block(128, 256, 2))
    b5 = nn.Sequential(*resnet_block(256, 512, 2))

    # Global average pooling collapses each feature map to 1x1 so the linear
    # head does not depend on the input resolution.
    net = nn.Sequential(b1, b2, b3, b4, b5,
                        nn.AdaptiveAvgPool2d((1, 1)),
                        nn.Flatten(), nn.Linear(512, num_classes))
    return net

In [24]:
class LeavesDataset(Dataset):
    """Image dataset indexed by a CSV file.

    Column 0 of the CSV holds the image path relative to ``root_dir``. In
    training mode column 1 holds the class name, which is translated to an
    integer id through the module-level ``class2num_map``.
    """

    def __init__(self, root_dir, csv_file, train=True):
        """
        Args:
            root_dir (string): Directory that the image paths in the CSV are
                relative to.
            csv_file (string): Path of the CSV index file.
            train (bool, optional): When true, items are ``(image, label_id)``
                pairs; when false, items are the image tensor only.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.train = train
        # Resize first (on the PIL image), then convert to a tensor.
        self.transform = transforms.Compose(
            [transforms.Resize(96), transforms.ToTensor()])
        self.label_transform = class2num_map

    def _load_image(self, idx):
        """Load the image of row ``idx`` and apply the transform pipeline."""
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
        # Image.open is lazy; convert() forces the pixel data to be read while
        # the file is still open, and the context manager then closes the
        # file. Converting to RGB guarantees three channels, which is what the
        # first convolution of the network expects.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image

    def __getitem__(self, idx):
        """Return ``(image, label_id)`` in training mode, else just ``image``."""
        image = self._load_image(idx)
        if not self.train:
            return image
        label = self.data_frame.iloc[idx, 1]
        return image, self.label_transform[label]

    def __len__(self):
        """Return the number of images listed in the CSV file."""
        return len(self.data_frame)



In [29]:
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy of ``net`` on the dataset yielded by ``data_iter``.

    If ``net`` is an ``nn.Module`` it is switched to evaluation mode and, when
    no ``device`` is given, the device of its first parameter is used.

    Args:
        net: The model to evaluate.
        data_iter: Iterable yielding ``(X, y)`` batches; ``X`` may be a tensor
            or a list of tensors.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, nn.Module):
        net.eval()  # evaluation mode
        if not device:
            device = next(iter(net.parameters())).device

    # Slot 0: number of correct predictions; slot 1: number of predictions.
    totals = d2l.Accumulator(2)
    with torch.no_grad():
        for features, labels in data_iter:
            if isinstance(features, list):
                # List-valued inputs: move every element to the device.
                features = [part.to(device) for part in features]
            else:
                features = features.to(device)
            labels = labels.to(device)
            predictions = net(features)
            totals.add(d2l.accuracy(predictions, labels), d2l.size(labels))
    num_correct, num_seen = totals[0], totals[1]
    return num_correct / num_seen

def train(net, train_iter, test_iter, num_epochs, lr, device):
    """Train ``net`` with SGD and cross-entropy loss, plotting progress.

    The weights of all linear and convolutional layers are re-initialized with
    Xavier-uniform values before training. During each epoch the running
    training loss and accuracy are plotted about five times; after each epoch
    the accuracy on ``test_iter`` is evaluated and plotted. A summary line and
    the throughput are printed at the end.

    Args:
        net: The model to train (moved to ``device``).
        train_iter: Iterable of ``(X, y)`` training batches; must support ``len``.
        test_iter: Iterable of ``(X, y)`` batches used for evaluation.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate of the SGD optimizer.
        device: Device on which training runs.
    """
    def init_weights(m):
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    net.to(device)
    # Optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    # Loss function
    loss = nn.CrossEntropyLoss()
    # Plotting
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot roughly five times per epoch; the floor of 1 avoids a modulo by
    # zero when an epoch has fewer than five batches.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Sum of training loss, sum of correct predictions, number of samples.
        metric = d2l.Accumulator(3)
        # evaluate_accuracy_gpu leaves the net in eval mode, so switch back to
        # training mode at the start of every epoch.
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            # X and y hold one mini-batch each.
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            # Back-propagate and update the parameters.
            l.backward()
            optimizer.step()
            with torch.no_grad():
                # The loss is a batch mean; weight it by the batch size so the
                # running average is per sample.
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        # Evaluate once per epoch so the 'test acc' curve has one point per
        # epoch rather than a single point after the last epoch.
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')

In [None]:
net = getNet()

# Evaluation needs (image, label) pairs, so hold out part of the labelled
# training data instead of building the loader from test.csv.
# NOTE(review): test.csv is assumed to have no label column (LeavesDataset has
# a dedicated train=False mode for it); reading it with the default train=True
# would fail when the label column is accessed. Confirm against the data.
full_set = LeavesDataset('/content/classify-leaves/', '/content/classify-leaves/train.csv')
num_valid = max(1, len(full_set) // 10)  # hold out about 10% of the samples
train_part, valid_part = torch.utils.data.random_split(
    full_set, [len(full_set) - num_valid, num_valid])
train_set = DataLoader(train_part, batch_size, shuffle=True, num_workers=4)
# Shuffling is unnecessary for evaluation.
test_set = DataLoader(valid_part, batch_size, shuffle=False, num_workers=4)

train(net, train_set, test_set, num_epochs, lr, d2l.try_gpu())



In [34]:
# Sanity check: load Fashion-MNIST with batch size 1 and resize=96, take the
# first training batch, and display its shape and label.
train_iter, test_iter = d2l.load_data_fashion_mnist(1, resize=96)
for i, (X, y) in enumerate(train_iter):
  break  # only the first batch is needed
X.shape, y



(torch.Size([1, 1, 96, 96]), tensor([8]))

In [35]:
# Display the module structure of the current `net` object.
# NOTE(review): the recorded output shows a 1-channel first convolution, while
# getNet as written builds a 3-channel one; the output appears to be from an
# earlier run.
net

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-

In [42]:
# Display the device returned by d2l.try_gpu(); the same call supplies the
# `device` argument of train() in the training cell.
d2l.try_gpu()

device(type='cuda', index=0)