In [2]:
import torch
from torchvision import transforms
NUM_CLASSES = 22
LABELS = [
    "ape", "bear", "bison", "cat",
    "chicken", "cow", "deer", "dog",
    "dolphin", "duck", "eagle", "fish",
    "horse", "lion", "lobster", "pig",
    "rabbit", "shark", "snake", "spider",
    "turkey", "wolf"
]
LABEL_MAP = {
    0: "ape", 1: "bear", 2: "bison", 3: "cat",
    4: "chicken", 5: "cow", 6: "deer", 7: "dog",
    8: "dolphin", 9: "duck", 10: "eagle", 11: "fish",
    12: "horse", 13: "lion", 14: "lobster",
    15: "pig", 16: "rabbit", 17: "shark", 18: "snake",
    19: "spider", 20:  "turkey", 21: "wolf"
}

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


你还需要把训练集进行分割，分割出 Valid 集

由于本任务数据的特殊性，可以直接使用 ImageFolder ，而不需要单独设计 Dataset 类，因此先定义 transforms, 参阅 https://pytorch.org/tutorials/beginner/basics/transforms_tutorial.html 。

In [3]:
import torch
from torchvision import transforms

transform_labeled = transforms.Compose(
    [
        transforms.Resize((64,64)),
        transforms.ToTensor()
    ]
) #!TODO: rewrite this statement

label2onehot = transforms.Lambda(lambda y: torch.zeros(
    NUM_CLASSES, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1))

transform_val = None #!TODO: rewrite this statement

In [4]:
import os
from torchvision.datasets import VisionDataset
from PIL import Image

class TEST(VisionDataset):
    def __init__(self,root, transform, target_transform):
        self.img_dir = root
        self.transform = transform
        self.target_transform = target_transform
        self.samples = os.listdir(self.img_dir)

    def __len__(self):
        
        return len(self.samples)

    def __getitem__(self, index):
        img_path = os.path.normpath(os.path.join(self.img_dir, self.samples[index]))
        with open(img_path, "rb") as f:
            img = Image.open(f).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img

将训练、测试集读入 DataLoader

In [10]:
import os

from torchvision.datasets import ImageFolder, VisionDataset
from torch.utils.data import DataLoader

data_path = 'Animals/Animals_Dataset' #@param
batch_size = 16 #@param
num_workers = 0 #@param

train_path = os.path.normpath((os.path.join(data_path, 'train')))
test_path = os.path.normpath((os.path.join(data_path, 'test')))

train_dataset = ImageFolder(
    train_path,
    transform_labeled,
    target_transform = label2onehot
)
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, lengths=[7, 3], generator=torch.Generator().manual_seed(0))
test_dataset = TEST(
    test_path,
    transform = transform_labeled,
    target_transform = label2onehot 
)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers
)
valid_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=num_workers) #!TODO: rewrite this statement

## 建立模型

本节介绍如何定义一个模型，参阅 https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html

先进行预处理

In [6]:
import torch
from torch import nn
from torch.nn import functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


定义模型类，注意到 PyTorch 是动态图，因此在数据进行前向传播时我们相当于已经定义了 Backpropagation 反向传播。

In [7]:
class BersonNetwork(nn.Module):
  def __init__(self):
    super(BersonNetwork, self).__init__()
    self.flatten = nn.Flatten()
    self.c1 = nn.Conv2d(3, 20, 5, 2, 0)
    self.c2 = nn.Conv2d(20, 1, 5, 1, 0)
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(676, 128), #!TODO: Change 0 to a proper value
        #!Tips: You should calculate the number of neurons.
        nn.ReLU(),
        nn.Linear(128, 512),
        nn.ReLU(),
        nn.Linear(512, NUM_CLASSES),#!TODO: Change 0 to a proper value
    )

  def forward(self, x):
    x = self.c1(x)
    x = F.relu(x) #!Question: What's the difference between torch.nn.relu() and torch.nn.F.relu()
    x = self.c2(x)
    x = F.relu(x)
    x = x.view(x.size(0), -1)
    logits = self.linear_relu_stack(x)
    return logits

这样模型就可以定义为：

In [8]:
model = BersonNetwork().to(device)
# print(model)
# model.train()

In [9]:
model.forward

<bound method BersonNetwork.forward of BersonNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (c1): Conv2d(3, 20, kernel_size=(5, 5), stride=(2, 2))
  (c2): Conv2d(20, 1, kernel_size=(5, 5), stride=(1, 1))
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=676, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=22, bias=True)
  )
)>

设置模型训练超参数、损失函数与优化器

In [8]:
learning_rate = 1e-3 #@param
batch_size = 64 #@param
epochs = 5 #@param
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

定义评价指标

In [9]:
class AverageMeter(object):
  """Computes and stores the average and current value"""
  def __init__(self, name, fmt=':f'):
      self.name = name
      self.fmt = fmt
      self.reset()

  def reset(self):
      self.val = 0
      self.avg = 0
      self.sum = 0
      self.count = 0

  def update(self, val, n=1):
      self.val = val
      self.sum += val * n
      self.count += n
      self.avg = self.sum / self.count

  def __str__(self):
      fmtstr = '{name} {avg' + self.fmt + '}'
      return fmtstr.format(**self.__dict__)

def accuracy(output:torch.Tensor, target, topk=(1,)):
  """Computes the accuracy over the k top predictions for the specified values of k"""
  with torch.no_grad():
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    print(pred.shape, target.shape)
    pred = pred.t()
    print(target.view(1, -1).shape)
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')

训练

In [10]:
import time

start = time.time()
for i in range(epochs):
  for batch, (X, y) in enumerate(train_loader):
    X = X.to(device)
    y = y.to(device)
    data_time.update(time.time() - start)
    pred = model(X)
    loss = loss_fn(pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # acc1, acc5 = accuracy(pred, y, topk=(1, 5))
    losses.update(loss.item(), X.size(0))
    # top1.update(acc1[0], X.size(0))
    # top5.update(acc5[0], X.size(0))

  batch_time.update(time.time() - start)
  start = time.time()

  print(f"Epoch:{i + 1}: {batch_time}, {losses}")

Epoch:1: Time  5.069, Loss 3.1996e+00
Epoch:2: Time  4.983, Loss 3.1518e+00
Epoch:3: Time  4.956, Loss 3.1340e+00
Epoch:4: Time  4.924, Loss 3.1250e+00
Epoch:5: Time  4.914, Loss 3.1196e+00


# 测试

In [11]:
model.eval()
with torch.no_grad():
    path = os.path.normpath(os.path.join(test_path, '0.png'))
    with open(path, "rb") as f:
        image0 = Image.open(f).convert("RGB")
        image0.show()
    image0 = transform_labeled(image0).to(device)
    pred = model(image0)
    label = LABEL_MAP[int(pred.argmax())]
    print(label)

cow


由于模型过于简单，因此训练准确率不高。你可以尝试训练其它的模型（甚至于预训练模型，但预训练模型如何进行训练也是一个值得讨论的问题）。
同时，这里并没有使用 Valid 集来进行评价，而是使用了 train 集，你可以尝试加入 Valid 集（如提前分割，或用其他方式等）



# 输出结果

输出结果非常简单，与训练类似，但模型不能更新参数

In [12]:
model.eval()
labels = []
with torch.no_grad():
  for _, images in enumerate(test_loader):
    y = model(images.to(device))
    batch_labels = torch.argmax(y, dim=1)
    labels.append(batch_labels)
ans = torch.cat(labels, 0).cpu().numpy()
print(ans)
print([LABEL_MAP[i] for i in ans])

[5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5]
['cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow', 'cow']


# 保存模型

参阅 https://pytorch.org/tutorials/beginner/basics/saveloadrun_tutorial.html

In [39]:
torch.save(model, 'model.path')

In [None]:
model = torch.load('model.path')