## 整体流程

1. 先挑选出少量数据，手工清洗一下，需要保证数据集正确率 100%
2. 用这些数据训练一个初步模型出来
3. 用初步模型验证整个数据集，把错误的都挑出来，再次手工清洗
4. 用整个清洗过的数据集，重新训练一个完全体的模型

下面的执行代码只包含步骤 4，前面的步骤可以自己研究下（部分代码在最后的注释里）

## 整理原始数据集

把数据集放到 datases/raw/ 目录下，建两个文件夹 y 和 n

- datasets/raw/y/ 里面放技能 ready 的图
- datasets/raw/n/ 里面放没技能的图

文件名随便


没有 gpu 的把后面代码里所有的 .cuda() 去掉

In [7]:
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
import torch
from torchvision import transforms
from PIL import Image

def default_loader(path):
    return Image.open(path).convert('RGB')

class SkillDataset(Dataset):
    def __init__(self, path: Path) -> None:
        super().__init__()
        self.y = list((path / 'y').glob('**/*'))
        self.n = list((path / 'n').glob('**/*'))
        self.transform = transforms.ToTensor()
        self.loader = default_loader
        # 技能图片没多大，一次性全部载入内存算了
        self.data = [ self.get(i) for i in range(len(self))]
    
    def __len__(self):
        return len(self.y) + len(self.n)
    
    def get(self, index):
        if index < len(self.y):
            if index % 100 == 0:
                print(f'load y: {index} / {len(self.y)}')
            path = self.y[index]
            label = 1
        else:
            if index % 100 == 0:
                print(f'load n: {index - len(self.y)} / {len(self.n)}')
            path = self.n[index - len(self.y)]
            label = 0
        image = self.loader(path)
        image = self.transform(image)
        return image, label
    
    def __getitem__(self, index):
        return self.data[index]
        

In [None]:

raw_path = Path("datasets/raw")
dataset = SkillDataset(raw_path)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# 显存不够可以把 batch size 改小点
train_loader = DataLoader(train_dataset, batch_size=4096, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=4096, shuffle=False, num_workers=0)

In [10]:
import torch

class InceptionA(torch.nn.Module):
    def __init__(self, in_ch) -> None:
        super().__init__()
        self.branch_1x1 = torch.nn.Conv2d(in_ch, 16, kernel_size=1)

        self.branch_5x5_1 = torch.nn.Conv2d(in_ch, 16, kernel_size=1)
        self.branch_5x5_2 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch_3x3_1 = torch.nn.Conv2d(in_ch, 16, kernel_size=1)
        self.branch_3x3_2 = torch.nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch_3x3_3 = torch.nn.Conv2d(24, 24, kernel_size=3, padding=1)

        self.branch_pool = torch.nn.Conv2d(in_ch, 24, kernel_size=1)

    def forward(self, x):
        branch_1x1 = self.branch_1x1(x)

        branch_5x5 = self.branch_5x5_1(x)
        branch_5x5 = self.branch_5x5_2(branch_5x5)

        branch_3x3 = self.branch_3x3_1(x)
        branch_3x3 = self.branch_3x3_2(branch_3x3)
        branch_3x3 = self.branch_3x3_3(branch_3x3)

        branch_pool = torch.nn.functional.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch_1x1, branch_5x5, branch_3x3, branch_pool] # 16 + 24 + 24 + 24
        return torch.cat(outputs, 1)


class Net(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)
        self.incep1 = InceptionA(10)
        self.incep2 = InceptionA(20)
        self.mp = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(32912, 2)

    def forward(self, x):
        in_size = x.size(0)
        x = torch.nn.functional.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)
        x = torch.nn.functional.relu(self.mp(self.conv2(x)))
        x = self.incep2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x


model = Net()
model = model.cuda()

criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [7]:
import time

start_time = time.time()
def train(epoch):
    global start_time
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            cur_time = time.time()
            duration = cur_time - start_time
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tcost: {:.2f} s'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), duration))
            start_time = cur_time
            
def test():
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += criterion(output, target).item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    acc = 100. * correct / len(test_loader.dataset)

    print(f'loss: {test_loss}', f'acc: {acc}')
    return test_loss

In [None]:
if __name__ == '__main__':
    torch.cuda.empty_cache()
    print('hello')
    min_loss = 100.0
    best_epoch = 0
    for epoch in range(1, 500):
        train(epoch)
        if epoch % 10 == 0:
            loss = test()
            print('pre best is', best_epoch, min_loss)
            torch.save(model, f'checkpoints/model_{epoch}.pt')
            if loss < min_loss:
                min_loss = loss
                best_epoch = epoch
                print('new best', best_epoch, min_loss)
                torch.save(model, 'checkpoints/best.pt')


In [10]:
test()

loss: 1.356194166443271e-06 acc: 99.92270830112845


1.356194166443271e-06

In [20]:
# 导出 onnx

import torch.onnx
from pathlib import Path


def convert_onnx(path: Path):
    model = torch.load(path, map_location=torch.device("cpu"))
    model.eval()
    dummy_input = torch.randn(1, 3, 100, 80)
    axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}}
    torch.onnx.export(
        model,
        dummy_input,
        path.with_suffix(".onnx"),
        input_names=["input"],
        output_names=["output"],
        dynamic_axes=axes,
    )


convert_onnx(Path("checkpoints/best.pt"))


In [19]:
import onnx

onnx.checker.check_model("checkpoints/best.onnx")

## 以下是部分清洗数据的代码，可以自己研究下

In [None]:
# 随机数据挑选，请手工清洗

# from pathlib import Path
# import random

# clean_set_size = 1000
# raw_path = Path("datasets/raw")
# positive_set = random.sample(list((raw_path / "y").glob("**/*")), clean_set_size)
# negative_set = random.sample(list((raw_path / "n").glob("**/*")), clean_set_size)

# clean = Path("datasets/clean/")
# clean_y = clean / "y"
# clean_y.mkdir(parents=True, exist_ok=True)
# clean_n = clean / "n"
# clean_n.mkdir(parents=True, exist_ok=True)
# for path in positive_set:
#     path.rename(clean_y / path.name)
# for path in negative_set:
#     path.rename(clean_n / path.name)


In [41]:
# class SkillRawDataset(Dataset):
#     def __init__(self) -> None:
#         super().__init__()
#         self.y = list(Path('datasets/raw/y/').glob('**/*'))
#         self.n = list(Path('datasets/raw/n/').glob('**/*'))
#         self.transform = transforms.ToTensor()
#         self.loader = default_loader
#         self.data = [ self.get(i) for i in range(len(self))]
    
#     def __len__(self):
#         return len(self.y) + len(self.n)
    
#     def get(self, index):
#         # print(f'load: {self.count} / {len(self)}')
#         if index < len(self.y):
#             path = self.y[index]
#             label = 1
#         else:
#             path = self.n[index - len(self.y)]
#             label = 0
#         image = self.loader(path)
#         image = self.transform(image)
#         return image, label
    
#     def __getitem__(self, index):
#         return self.data[index]
    
#     def get_path(self, index):
#         if index < len(self.y):
#             path = self.y[index]
#             label = 1
#         else:
#             path = self.n[index - len(self.y)]
#             label = 0
#         return path, label

# raw_data_set = SkillRawDataset()


In [55]:

# raw_loader = DataLoader(raw_data_set, batch_size=1, shuffle=False, num_workers=0)
# import os
# import shutil
# def clear():
#     model.eval()
#     test_loss = 0
#     Path('datasets/maybe_error/1').mkdir(parents=True, exist_ok=True)
#     Path('datasets/maybe_error/0').mkdir(parents=True, exist_ok=True)
#     with torch.no_grad():
#         for batch_idx, (data, target) in enumerate(raw_loader):
#             data, target = data.cuda(), target.cuda()
#             output = model(data)
#             loss = criterion(output, target).item() # sum up batch loss
#             test_loss += loss
#             pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
#             correct = pred.eq(target.view_as(pred)).sum().item()
#             if not correct:
#                 tup = raw_data_set.get_path(batch_idx)
#                 print(tup)
#                 os.rename(tup[0], Path('datasets/maybe_error/') / str(tup[1]) / tup[0].name)                
                


# print(len(raw_data_set))
# clear()

32342
(WindowsPath('datasets/raw/y/2022-10-28_20-52-18.867_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_20-52-19.264_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_20-52-19.654_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_21-14-53.013_3453_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_21-32-42.363_7610_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_21-33-17.238_7986_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-23-08.921_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-33-51.125_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-33-56.957_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-34-07.050_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-34-09.139_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-34-11.213_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-34-39.334_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_22-34-43.518_raw.png'), 1)
(WindowsPath('datasets/raw/y/2022-10-28_2