## config

In [None]:
# hand gesture classification config

# Hyperparameter configuration
# yaml
class Hyperparameter:
    """Static settings read by the preprocessing, model and training code."""

    # ----------------------------- data -----------------------------
    # Device that the dataset tensors and the model are moved to.
    device = 'cuda'
    data_root = './data/'
    # JSON file holding the class-name <-> class-id mapping.
    cls_mapper_path = './data/cls_mapper.json'
    # Folders that are scanned recursively for the raw images.
    train_data_root = '../input/sabastien-marcel/shp_marcel_train/Marcel-Train'
    test_data_root = '../input/sabastien-marcel/shp_marcel_test/Marcel-Test'

    # Metadata files, one "<cls_id>|<image path>" record per line.
    metadata_train_path = './data/train_hand_gesture.txt'
    metadata_eval_path = './data/eval_hand_gesture.txt'
    metadata_test_path = './data/test_hand_gesture.txt'

    # Number of output classes of the classifier head.
    class_num = 6
    # Random seed.
    seed = 1234

    # ------------------------ model structure ------------------------
    # Channel count of the input images fed to the first convolution.
    data_channels = 3
    conv_kernel_size = 3
    # Dropout probability used in the fully connected head.
    fc_drop_prob = 0.3

    # -------------------------- experiment ---------------------------
    batch_size = 16
    # Learning rate handed to the optimizer.
    init_lr = 5e-4
    epochs = 100
    # Evaluate on the dev set every `verbose_step` training steps.
    verbose_step = 250
    # Write a checkpoint every `save_step` training steps.
    save_step = 1500


# Shared configuration object used throughout the file.
HP = Hyperparameter()

## utils

In [None]:
import os
from PIL import Image


def recursive_fetching(root, suffix=('jpg', 'png')):
    """Collect the paths of all files under ``root`` with a wanted extension.

    The directory tree is walked recursively. Directory entries are visited
    in sorted order so the returned list does not depend on the order in
    which the file system happens to list them.

    Args:
        root: Directory to search.
        suffix: Wanted file extensions. Either a single string or an iterable
            of strings; matching is case-insensitive and a leading dot is
            optional ('ppm', '.ppm' and 'PPM' are equivalent).

    Returns:
        A list of file paths (each built with ``os.path.join`` from ``root``).
        Files without an extension are never returned.
    """
    # A bare string would otherwise be searched character by character.
    if isinstance(suffix, str):
        suffix = (suffix,)
    wanted = {s.lower().lstrip('.') for s in suffix}

    file_paths = []

    def get_all_files(path):
        # Walk every file or sub-directory of this folder.
        for name in sorted(os.listdir(path)):
            filepath = os.path.join(path, name)
            if os.path.isdir(filepath):
                # Sub-directory: descend into it.
                get_all_files(filepath)
            elif os.path.isfile(filepath):
                # splitext yields '' for names without a dot, so a file that
                # is merely *named* like an extension is not picked up.
                ext = os.path.splitext(name)[1]
                if ext and ext[1:].lower() in wanted:
                    file_paths.append(filepath)

    get_all_files(root)
    return file_paths


def load_meta(meta_path):
    """Read a metadata file into a list of ``[first_field, rest]`` records.

    Each non-empty line is stripped of surrounding whitespace and split at
    the FIRST '|' only, so a '|' that occurs later in the line (for example
    inside a file path) stays part of the second field. Blank lines are
    skipped instead of producing a ``['']`` record.

    Args:
        meta_path: Path of the text file to read.

    Returns:
        A list with one list of strings per non-empty line. A line without
        any '|' yields a single-element list.
    """
    records = []
    with open(meta_path, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                continue
            records.append(line.split('|', 1))
    return records


def load_image(image_path):
    """Open the image stored at ``image_path`` with PIL and return it."""
    image = Image.open(image_path)
    return image


## preprocess

In [None]:
import os
import random
import numpy as np
import json

# Create the working folders (metadata, logs, checkpoints) if they are missing.
for foldername in ['data', 'log', 'model_save']:
    os.makedirs(foldername, exist_ok=True)

# Every shuffle below uses the stdlib `random` module, so that is the
# generator that must be seeded for a repeatable split; numpy is seeded as well.
random.seed(HP.seed)
np.random.seed(HP.seed)

# Build the mapping between class names and class ids.
# NOTE: JSON object keys are always strings, so the integer keys of 'id2cls'
# are stored as '0', '1', ... in the written file.
cls_mapper = {
    'cls2id': {'A': 0, 'B': 1, 'C': 2, 'Five': 3, 'Point': 4, 'V': 5},
    'id2cls': {0: 'A', 1: 'B', 2: 'C', 3: 'Five', 4: 'Point', 5: 'V'}
}
with open(HP.cls_mapper_path, 'w') as fw:
    json.dump(cls_mapper, fw)


def _class_id_of(path):
    """Return the class id for an image path.

    The class name is the part of the file name before the first '-'.
    Raises KeyError when that prefix is not a known class name.
    """
    fn_start = os.path.split(path)[-1].split('-')[0]
    return cls_mapper['cls2id'][fn_start]


def _write_meta(meta_path, paths):
    """Write one '<cls_id>|<path>' line per image path to ``meta_path``."""
    with open(meta_path, 'w') as fw:
        for path in paths:
            fw.write('%d|%s\n' % (_class_id_of(path), path))


# Fetch the original train and test images and merge them into one pool.
# Sorting first makes the seeded shuffle independent of directory listing order.
train_items = recursive_fetching(HP.train_data_root, ['ppm'])
test_items = recursive_fetching(HP.test_data_root, ['ppm'])
dataset = sorted(train_items + test_items)
dataset_num = len(dataset)
random.shuffle(dataset)

# Group the image paths by class id.
dataset_dict = {}
for it in dataset:
    dataset_dict.setdefault(_class_id_of(it), []).append(it)

# Re-split the pool per class into train / eval / test subsets. The test
# subset receives whatever is left after the train and eval slices.
train_ratio, eval_ratio, test_ratio = 0.8, 0.1, 0.1
train_set, eval_set, test_set = [], [], []
for set_list in dataset_dict.values():
    length = len(set_list)
    train_num, eval_num = int(length * train_ratio), int(length * eval_ratio)
    random.shuffle(set_list)
    train_set.extend(set_list[:train_num])
    eval_set.extend(set_list[train_num:train_num + eval_num])
    test_set.extend(set_list[train_num + eval_num:])

# Mix the classes inside each subset.
random.shuffle(train_set)
random.shuffle(eval_set)
random.shuffle(test_set)

print('num of trainset : %d' % (len(train_set)))
print('num of evalset : %d' % (len(eval_set)))
print('num of testset : %d' % (len(test_set)))

_write_meta(HP.metadata_train_path, train_set)
_write_meta(HP.metadata_eval_path, eval_set)
_write_meta(HP.metadata_test_path, test_set)

# Sanity check: report which image modes and sizes occur in the test subset.
mode_set, size_set = [], []
for _, path in load_meta(HP.metadata_test_path):
    # The context manager closes the underlying file once the header is read.
    with load_image(path) as img:
        mode_set.append(img.mode)
        size_set.append(img.size)

print(set(mode_set), set(size_set))

## dataset_hg

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms as T

# Image -> tensor pipeline applied to every sample by HandGestureDataset.
hg_transform = T.Compose(
    [
        # Fixed 112x112 input size.
        T.Resize(size=(112, 112)),
        # Random augmentation: rotation within +/-45 degrees, 3x3 blur, horizontal flip.
        T.RandomRotation(degrees=45),
        T.GaussianBlur(kernel_size=(3, 3)),
        T.RandomHorizontalFlip(),
        # Convert to a tensor, then normalize each of the three channels.
        T.ToTensor(),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

class HandGestureDataset(torch.utils.data.Dataset):
    """Dataset backed by a metadata file of '<cls_id>|<image path>' records.

    Args:
        metadata_path: Path of the metadata file, read with ``load_meta``.
        transform: Optional callable applied to the loaded image; it must
            return a tensor. Defaults to the module-level ``hg_transform``,
            which includes random augmentation (rotation, flip) - pass a
            deterministic pipeline here when the dataset is used for
            evaluation.
    """

    def __init__(self, metadata_path, transform=None):
        self.dataset = load_meta(metadata_path)
        self.transform = hg_transform if transform is None else transform

    def __getitem__(self, index):
        """Return ``(image_tensor, cls_id)`` for the record at ``index``.

        The image tensor is moved to ``HP.device`` before it is returned;
        ``cls_id`` is a plain int.
        """
        item = self.dataset[index]
        cls_id, path = int(item[0]), item[1]
        image = load_image(path)
        return self.transform(image).to(HP.device), cls_id

    def __len__(self):
        """Return the number of records in the metadata file."""
        return len(self.dataset)

## model

In [None]:
import torch
from torch.nn.functional import mish


class DSConv2d(torch.nn.Module):
    """Depthwise-separable 2-D convolution.

    A per-channel (``groups=in_channels``) k x k convolution is followed by a
    1 x 1 convolution that maps ``in_channels`` to ``out_channels``. The k x k
    convolution is padded by ``k // 2``, so with the required odd kernel size
    the spatial size of the input is preserved.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        assert kernel_size % 2 == 1, 'kernel_size must be odd!'
        half = kernel_size // 2
        # One filter per input channel.
        self.depth_conv = torch.nn.Conv2d(
            in_channels,
            in_channels,
            (kernel_size, kernel_size),
            padding=(half, half),
            groups=in_channels,
        )
        # Mixes the channels and sets the output width.
        self.pointwise_conv = torch.nn.Conv2d(in_channels, out_channels, (1, 1))

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise_conv(self.depth_conv(x))


class TrialBlock(torch.nn.Module):
    """Two-branch residual block that keeps the channel count unchanged.

    The left branch runs 1x1 conv -> BN -> Mish -> DSConv2d -> BN -> Mish ->
    7x7 conv; the right branch uses the same middle section but starts with
    the 7x7 conv and ends with the 1x1 conv. Both branch outputs and the
    input are summed and passed through Mish.
    """

    def __init__(self, in_channels):
        super().__init__()
        width = in_channels

        def conv1x1():
            return torch.nn.Conv2d(in_channels=width, out_channels=width, kernel_size=(1, 1))

        def conv7x7():
            # Padding of 3 keeps the spatial size for a 7x7 kernel.
            return torch.nn.Conv2d(
                in_channels=width, out_channels=width, kernel_size=(7, 7), padding=(3, 3)
            )

        def middle():
            # Section shared by both branches, built fresh on every call.
            return [
                torch.nn.BatchNorm2d(width),
                torch.nn.Mish(),
                DSConv2d(in_channels=width, out_channels=width, kernel_size=3),
                torch.nn.BatchNorm2d(width),
                torch.nn.Mish(),
            ]

        self.left_flow = torch.nn.Sequential(conv1x1(), *middle(), conv7x7())
        self.right_flow = torch.nn.Sequential(conv7x7(), *middle(), conv1x1())

    def forward(self, x):
        """Return Mish(left(x) + right(x) + x)."""
        merged = self.left_flow(x) + self.right_flow(x) + x
        return mish(merged)


class TrialNet(torch.nn.Module):
    """Convolutional classifier built from TrialBlock stages.

    ``tn_conv`` has three stages (3x3 conv -> BN -> Mish -> TrialBlock ->
    2x2 max-pool) widening the channels to 64, 128 and 256, followed by three
    more 256-channel TrialBlocks and a final 2x2 max-pool. ``tn_fc`` is a
    three-layer MLP ending in ``HP.class_num`` outputs.

    The first Linear layer expects 256 * 7 * 7 features, i.e. a 7x7 map after
    the four 2x2 poolings (a 112x112 input produces exactly that).
    """

    def __init__(self):
        super().__init__()

        def pool():
            return torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        def stage(c_in, c_out):
            # One down-sampling stage; layers are created in forward order.
            return [
                torch.nn.Conv2d(
                    in_channels=c_in, out_channels=c_out, kernel_size=(3, 3), padding=(1, 1)
                ),
                torch.nn.BatchNorm2d(c_out),
                torch.nn.Mish(),
                TrialBlock(in_channels=c_out),
                pool(),
            ]

        layers = []
        for c_in, c_out in ((HP.data_channels, 64), (64, 128), (128, 256)):
            layers += stage(c_in, c_out)
        layers += [TrialBlock(in_channels=256) for _ in range(3)]
        layers.append(pool())
        self.tn_conv = torch.nn.Sequential(*layers)

        drop_prob = HP.fc_drop_prob
        self.tn_fc = torch.nn.Sequential(
            torch.nn.Linear(in_features=256 * 7 * 7, out_features=2048),
            torch.nn.Mish(),
            torch.nn.Dropout(drop_prob),
            torch.nn.Linear(in_features=2048, out_features=1024),
            torch.nn.Mish(),
            torch.nn.Dropout(drop_prob),
            torch.nn.Linear(in_features=1024, out_features=HP.class_num),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the class logits."""
        features = self.tn_conv(x)
        flat = features.view(x.size(0), -1)
        return self.tn_fc(flat)

## trainer

In [None]:
import os.path
import random
import torch
import numpy as np
from tensorboardX import SummaryWriter
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

# tensorboardX summary writer; its event files go to ./log.
logger = SummaryWriter('./log')

# Seed init: seed every random number generator in use so that the model
# is reproducible.
random.seed(HP.seed)
np.random.seed(HP.seed)
torch.manual_seed(HP.seed)
torch.cuda.manual_seed(HP.seed)


def evaluate(model, devloader, crit):
    """Return the mean per-batch loss of ``model`` over ``devloader``.

    The model is switched to eval mode and gradients are disabled for the
    duration of the pass; afterwards the model is put back into whatever mode
    (train or eval) it was in when the function was called.

    Args:
        model: The ``torch.nn.Module`` to evaluate.
        devloader: Iterable of ``(x, y)`` batches.
        crit: Loss callable invoked as ``crit(pred, y)``.

    Returns:
        The sum of the per-batch losses divided by the number of batches, or
        NaN when ``devloader`` yields no batch.
    """
    was_training = model.training

    # Run on the device the model actually lives on; fall back to the
    # configured device only for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else HP.device

    sum_loss, n_batches = 0., 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in devloader:
                pred = model(x.to(device))
                sum_loss += crit(pred, y.to(device)).item()
                n_batches += 1
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    if n_batches == 0:
        return float('nan')
    return sum_loss / n_batches


def save_checkpoint(model, epoch, opt, save_path):
    """Write a checkpoint to ``save_path`` with ``torch.save``.

    The saved dict has three entries: 'epoch', 'model_state_dict' and
    'optimizer_state_dict'.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=opt.state_dict(),
    )
    torch.save(checkpoint, save_path)


def train():
    """Train a TrialNet on the hand-gesture training set.

    Each step logs the training loss. Every ``HP.verbose_step`` steps the dev
    loss is computed and logged; every ``HP.save_step`` steps (and at step
    7000) a checkpoint is written to ``model_save/``. After the last epoch
    the whole model object is saved to ``hgmodel.dm`` and the logger is
    closed.
    """
    model = TrialNet().to(HP.device)
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=HP.init_lr)

    trainset = HandGestureDataset(HP.metadata_train_path)
    train_loader = DataLoader(trainset, batch_size=HP.batch_size, shuffle=True, drop_last=True)

    # The dev loss is an average over all batches, so shuffling adds nothing.
    devset = HandGestureDataset(HP.metadata_eval_path)
    dev_loader = DataLoader(devset, batch_size=HP.batch_size, shuffle=False, drop_last=False)

    start_epoch, step = 0, 0
    # Bound up front so the progress print never depends on the evaluation
    # branch having run first.
    eval_loss = float('nan')

    model.train()

    for epoch in range(start_epoch, HP.epochs):
        # len(train_loader) is already the number of batches per epoch.
        print('Start Epoch: %d, Steps: %d' % (epoch, len(train_loader)))
        for x, y in train_loader:
            # No-op when the dataset already returns tensors on HP.device.
            x, y = x.to(HP.device), y.to(HP.device)

            opt.zero_grad()  # reset the gradients
            pred = model(x)
            loss = criterion(pred, y)

            loss.backward()
            opt.step()

            # Log a plain float rather than a tensor attached to the graph.
            train_loss = loss.item()
            logger.add_scalar('Loss/Train', train_loss, step)

            if not step % HP.verbose_step:
                eval_loss = evaluate(model, dev_loader, criterion)
                logger.add_scalar('Loss/Dev', eval_loss, step)

            # Periodic checkpoint, plus one extra checkpoint at step 7000.
            if not step % HP.save_step or step == 7000:
                model_path = 'model_%d_%d.model' % (epoch, step)
                save_checkpoint(model, epoch, opt, os.path.join('model_save', model_path))

            step += 1
            logger.flush()
            print('Epoch:[%d/%d], step:%d, Train Loss:%.5f, Dev Loss:%.5f' % (epoch, HP.epochs, step, train_loss, eval_loss))

    # NOTE: this pickles the whole module object, so loading the file later
    # requires the model classes to be importable.
    torch.save(model, "hgmodel.dm")
    logger.close()

In [None]:
# Run the training loop defined above.
train()