## config

In [1]:
# hand gesture classification config

# hyperparameter configuration
# yaml
class Hyperparameter:
    """Central place for the notebook's tunable settings.

    Every setting is a plain class attribute, so it can be read either from
    the class itself or from the shared ``HP`` instance created right after
    the class definition.
    """

    # ---------------------------------------------------------------- Data
    # Device string handed to ``.to(...)`` / ``map_location`` elsewhere in
    # the notebook.
    device = 'cpu'
    # Data locations. The first four are not referenced by the cells shown
    # in this notebook -- presumably used by preprocessing/training code.
    data_root = './data/'
    cls_mapper_path = './data/cls_mapper.json'
    train_data_root = '../input/sabastien-marcel/shp_marcel_train/Marcel-Train'
    test_data_root = '../input/sabastien-marcel/shp_marcel_test/Marcel-Test'

    # Metadata files with one sample per line (parsed by ``load_meta``).
    metadata_train_path = '../input/gesture-output/data/train_hand_gesture.txt'
    metadata_eval_path = '../input/gesture-output/data/eval_hand_gesture.txt'
    metadata_test_path = '../input/gesture-output/data/test_hand_gesture.txt'

    class_num = 6  # width of the classifier's final linear layer
    seed = 1234  # random seed

    # ----------------------------------------------------- Model structure
    data_channels = 3  # input channels of the network's first convolution
    conv_kernel_size = 3  # NOTE(review): not read by the model code in this notebook
    fc_drop_prob = 0.3  # dropout probability in the fully connected head

    # ---------------------------------------------------------- Experiment
    batch_size = 16  # batch size passed to the DataLoader
    # The remaining values are not read by the cells shown here; presumably
    # they drive the training loop (TODO confirm).
    init_lr = 5e-4
    epochs = 100
    verbose_step = 250
    save_step = 1500


# Shared configuration object used throughout the notebook.
HP = Hyperparameter()

## utils

In [2]:
import os
from PIL import Image


# Collect every file below a directory whose extension is listed in `suffix`
# and return the matching paths as a list.
def recursive_fetching(root, suffix=('jpg', 'png')):
    """Recursively collect files under ``root`` that have a wanted extension.

    Args:
        root: Directory to search. Sub-directories are descended into,
            including symlinked ones.
        suffix: A single extension or an iterable of extensions. Matching is
            case-insensitive and a leading dot is ignored, so ``'jpg'``,
            ``'.JPG'`` and ``['jpg']`` all select the same files.

    Returns:
        A sorted list of matching file paths, each joined onto ``root``.
        Files without an extension are never returned.

    Raises:
        OSError: If ``root`` or one of its sub-directories cannot be listed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(suffix, str):
        suffix = (suffix,)
    wanted = {ext.lower().lstrip('.') for ext in suffix}

    def _reraise(error):
        # os.walk swallows listing errors by default; surface them instead.
        raise error

    matches = []
    # followlinks=True keeps descending into symlinked directories.
    for dirpath, _dirnames, filenames in os.walk(root, onerror=_reraise, followlinks=True):
        for name in filenames:
            extension = os.path.splitext(name)[1][1:].lower()
            if not extension or extension not in wanted:
                continue
            filepath = os.path.join(dirpath, name)
            # Skip broken symlinks and other non-regular entries.
            if os.path.isfile(filepath):
                matches.append(filepath)

    # Directory listing order is OS dependent; sort for reproducible results.
    return sorted(matches)


def load_meta(meta_path):
    """Read a ``|``-separated metadata file into a list of field lists.

    Each non-blank line is stripped of surrounding whitespace and split on
    ``'|'``. Blank lines (for example a trailing empty line at the end of the
    file) are skipped so they do not turn into empty records.

    Args:
        meta_path: Path of the metadata text file.

    Returns:
        A list with one ``list[str]`` of fields per non-blank line.
    """
    records = []
    with open(meta_path, 'r') as fr:
        # Iterate the file lazily instead of materialising readlines().
        for raw_line in fr:
            line = raw_line.strip()
            if line:
                records.append(line.split('|'))
    return records


def load_image(image_path):
    """Open the image stored at ``image_path`` with PIL.

    Args:
        image_path: Path of the image file.

    Returns:
        The object returned by ``PIL.Image.open``; no mode conversion or
        resizing is applied here.
    """
    image = Image.open(image_path)
    return image


## dataset_hg

In [3]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms as T

# Preprocessing applied to every image: resize to 112x112, convert to a
# tensor, then normalise each of the three channels with fixed mean/std
# values (these match the commonly used ImageNet statistics).
hg_transform = T.Compose([
    T.Resize(size=(112, 112)),
    # Augmentations currently disabled:
    # T.RandomRotation(degrees=45),
    # T.GaussianBlur(kernel_size=(3, 3)),
    # T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

class HandGestureDataset(torch.utils.data.Dataset):
    """Dataset backed by a hand-gesture metadata file.

    The metadata is parsed with ``load_meta``; for each record the first
    field is read as the integer class id and the second as the image path.
    """

    def __init__(self, metadata_path):
        # Parse the metadata once; images are only opened on item access.
        self.dataset = load_meta(metadata_path)

    def __len__(self):
        """Return the number of metadata records."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(image_tensor, class_id)`` for the record at ``index``.

        The image is passed through ``hg_transform`` and moved to
        ``HP.device`` before being returned.
        """
        record = self.dataset[index]
        label = int(record[0])
        tensor = hg_transform(load_image(record[1]))
        return tensor.to(HP.device), label

## model

In [4]:
import torch
from torch.nn.functional import mish


class DSConv2d(torch.nn.Module):
    """Depthwise-separable 2-D convolution.

    A per-channel ``kernel_size`` x ``kernel_size`` convolution
    (``groups=in_channels``) padded by ``kernel_size // 2`` is followed by a
    1x1 convolution that maps ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of channels produced by the 1x1 convolution.
        kernel_size: Odd side length of the depthwise kernel.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        assert kernel_size % 2 == 1, 'kernel_size must be odd!'
        half = kernel_size // 2
        # Depthwise stage: one filter per input channel.
        self.depth_conv = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size,
            padding=half, groups=in_channels,
        )
        # Pointwise stage: mixes channels with a 1x1 kernel.
        self.pointwise_conv = torch.nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise one."""
        return self.pointwise_conv(self.depth_conv(x))


class TrialBlock(torch.nn.Module):
    """Two-branch residual block that keeps the channel count unchanged.

    Both branches stack three convolutions with BatchNorm + Mish after the
    first two. The left branch runs 1x1 -> depthwise-separable 3x3 -> 7x7,
    the right branch runs the same layers in the opposite order. The branch
    outputs and the input are summed and passed through Mish.

    Args:
        in_channels: Channel count of the input (and of the output).
    """

    def __init__(self, in_channels):
        super().__init__()
        channels = in_channels

        # Left branch: 1x1 -> DSConv 3x3 -> 7x7.
        left_layers = [
            torch.nn.Conv2d(channels, channels, kernel_size=1),
            torch.nn.BatchNorm2d(channels),
            torch.nn.Mish(),
            DSConv2d(in_channels=channels, out_channels=channels, kernel_size=3),
            torch.nn.BatchNorm2d(channels),
            torch.nn.Mish(),
            torch.nn.Conv2d(channels, channels, kernel_size=7, padding=7 // 2),
        ]
        self.left_flow = torch.nn.Sequential(*left_layers)

        # Right branch: 7x7 -> DSConv 3x3 -> 1x1.
        right_layers = [
            torch.nn.Conv2d(channels, channels, kernel_size=7, padding=7 // 2),
            torch.nn.BatchNorm2d(channels),
            torch.nn.Mish(),
            DSConv2d(in_channels=channels, out_channels=channels, kernel_size=3),
            torch.nn.BatchNorm2d(channels),
            torch.nn.Mish(),
            torch.nn.Conv2d(channels, channels, kernel_size=1),
        ]
        self.right_flow = torch.nn.Sequential(*right_layers)

    def forward(self, x):
        """Sum both branches with the identity path and apply Mish."""
        merged = self.left_flow(x) + self.right_flow(x) + x
        return mish(merged)


class TrialNet(torch.nn.Module):
    """Convolutional classifier built from ``TrialBlock`` stages.

    The feature extractor has three stages (3x3 conv -> BatchNorm -> Mish ->
    TrialBlock -> 2x2 max-pool) that widen the channels to 64, 128 and 256,
    followed by three more 256-channel TrialBlocks and a final max-pool.
    The head is a three-layer MLP ending in ``HP.class_num`` outputs.

    The first linear layer expects ``256 * 7 * 7`` features, i.e. a 7x7
    feature map after the four 2x2 poolings (112x112 inputs).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor; the layer order defines the Sequential indices.
        conv_layers = []
        in_ch = HP.data_channels
        for out_ch in (64, 128, 256):
            conv_layers += [
                torch.nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=3 // 2),
                torch.nn.BatchNorm2d(out_ch),
                torch.nn.Mish(),
                TrialBlock(in_channels=out_ch),
                torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            ]
            in_ch = out_ch
        for _ in range(3):
            conv_layers.append(TrialBlock(in_channels=256))
        conv_layers.append(torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
        self.tn_conv = torch.nn.Sequential(*conv_layers)

        # Classifier head: two hidden layers with Mish + dropout.
        fc_layers = [
            torch.nn.Linear(256 * 7 * 7, 2048),
            torch.nn.Mish(),
            torch.nn.Dropout(HP.fc_drop_prob),
            torch.nn.Linear(2048, 1024),
            torch.nn.Mish(),
            torch.nn.Dropout(HP.fc_drop_prob),
            torch.nn.Linear(1024, HP.class_num),
        ]
        self.tn_fc = torch.nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        features = self.tn_conv(x)
        # Flatten everything but the batch dimension for the linear head.
        flat = features.view(x.size(0), -1)
        return self.tn_fc(flat)

## inference

In [5]:
import torch
from torch.utils.data import DataLoader

# Build the network and restore the trained weights.
model = TrialNet()
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
checkpoint = torch.load('../input/gesture-output/model_save/model_25_7000.model', map_location=HP.device)
model.load_state_dict(checkpoint['model_state_dict'])
# Keep the model on the same device as the data; without this the cell only
# works when HP.device is 'cpu'.
model.to(HP.device)
model.eval()

testset = HandGestureDataset(HP.metadata_test_path)
# Accuracy does not depend on sample order, so do not shuffle the test set.
test_loader = DataLoader(testset, batch_size=HP.batch_size, shuffle=False, drop_last=False)

total_count = 0
correct_count = 0

with torch.no_grad():
    for x, y in test_loader:
        # Inputs and labels must live on the model's device; .to() is a no-op
        # when they are already there.
        x = x.to(HP.device)
        y = y.to(HP.device)
        pred = model(x)
        total_count += pred.size(0)
        # .item() keeps the running count a plain Python int.
        correct_count += (torch.argmax(pred, 1) == y).sum().item()

# Avoid a ZeroDivisionError when the metadata file holds no samples.
accuracy = correct_count / total_count if total_count else float('nan')
print('Acc: %.3f' % accuracy)

Acc: 0.968
