In [1]:
#### label 정보
#### (배경 - 0 / 몸통 - 1 / 오른손 - 2 / 왼손 - 3 / 왼발 - 4 / 오른발 - 5 / 오른쪽 허벅지 - 6 / 왼쪽 허벅지 - 7 / 오른쪽 종아리 - 8 / 왼쪽 종아리 - 9 / 왼쪽 팔 - 10 / 
#### 오른쪽 팔 - 11 / 왼쪽 전완 - 12 / 오른쪽 전완 - 13 / 머리 - 14)
#### 해당 값이 RGB 값임 ex) 배경 0,0,0
### mask 확장자 png input image 확장자 jpg 해깔리지 말것

In [2]:
# Check Pytorch installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMSegmentation installation
import mmseg
print(mmseg.__version__)
import mmcv
import os.path as osp

1.9.0 True
0.18.0


In [None]:
from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot
from mmseg.core.evaluation import get_palette

In [None]:
classes = ('bg', 'body', 'right_hand', 'left_hand', 'left_leg', 'right_reg', 'right_thigh', 'left_thigh','right_calf','left_calf'
           ,'left_arm','right_arm','left_forearm','right_forearm','head')

In [None]:
palette = [[0,0,0], [1,1,1], [2,2,2], [3,3,3],[4,4,4], [5,5,5], [6,6,6], [7,7,7], [8,8,8], [9,9,9], [10,10,10], [11,11,11], [12,12,12], [13,13,13], [14,14,14]]

In [None]:
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset

@DATASETS.register_module()
class body_seg(CustomDataset):
  CLASSES = classes
  PALETTE = palette
  def __init__(self, split, **kwargs):
    super().__init__(img_suffix='.jpg', seg_map_suffix='.png', 
                     split=split, **kwargs)
    assert osp.exists(self.img_dir) and self.split is not None


In [None]:
from mmcv import Config

In [None]:
from mmseg.apis import set_random_seed
cfg = Config.fromfile('mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py')

In [None]:
cfg.dataset_type = 'body_seg'
cfg.data_root = '/root'


# Since we use ony one GPU, BN is used instead of SyncBN
## encoding 부분은 layer normalization 
## encoding 이후 ecoder part 부터는 batchnormalization
cfg.norm_cfg = dict(type='LN', requires_grad=True)
cfg.model.backbone.norm_cfg = cfg.norm_cfg
cfg.model.decode_head.norm_cfg = dict(type='BN', requires_grad=True)
cfg.model.auxiliary_head.norm_cfg = dict(type='BN', requires_grad=True)


# modify num classes of the model in decode/auxiliary head
cfg.model.decode_head.num_classes = 15
cfg.model.auxiliary_head.num_classes = 15

cfg.data.samples_per_gpu = 6
cfg.data.workers_per_gpu = 2

cfg.img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
cfg.crop_size = (256, 256)

cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **cfg.img_norm_cfg),
    dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(320, 240),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **cfg.img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

cfg.data.train.type = cfg.dataset_type
cfg.data.train.data_root = cfg.data_root
cfg.data.train.img_dir = 'train2014'
cfg.data.train.ann_dir = 'train_mask'
cfg.data.train.pipeline = cfg.train_pipeline
cfg.data.train.split = 'splits/train.txt'

cfg.data.val.type = cfg.dataset_type
cfg.data.val.data_root = cfg.data_root
cfg.data.val.img_dir = 'val2014'
cfg.data.val.ann_dir = 'val_mask'
cfg.data.val.pipeline = cfg.test_pipeline
cfg.data.val.split = 'splits/val.txt'


cfg.data.test.type = cfg.dataset_type
cfg.data.test.data_root = cfg.data_root
cfg.data.test.img_dir = 'val2014'
cfg.data.test.ann_dir = 'val_mask'
cfg.data.test.pipeline = cfg.test_pipeline
cfg.data.test.split = 'splits/val.txt'

checkpoint_file = 'pretrained/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.pth'
cfg.load_from = checkpoint_file

# Set up working dir to save files and logs.
cfg.work_dir = './work_dirs/trainver1'

# 1epoch 대략 43000
cfg.runner.max_iters = 43000*50
cfg.log_config.interval = 11000
cfg.evaluation.interval = 43000
cfg.checkpoint_config.interval = 43000

# Set seed to facitate reproducing the result
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

print(f'Config:\n{cfg.pretty_text}')

Config:
norm_cfg = dict(type='LN', requires_grad=True)
backbone_norm_cfg = dict(type='LN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='pretrain/swin_base_patch4_window12_384_22k.pth',
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=128,
        patch_size=4,
        window_size=12,
        mlp_ratio=4,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        use_abs_pos_embed=False,
        act_cfg=dict(type='GELU'),
        norm_cfg=dict(type='LN', requires_grad=True)),
    decode_head=dict(
        type='UPerHead',
        in_channels=[128, 256, 512, 1024],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
     

In [None]:
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor
from mmseg.apis import train_segmentor


# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector

# config_file = 'mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py'
# model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
model = build_segmentor(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_segmentor(model, datasets, cfg, distributed=False, validate=True, 
                meta=dict())

  f'"{src_arg_name}" is deprecated in '
2021-10-26 11:24:36,885 - mmseg - INFO - Loaded 26437 images
2021-10-26 11:24:39,702 - mmseg - INFO - Loaded 1508 images
2021-10-26 11:24:39,702 - mmseg - INFO - load checkpoint from pretrained/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.pth
2021-10-26 11:24:39,703 - mmseg - INFO - Use load_from_local loader

size mismatch for decode_head.conv_seg.weight: copying a param with shape torch.Size([150, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([15, 512, 1, 1]).
size mismatch for decode_head.conv_seg.bias: copying a param with shape torch.Size([150]) from checkpoint, the shape in current model is torch.Size([15]).
size mismatch for auxiliary_head.conv_seg.weight: copying a param with shape torch.Size([150, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([15, 256, 1, 1]).
size mismatch for auxiliary_head.conv_seg.bias: copying a param with shape torch.Size([150]) from chec

[                                                  ] 0/1508, elapsed: 0s, ETA:



[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1508/1508, 15.7 task/s, elapsed: 96s, ETA:     0s

2021-10-26 17:14:39,645 - mmseg - INFO - per class results:
2021-10-26 17:14:39,647 - mmseg - INFO - 
+---------------+-------+-------+
|     Class     |  IoU  |  Acc  |
+---------------+-------+-------+
|       bg      | 93.82 | 97.11 |
|      body     | 64.53 | 81.91 |
|   right_hand  | 31.03 | 50.96 |
|   left_hand   | 14.96 | 20.66 |
|    left_leg   |  18.6 | 33.01 |
|   right_reg   |  5.71 |  6.49 |
|  right_thigh  | 30.82 |  57.4 |
|   left_thigh  | 10.67 | 12.94 |
|   right_calf  | 16.03 | 22.71 |
|   left_calf   | 21.05 | 34.52 |
|    left_arm   |  15.7 | 21.62 |
|   right_arm   | 24.32 | 37.36 |
|  left_forearm | 17.86 | 27.15 |
| right_forearm | 23.76 | 38.57 |
|      head     | 73.74 | 86.83 |
+---------------+-------+-------+
2021-10-26 17:14:39,648 - mmseg - INFO - Summary:
2021-10-26 17:14:39,648 - mmseg - INFO - 
+-------+-------+-------+
|  aAcc |  mIoU |  mAcc |
+-------+-------+-------+
| 90.89 | 30.84 | 41.95 |
+-------+-------+-------+
2021-10-26 17:14:39,651 - mmse

[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1508/1508, 15.8 task/s, elapsed: 95s, ETA:     0s

2021-10-26 23:04:38,375 - mmseg - INFO - per class results:
2021-10-26 23:04:38,377 - mmseg - INFO - 
+---------------+-------+-------+
|     Class     |  IoU  |  Acc  |
+---------------+-------+-------+
|       bg      | 93.55 | 97.64 |
|      body     | 63.92 | 78.96 |
|   right_hand  | 31.92 |  49.5 |
|   left_hand   | 15.95 |  21.0 |
|    left_leg   | 21.34 | 40.54 |
|   right_reg   |  0.53 |  0.54 |
|  right_thigh  |  5.48 |  6.02 |
|   left_thigh  | 27.72 | 55.27 |
|   right_calf  | 13.51 | 17.35 |
|   left_calf   | 20.38 |  31.8 |
|    left_arm   |  9.15 | 11.98 |
|   right_arm   | 24.17 | 40.16 |
|  left_forearm | 11.62 | 14.96 |
| right_forearm | 28.54 | 46.94 |
|      head     | 73.09 | 83.32 |
+---------------+-------+-------+
2021-10-26 23:04:38,378 - mmseg - INFO - Summary:
2021-10-26 23:04:38,378 - mmseg - INFO - 
+-------+-------+-------+
|  aAcc |  mIoU |  mAcc |
+-------+-------+-------+
| 90.78 | 29.39 | 39.73 |
+-------+-------+-------+
2021-10-26 23:04:38,381 - mmse

[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1508/1508, 15.7 task/s, elapsed: 96s, ETA:     0s

2021-10-27 04:53:55,749 - mmseg - INFO - per class results:
2021-10-27 04:53:55,751 - mmseg - INFO - 
+---------------+-------+-------+
|     Class     |  IoU  |  Acc  |
+---------------+-------+-------+
|       bg      | 93.62 | 97.76 |
|      body     | 65.08 | 79.52 |
|   right_hand  | 30.16 | 44.34 |
|   left_hand   | 18.37 | 25.66 |
|    left_leg   | 19.61 | 31.59 |
|   right_reg   |  3.16 |  3.42 |
|  right_thigh  | 14.28 | 18.16 |
|   left_thigh  | 26.17 | 44.61 |
|   right_calf  | 13.03 | 16.75 |
|   left_calf   | 23.36 | 36.89 |
|    left_arm   | 13.64 | 16.92 |
|   right_arm   | 29.25 | 47.41 |
|  left_forearm | 15.73 | 20.33 |
| right_forearm | 30.08 |  47.6 |
|      head     | 72.86 | 83.17 |
+---------------+-------+-------+
2021-10-27 04:53:55,751 - mmseg - INFO - Summary:
2021-10-27 04:53:55,752 - mmseg - INFO - 
+------+-------+-------+
| aAcc |  mIoU |  mAcc |
+------+-------+-------+
| 91.1 | 31.23 | 40.94 |
+------+-------+-------+
2021-10-27 04:53:55,755 - mmseg - I

In [11]:
model

EncoderDecoder(
  (backbone): SwinTransformer(
    (patch_embed): PatchEmbed(
      (adap_padding): AdaptivePadding()
      (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (drop_after_pos): Dropout(p=0.0, inplace=False)
    (stages): ModuleList(
      (0): SwinBlockSequence(
        (blocks): ModuleList(
          (0): SwinBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): ShiftWindowMSA(
              (w_msa): WindowMSA(
                (qkv): Linear(in_features=128, out_features=384, bias=True)
                (attn_drop): Dropout(p=0.0, inplace=False)
                (proj): Linear(in_features=128, out_features=128, bias=True)
                (proj_drop): Dropout(p=0.0, inplace=False)
                (softmax): Softmax(dim=-1)
              )
              (drop): DropPath()
            )
            (norm2): LayerNorm((128,), eps=1e-05, el

In [12]:
# config_file = 'mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py'
# model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'val2014/COCO_val2014_000000581929.jpg')

AttributeError: 'EncoderDecoder' object has no attribute 'cfg'

In [18]:
model

EncoderDecoder(
  (backbone): SwinTransformer(
    (patch_embed): PatchEmbed(
      (adap_padding): AdaptivePadding()
      (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (drop_after_pos): Dropout(p=0.0, inplace=False)
    (stages): ModuleList(
      (0): SwinBlockSequence(
        (blocks): ModuleList(
          (0): SwinBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): ShiftWindowMSA(
              (w_msa): WindowMSA(
                (qkv): Linear(in_features=128, out_features=384, bias=True)
                (attn_drop): Dropout(p=0.0, inplace=False)
                (proj): Linear(in_features=128, out_features=128, bias=True)
                (proj_drop): Dropout(p=0.0, inplace=False)
                (softmax): Softmax(dim=-1)
              )
              (drop): DropPath()
            )
            (norm2): LayerNorm((128,), eps=1e-05, el