# Matting训练

同济子豪兄 2023-2-27

## 进入 MMEditing 主目录

In [1]:
import os
os.chdir('mmediting')

## 导入工具包

In [2]:
from tqdm import tqdm
import mmengine
from mmengine.runner import set_random_seed

## 下载matting数据集

来源：alphamatting.com

In [4]:
# 创建文件夹
!mkdir data/alphamatting

In [3]:
# 下载压缩包
!wget https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230227-MMEditing/Matting_Dataset.zip -P data/alphamatting

--2023-02-27 19:29:28--  https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230227-MMEditing/Matting_Dataset.zip
正在连接 172.16.0.13:5848... 已连接。
已发出 Proxy 请求，正在等待回应... 200 OK
长度： 20866578 (20M) [application/zip]
正在保存至: “data/alphamatting/Matting_Dataset.zip”


2023-02-27 19:29:29 (23.9 MB/s) - 已保存 “data/alphamatting/Matting_Dataset.zip” [20866578/20866578])



In [4]:
# 解压
!unzip -d data/alphamatting/ data/alphamatting/Matting_Dataset.zip >> /dev/null

In [5]:
# 删除压缩包
!rm -rf data/alphamatting/Matting_Dataset.zip

## 创建训练集标注

In [3]:
num_file = 27
num_training = 20  # use 20 samples for training, 7 for test

training_ann = []
for i in tqdm(range(num_training)):
    ann = dict()
    ann['merged_path'] = f'merged/GT{i+1:02d}.png'
    ann['alpha_path'] = f'alpha/GT{i+1:02d}.png'
    # since data from alphamatting.com is not composited, we use original image 
    # as fg and bg
    ann['fg_path'] = ann['merged_path']
    ann['bg_path'] = ann['merged_path']
    training_ann.append(ann)

mmengine.dump(training_ann, './data/alphamatting/training_list.json')

100%|██████████| 20/20 [00:00<00:00, 129254.36it/s]


## 创建测试集标注

In [5]:
num_trimap = 1
test_ann = []
for i in tqdm(range(num_training, num_file)):
    for j in range(num_trimap):
        ann = {}
        ann['merged_path'] = f'merged/GT{i+1:02d}.png'
        ann['trimap_path'] = f'trimap/Trimap{j+1}/GT{i+1:02d}.png'
        ann['alpha_path'] = f'alpha/GT{i+1:02d}.png'
        test_ann.append(ann)

mmengine.dump(test_ann, './data/alphamatting/test_list.json')

100%|██████████| 7/7 [00:00<00:00, 22812.84it/s]


## config配置文件

In [3]:
from mmengine import Config
cfg = Config.fromfile('./configs/indexnet/indexnet_mobv2_1xb16-78k_comp1k.py')

In [4]:
# print(cfg.pretty_text)

In [5]:
# 训练集
cfg.dataset_type = 'AdobeComp1kDataset'
cfg.train_dataloader.dataset.type = cfg.dataset_type
cfg.train_dataloader.dataset.ann_file = './data/alphamatting/training_list.json'
cfg.train_dataloader.dataset.data_prefix = dict(img_path = './data/alphamatting/')

# 测试集
cfg.val_dataloader.dataset.type = cfg.dataset_type
cfg.val_dataloader.dataset.ann_file = './data/alphamatting/test_list.json'
cfg.val_dataloader.dataset.data_prefix = dict(img_path = './data/alphamatting/')

# 测试集
cfg.test_dataloader = cfg.val_dataloader

# 预训练模型
# cfg.model.pretrained = None
cfg.load_from = './checkpoint/indexnet_mobv2_1x16_78k_comp1k_SAD-45.6_20200618_173817-26dd258d.pth'

# 保存模型和日志的目录
cfg.work_dir = './tutorial_exps/indexnet'

# Use smaller batch size for training
# cfg.data.train_dataloader.samples_per_gpu = 4
cfg.train_dataloader.batch_size = 4
# cfg.data.workers_per_gpu = 1

# 初始学习率（batch size为16时的学习率）
# cfg.optimizers.lr = cfg.optimizers.lr / 4

cfg.total_iters = 50
cfg.lr_config = None

# # 每隔多少次迭代，在测试集上评估模型性能
# cfg.evaluation.interval = 20
# # 每隔多少次迭代，保存模型权重文件
# cfg.checkpoint_config.interval = 40
# # 每隔多少次迭代，输出日志
# cfg.log_config.interval = 5

# 训练迭代次数
cfg.train_cfg.max_iters = 200
# 测试集上评估模型间隔
cfg.train_cfg.val_interval = 20
# 日志记录间隔
cfg.default_hooks.logger.interval = 5
# 模型权重保存间隔
cfg.default_hooks.checkpoint.interval = 40

# 随机数种子
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpus = 1

## 查看完整config配置文件

In [6]:
# print(cfg.pretty_text)

## 保存config配置文件

In [7]:
cfg.dump('new_cfg.py')

## 准备训练

In [8]:
from mmengine.runner import Runner
from mmedit.utils import register_all_modules

# register all modules in mmseg into the registries
# do not init the default scope here because it will be init in the runner
register_all_modules(init_default_scope=False)
runner = Runner.from_cfg(cfg)

02/27 21:25:24 - mmengine - [4m[37mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.7.10 (default, Jun  4 2021, 14:48:32) [GCC 7.5.0]
    CUDA available: True
    numpy_random_seed: 209652396
    GPU 0: NVIDIA GeForce RTX 3060
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.2, V11.2.152
    GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
    PyTorch: 1.10.1+cu113
    PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=co

## 开始训练

如果遇到报错`CUDA out of memeory`，重启实例或使用显存更高的实例即可。

In [9]:
runner.train()

FileNotFoundError: class `IterBasedTrainLoop` in mmengine/runner/loops.py: class `AdobeComp1kDataset` in mmedit/datasets/comp1k_dataset.py: [Errno 2] No such file or directory: 'data/adobe_composition-1k/./data/alphamatting/training_list.json'