In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# specify parameters
# Three parameter scopes are handed to NotebookSubstep below; only the substep scope
# carries values in this notebook, the pipeline and step scopes are left empty.
pipeline_params={
}
step_params={
}
substep_params={   
    "MAX_SIZE"     : 640,    # side of the square (MAX_SIZE, MAX_SIZE) scale used by the Resize pipeline steps
    "BATCH"        : 8,      # becomes samples_per_gpu of the dataloader config
    "WORKERS"      : 0,      # becomes workers_per_gpu of the dataloader config
    "SEED"         : 42,     # squared, then passed to mmdet's init_random_seed
    "EPOCH_COUNT"  : 5,      # max_epochs of the EpochBasedRunner
    "MODEL_NAME"   : "yolox_s",  # names the sub-folder of the detector work_dir
    "optimizer_lr" : 0.001,  # learning rate of the Adam optimizer
    # URL fetched with wget; the downloaded file is then set as cfg.load_from
    "pretrain_weights": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth"
}

In [None]:
# define substep interface
from sinara.substep import NotebookSubstep, default_param_values, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

substep = NotebookSubstep(
    pipeline_params,
    step_params,
    substep_params,
    **default_param_values("params/step_params.json"),
)

# Entities this substep reads from the data_prep step
data_prep_inputs = [
    {STEP_NAME: "data_prep", ENTITY_NAME: "train_data"},      # train dataset
    {STEP_NAME: "data_prep", ENTITY_NAME: "eval_data"},       # eval dataset
    {STEP_NAME: "data_prep", ENTITY_NAME: "dataset_config"},  # datasets config
]

# Temporary entities this substep produces
substep_tmp_outputs = [
    {ENTITY_NAME: "train_eval_data"},     # datasets for train and eval on next substep
    {ENTITY_NAME: "train_eval_config"},   # datasets configurations
    {ENTITY_NAME: "pretrain_weights"},    # pretrain weights
    {ENTITY_NAME: "yolox_obj_detector"},  # resulting detector files
]

substep.interface(inputs=data_prep_inputs, tmp_outputs=substep_tmp_outputs)

substep.print_interface_info()

substep.exit_in_visualize_mode()

![interface 1_configure_train.drawio](./imgs/1_configure_train.drawio.png)

In [None]:
import logging
import os.path as osp
import os
from pathlib import Path
# Root logger verbosity comes from the optional 'loggingLevel' substep parameter ('INFO' when absent)
root_log_level = substep_params.get('loggingLevel', 'INFO')
logging.getLogger().setLevel(root_log_level)

In [None]:
from sinara.store import SinaraStore

inputs = substep.inputs(step_name="data_prep")
tmp_outputs = substep.tmp_outputs()

# (store entity, tmp destination) pairs, unpacked in this order:
#   train images and eval images share the train_eval_data directory,
#   the dataset config from the previous step goes to train_eval_config.
dearchive_plan = (
    (inputs.train_data, tmp_outputs.train_eval_data),
    (inputs.eval_data, tmp_outputs.train_eval_data),
    (inputs.dataset_config, tmp_outputs.train_eval_config),
)
for store_path, tmp_dir in dearchive_plan:
    SinaraStore.dearchive_store_files_to_tmp(store_path=store_path, tmp_dir=tmp_dir)

In [None]:
# Report the torch version and make sure a CUDA device is usable before the GPU config is built
import torch
print(f"{torch.__version__=}")

# Explicit raise instead of `assert`: an environment check must not disappear under `python -O`.
# With the guard in place the device report below needs no second availability test.
if not torch.cuda.is_available():
    raise RuntimeError("Cuda not available")

device_id = torch.cuda.current_device()
device_name = torch.cuda.get_device_name(device_id)
print(f"{device_name=}")
print(f"{torch.cuda.device_count()=}")

#### Download the pretrained weights into the pretrain_weights directory

In [None]:
tmp_outputs = substep.tmp_outputs()
data_url = substep_params["pretrain_weights"]
pretrain_weights_path = osp.join(tmp_outputs.pretrain_weights, osp.basename(data_url))
 
!wget {data_url} -O {pretrain_weights_path}

#### Read config and append training parameters

In [None]:
import json

# Dataset config produced by the data_prep step (unpacked into train_eval_config earlier)
config_fn = os.path.join(tmp_outputs.train_eval_config, 'config.json')

with open(config_fn) as f_id:
    CONFIG = json.load(f_id)

# Store a copy of the substep parameters: assigning the dict itself would make the
# "Normalize" entry added below leak back into substep_params.
CONFIG['train_config_parameters'] = dict(substep_params)
# Normalize RGB images with standard normalization factor mean-std imagenet
CONFIG['train_config_parameters']["Normalize"] = {'mean': [123.675, 116.28, 103.53],
                                                  'std': [58.395, 57.12, 57.375],
                                                  'to_rgb': True}

## Setting up the training model

### Initializing modules from mmdetection, mmcv

In [None]:
import copy
import time
import warnings
import math

import mmcv
from mmcv import Config, ConfigDict

import mmdet
from mmdet.apis import init_random_seed, set_random_seed, train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import get_root_logger
import mmcls.models

# Report the installed mmcv / mmdet versions as "<module>.__version__='<value>'"
for lib_label, lib_module in (("mmcv", mmcv), ("mmdet", mmdet)):
    print(f"{lib_label}.__version__={lib_module.__version__!r}")

### Defining basic variables from the config

In [None]:
# Basic variables derived from the substep parameters and the dataset config

# Seeding: the configured SEED is squared, passed through mmdet's init_random_seed,
# and the result is applied with set_random_seed (non-deterministic mode).
base_seed = substep_params['SEED'] ** 2
seed = init_random_seed(base_seed)
set_random_seed(seed, deterministic=False)

# Training-loop and model parameters, read in one pass
EPOCH_COUNT, BATCH, WORKERS, MODEL_NAME, optimizer_lr = (
    substep_params[param_name]
    for param_name in ('EPOCH_COUNT', 'BATCH', 'WORKERS', 'MODEL_NAME', 'optimizer_lr')
)

# Interval passed to checkpoint_config; -1 switches checkpoint_config off later on
CHECKPOINT_INTERVAL = 10

# Output location, image size and class list
PROJECT_FOLDER = tmp_outputs.yolox_obj_detector
MAX_SIZE = substep_params['MAX_SIZE']
CLASSES = CONFIG['CLASSES']
CLASSES_COUNT = len(CLASSES)

### Setting up basic model training config parameters

In [None]:
# Model configs bundled with the installed mmdet package
cfg_dir = osp.join(osp.dirname(mmdet.__file__), '.mim', 'configs')

cfg_path = "yolox/yolox_s_8x8_300e_coco.py"

cfg = Config.fromfile(osp.join(cfg_dir, cfg_path))

# Keep the seed that was applied to the RNGs inside the config as well, so the dumped config
# carries it; mmdet's training entry point reads cfg.seed when it builds the train dataloader.
cfg.seed = seed

# Evaluate with the bbox metric and keep the checkpoint with the best bbox_mAP
cfg.evaluation = {'metric': ['bbox'], 'save_best' : 'bbox_mAP'}
# cfg.evaluation = {'metric': ['segm'], 'save_best' : 'segm_mAP'}

#### set and create directory for saving model
cfg.work_dir = os.path.join(PROJECT_FOLDER, MODEL_NAME)
os.makedirs(cfg.work_dir, exist_ok=True)

cfg.img_size = MAX_SIZE
cfg.model.bbox_head.num_classes = CLASSES_COUNT
cfg.model.test_cfg.nms.iou_threshold=0.5

#### Set pretrain_weights
cfg.load_from = pretrain_weights_path

#### Set frozen backbone
cfg.model.backbone.frozen_stages = 4

# One training pass followed by one validation pass
workflow = [('train', 1), ('val', 1)]
cfg.workflow = workflow

cfg.img_norm_cfg = CONFIG['train_config_parameters']["Normalize"]
cfg.data_root = ''

### Augmentation pipelines for training and validation

In [None]:
from mmdet.datasets import PIPELINES

# Register the DataAsList augmentation.
# force=True lets this cell be re-run in the same kernel: the previous registration is
# replaced instead of raising "already registered", so no exception has to be swallowed.
@PIPELINES.register_module(force=True)
class DataAsList:
    """Pipeline step that wraps every value of the results dict in a one-element list."""

    def __call__(self, results):
        """Return a new dict mapping each key of ``results`` to ``[value]``."""
        return {key: [val] for key, val in results.items()}

# Square target resolution used by the Resize step of both pipelines
cfg.img_scale = (MAX_SIZE, MAX_SIZE)
dataset_type = 'CocoDataset'

# Training: load image + boxes, resize without keeping the aspect ratio, drop boxes
# smaller than 4x4, random horizontal flip, pad to a multiple of 32, normalize, collect.
cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(MAX_SIZE, MAX_SIZE), keep_ratio=False),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Pad', size_divisor=32),
    {'type': 'Normalize', **cfg.img_norm_cfg},
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

# Evaluation: same geometry with flipping disabled (flip_ratio 0.0); only the image is
# collected and every collected value is wrapped in a list by DataAsList.
cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', img_scale=(MAX_SIZE, MAX_SIZE), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.0),
    dict(type='Pad', size_divisor=32),
    {'type': 'Normalize', **cfg.img_norm_cfg},
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img']),
    dict(type='DataAsList'),
]

def _coco_dataset_cfg(annotation_key, pipeline):
    """Build one CocoDataset config rooted at the train_eval_data directory.

    annotation_key -- key of CONFIG holding the annotation file name
    pipeline       -- list of pipeline steps applied to the dataset
    """
    data_dir = tmp_outputs.train_eval_data
    return dict(
        type='CocoDataset',
        filter_empty_gt=False,  # for empty transporter (without objects)
        img_prefix=data_dir,
        ann_file=osp.join(data_dir, CONFIG[annotation_key]),
        pipeline=pipeline,
        classes=CLASSES,
    )


cfg.train_dataset = _coco_dataset_cfg("train_coco_annotation", cfg.train_pipeline)
cfg.test_dataset = _coco_dataset_cfg("val_coco_annotation", cfg.test_pipeline)

# Dataloader settings; the evaluation dataset serves as both `val` and `test`
data = dict(
    samples_per_gpu=BATCH,
    workers_per_gpu=WORKERS,
    train=cfg.train_dataset,
    val=cfg.test_dataset,
    test=cfg.test_dataset,
)

cfg['data'] = ConfigDict(data)

### Setting up the optimizer configuration for model training

In [None]:
# Get count images of training dataset
%%time

import io, re
with io.open(cfg.data.train['ann_file']) as fd:
    train_dataset = fd.read()
train_dataset = re.findall('file_name', train_dataset)

In [None]:
# Setting up the optimizer configuration
# Clamp to at least one iteration per epoch: a dataset smaller than one batch would
# otherwise give 0 here and a negative MAX_ITER below.
ITERS_IN_ONE_EPOCH = max(1, len(train_dataset) // BATCH)
MAX_ITER = (ITERS_IN_ONE_EPOCH * EPOCH_COUNT) - 1
print(f"{ITERS_IN_ONE_EPOCH=}")
print(f"{MAX_ITER=}")

cfg.num_last_epochs = 15

cfg.optimizer = dict(type='Adam', lr=optimizer_lr)

cfg.optimizer_config = {} #dict(grad_clip=None)

cfg.lr_config = dict(
    # _delete_=True,
    policy= 'CosineAnnealing', #'YOLOX',
    warmup='linear',
    warmup_ratio=0.001,
    # Warm up over the first quarter of all training iterations; kept >= 1 because
    # mmcv's LR updater hook requires a positive warmup_iters when warmup is enabled.
    warmup_iters=max(1, int(MAX_ITER * 0.25)),
    min_lr_ratio=1e-5)

cfg.runner = dict(type='EpochBasedRunner', max_epochs=EPOCH_COUNT)

# A CHECKPOINT_INTERVAL of -1 disables the checkpoint config altogether
checkpoint_config = None if CHECKPOINT_INTERVAL == -1 else dict(interval=CHECKPOINT_INTERVAL)

log_config = dict(
    # Log every 2*BATCH iterations, but at least twice per epoch on small datasets;
    # never below 1 so that a one-iteration epoch still gets a usable interval.
    interval=max(1, min(BATCH * 2, ITERS_IN_ONE_EPOCH // 2)),
    hooks=[
        dict(type='TextLoggerHook', ignore_last=False)
    ]
)

# NOTE(review): the purpose of the "os" entry is not evident from this notebook - confirm
cfg.merge_from_dict({
    "os" : None,
    "ITERS_IN_ONE_EPOCH" : ITERS_IN_ONE_EPOCH,
    "MAX_ITER" : MAX_ITER,
    "EPOCH_COUNT" : EPOCH_COUNT,
    "data" : data,
    "checkpoint_config" : checkpoint_config,
    "log_level" : logging.getLevelName(logging.root.level),
    "log_config" : log_config,
    "resume_from": None,
})

# Resume from the latest checkpoint automatically only when no initial weights are loaded
cfg.auto_resume = cfg.load_from is None

cfg.gpu_ids = [torch.cuda.current_device()]
cfg.device='cuda'

### Saving the configured config

In [None]:
# Write the assembled mmdet config into the model work dir
config_file = osp.join(cfg.work_dir, "last_cfg.py")
cfg.dump(file=config_file)

# Record the config file and work dir locations, then rewrite the JSON dataset config in place
CONFIG.update(config_file=config_file, work_dir=cfg.work_dir)

with open(config_fn, mode='w') as f_id:
    json.dump(CONFIG, f_id, indent=4)