In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
#2. specify parameters
# No pipeline-level or step-level parameters are set in this notebook.
pipeline_params = dict()
step_params = dict()

# Parameters of this substep: input size, data-loader settings, seed,
# training length, model name, learning rate and the pretrained checkpoint URL.
substep_params = dict(
    MAX_SIZE=640,
    BATCH=8,
    WORKERS=0,
    SEED=42,
    EPOCH_COUNT=5,
    MODEL_NAME="yolox_s",
    optimizer_lr=0.001,
    pretrain_weights="https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth",
)

In [None]:
#3 define substep interface
from sinara.substep import NotebookSubstep, default_param_values, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

# Build the substep from the three parameter dicts declared above, plus whatever
# default_param_values() returns for "params/step_params.json"
# (presumably step-level defaults — confirm against the sinara documentation).
substep = NotebookSubstep(pipeline_params, step_params, substep_params, **default_param_values("params/step_params.json"))

# Declare the temporary input entities this notebook works with:
# the augmented dataset, the cached config, the cached data and the pretrained weights.
substep.interface(
    
    tmp_inputs =
    [
        { ENTITY_NAME: "aug_dataset" },
        { ENTITY_NAME: "cache_config" },
        { ENTITY_NAME: "cache_data" },
        { ENTITY_NAME: "pretrain_weights"}        
    ]
    
)

substep.print_interface_info()

# NOTE(review): by its name this stops the notebook here when it is run in
# visualize mode — confirm against the sinara documentation.
substep.exit_in_visualize_mode()

![interface 1_configure_train.drawio](./imgs/1_configure_train.drawio.png)

In [None]:
#4 get substep.interface
tmp_inputs = substep.tmp_inputs()

# Print every temporary input as "tmp_inputs.<entity>=<repr of its value>".
for entity in ("cache_data", "pretrain_weights", "cache_config", "aug_dataset"):
    print(f"tmp_inputs.{entity}={getattr(tmp_inputs, entity)!r}")

In [None]:
import logging
import os
import os.path as osp
from pathlib import Path

# The root logger level is taken from the optional 'loggingLevel' substep
# parameter and falls back to 'INFO'.
root_log_level = substep_params.get('loggingLevel', 'INFO')
logging.getLogger().setLevel(root_log_level)
# Emitted only when the configured level is DEBUG or lower.
logging.debug('Запись.')

In [None]:
#5 run spark
from sinara.spark import SinaraSpark

# Start a Spark session and keep a handle to it; SinaraSpark.stop_session()
# is called in the last cell of the notebook.
spark = SinaraSpark.run_session(0)
# Last expression of the cell, so its return value is displayed
# (presumably the address of the Spark UI — confirm).
SinaraSpark.ui_url()

In [None]:
# Download the pretrained weights into the pretrain_weights entity directory
import subprocess

data_url = substep_params["pretrain_weights"]
pretrain_weights_path = osp.join(tmp_inputs.pretrain_weights, osp.basename(data_url))

# wget is invoked with an argument list (no shell interpolation of the URL or
# path) and check=True, so a failed download raises instead of being ignored.
try:
    subprocess.run(["wget", data_url, "-O", pretrain_weights_path], check=True)
except subprocess.CalledProcessError:
    # "wget -O" creates the output file even when the transfer fails;
    # remove it so that a broken checkpoint is never picked up later.
    if osp.exists(pretrain_weights_path):
        os.remove(pretrain_weights_path)
    raise

# Create the SUCCESS marker only after the download has actually succeeded
Path(osp.join(tmp_inputs.pretrain_weights, '_SUCCESS')).touch()

#### Read config and append training parameters

In [None]:
import json
import os.path as osp

# config.json lives in the cache_config entity; the same path is reused at the
# end of the notebook to write the updated configuration back.
config_fn = osp.join(tmp_inputs.cache_config, 'config.json')

with open(config_fn) as config_stream:
    CONFIG = json.load(config_stream)

# Display the loaded configuration as the cell output.
CONFIG

In [None]:
# Store a copy of the substep parameters in the config. Keys added to
# CONFIG['train_config_parameters'] afterwards must not leak back into
# substep_params, which is still read by later cells.
CONFIG['train_config_parameters'] = dict(substep_params)

In [None]:
# Image normalization settings recorded alongside the training parameters.
normalize_params = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)
CONFIG['train_config_parameters']["Normalize"] = normalize_params

In [None]:
import torch
import torchvision

# Report library versions and whether CUDA is usable.
for report_line in (
    f"{torch.__version__=}",
    f"{torch.cuda.is_available()=}",
    f"{torchvision.__version__=}",
):
    print(report_line)

# Details of the current CUDA device, printed only when CUDA is available.
if torch.cuda.is_available():
    device_id = torch.cuda.current_device()
    device_name = torch.cuda.get_device_name(device_id)
    print(f"{device_name=}", f"{torch.cuda.device_count()=}", sep="\n")

## Setting up the training model



#### Initializing modules from mmdetection, mmcv

Augmentation pipelines for training and validation

In [None]:
import copy
import os
import os.path as osp
import time
import warnings
import math

import mmcv
from mmcv import Config, ConfigDict

import mmdet
from mmdet.apis import init_random_seed, set_random_seed, train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import get_root_logger
import mmcls.models

# Print the installed mmcv and mmdet versions for reference.
print(f"{mmcv.__version__=}")
print(f"{mmdet.__version__=}")

#### Determining the basic parameters for training the model

In [None]:
# The base seed is the square of the SEED substep parameter.
base_seed = substep_params['SEED']**2
# NOTE(review): init_random_seed presumably returns the given seed unchanged
# when one is supplied — confirm against the mmdet API documentation.
seed = init_random_seed(base_seed)
# Seed the random number generators; deterministic mode is left off.
set_random_seed(seed, deterministic=False)

In [None]:
# Training length and data-loader settings from the substep parameters.
EPOCH_COUNT, BATCH, WORKERS = (
    substep_params[key] for key in ('EPOCH_COUNT', 'BATCH', 'WORKERS')
)

# Model name and optimizer learning rate.
MODEL_NAME, optimizer_lr = substep_params['MODEL_NAME'], substep_params['optimizer_lr']

# Interval later passed to checkpoint_config (-1 disables checkpoint_config).
CHECKPOINT_INTERVAL = 10
############################################
# Work directory root, input size and the class list from the loaded config.
PROJECT_FOLDER = tmp_inputs.cache_data
MAX_SIZE = substep_params['MAX_SIZE']
CLASSES = CONFIG['CLASSES']
CLASSES_COUNT = len(CLASSES)

In [None]:
# Config files are looked up in the ".mim/configs" folder of the installed
# mmdet package.
cfg_dir = osp.join(osp.dirname(mmdet.__file__), '.mim', 'configs')
cfg_path = "yolox/yolox_s_8x8_300e_coco.py"

# Load the YOLOX-s base config that the following cells customize.
base_cfg_file = osp.join(cfg_dir, cfg_path)
cfg = Config.fromfile(base_cfg_file)

In [None]:
# Show the base config as loaded, before any overrides are applied.
print(cfg.pretty_text)

In [None]:
# Evaluate with the bbox metric; 'save_best' is keyed on bbox_mAP.
cfg.evaluation = dict(metric=['bbox'], save_best='bbox_mAP')
# (segmentation alternative: metric=['segm'], save_best='segm_mAP')

# All training artifacts go to <cache_data>/<MODEL_NAME>.
cfg.work_dir = osp.join(PROJECT_FOLDER, MODEL_NAME)
cfg.img_size = MAX_SIZE
# Adapt the detection head to the number of classes in the project config.
cfg.model.bbox_head.num_classes = CLASSES_COUNT
cfg.model.test_cfg.nms.iou_threshold = 0.5
# Display the image scale currently stored in the config.
cfg.img_scale

In [None]:
# Point the config at the pretrained weights file downloaded earlier.
cfg.load_from = pretrain_weights_path

In [None]:
# Workflow with a 'train' phase and a 'val' phase, one epoch each.
workflow = [('train', 1), ('val', 1)]
cfg.workflow = workflow

# Normalization settings shared by the train and test pipelines.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
cfg.img_norm_cfg = img_norm_cfg
cfg.data_root = ''

In [None]:
from mmdet.datasets import PIPELINES


# force=True allows this cell to be re-run in the same kernel: the transform
# is re-registered instead of failing because the name is already taken.
# Any other error now surfaces instead of being printed and ignored.
@PIPELINES.register_module(force=True)
class DataAsList:
    """Pipeline transform that wraps every value of the results dict in a one-element list."""

    def __call__(self, results):
        """Return a new dict that maps each key of ``results`` to ``[value]``."""
        return {key: [val] for key, val in results.items()}

In [None]:
cfg.img_scale = (MAX_SIZE, MAX_SIZE)
dataset_type = 'CocoDataset'

# Training transforms: load image and boxes, resize to a fixed square without
# keeping the aspect ratio, filter annotations by minimum box size, random
# flip, pad, normalize and collect the training targets.
cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(MAX_SIZE, MAX_SIZE), keep_ratio=False),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Pad', size_divisor=32),
    dict(type='Normalize', **cfg.img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

# Validation/test transforms: same geometry with a flip ratio of 0.0; the
# custom 'DataAsList' transform is applied as the last step.
cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', img_scale=(MAX_SIZE, MAX_SIZE), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.0),
    dict(type='Pad', size_divisor=32),
    dict(type='Normalize', **cfg.img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img']),
    dict(type='DataAsList'),
]

# Training split: images from the augmented dataset, annotation file named by
# the project config.
cfg.train_dataset = {
    'type': 'CocoDataset',
    'filter_empty_gt': False,  # keep images that have no annotated objects
    'img_prefix': tmp_inputs.aug_dataset,
    'ann_file': osp.join(tmp_inputs.cache_config, CONFIG["train_coco_annotation"]),
    'pipeline': cfg.train_pipeline,
    'classes': CLASSES,
}

# Validation split; also used as the test split below.
cfg.test_dataset = {
    'type': 'CocoDataset',
    'filter_empty_gt': False,
    'img_prefix': tmp_inputs.aug_dataset,
    'ann_file': osp.join(tmp_inputs.cache_config, CONFIG["val_coco_annotation"]),
    'pipeline': cfg.test_pipeline,
    'classes': CLASSES,
}

# Data-loader settings; `data` is merged into the config again in a later cell.
data = {
    'samples_per_gpu': BATCH,
    'workers_per_gpu': WORKERS,
    'train': cfg.train_dataset,
    'val': cfg.test_dataset,
    'test': cfg.test_dataset,
}

cfg['data'] = ConfigDict(data)

In [None]:
%%time

import io
import re

none_parse = lambda x : ''
print('loading_dataset....')
with io.open(cfg.data.train['ann_file']) as fd:
    train_dataset = fd.read()

train_dataset = re.findall('file_name', train_dataset)

print(f"{len(train_dataset)=}")

In [None]:
# Number of whole batches per epoch (the division result is truncated) and
# the index of the last iteration over all epochs.
train_sample_count = len(train_dataset)
ITERS_IN_ONE_EPOCH = int(train_sample_count / BATCH)
MAX_ITER = ITERS_IN_ONE_EPOCH * EPOCH_COUNT - 1

for report_line in (f"{ITERS_IN_ONE_EPOCH=}", f"{MAX_ITER=}"):
    print(report_line)

In [None]:
print(f"{optimizer_lr=}")

cfg.num_last_epochs = 15

# Adam optimizer with the learning rate from the substep parameters.
cfg.optimizer = dict(type='Adam', lr=optimizer_lr)

cfg.optimizer_config = {} #dict(grad_clip=None)

# Cosine-annealing schedule (instead of the 'YOLOX' policy) with a linear
# warmup over the first quarter of all training iterations.
cfg.lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_ratio=0.001,
    warmup_iters=int(MAX_ITER * 0.25),
    min_lr_ratio=1e-5)

cfg.runner = dict(type='EpochBasedRunner', max_epochs=EPOCH_COUNT)

# CHECKPOINT_INTERVAL == -1 switches checkpoint_config off.
checkpoint_config = dict(interval=CHECKPOINT_INTERVAL)
if CHECKPOINT_INTERVAL == -1:
    checkpoint_config = None

# Log every 2*BATCH iterations, but at least twice per epoch when the epoch is
# short. The interval is clamped to 1 because a tiny dataset
# (ITERS_IN_ONE_EPOCH < 2) would otherwise yield 0, and the logger hook takes
# the iteration number modulo this interval.
log_config = dict(
    interval=max(1, min(BATCH * 2, ITERS_IN_ONE_EPOCH // 2)),
    hooks=[
        dict(type='TextLoggerHook', ignore_last=False)
    ]
)

# Merge the derived values and runtime settings into the config.
cfg.merge_from_dict({
    "os" : None,
    "ITERS_IN_ONE_EPOCH" : ITERS_IN_ONE_EPOCH,
    "MAX_ITER" : MAX_ITER,
    "EPOCH_COUNT" : EPOCH_COUNT,
    "data" : data,
    "checkpoint_config" : checkpoint_config,
    "log_level" : logging.getLevelName(logging.root.level),
    "log_config" : log_config,
    "resume_from": None,
})

In [None]:
# Create the work directory (no error if it already exists); the config is
# dumped into it below.
os.makedirs(cfg.work_dir, exist_ok=True)

In [None]:
# Resume automatically from the latest checkpoint only when no initial
# weights (cfg.load_from) are configured.
cfg.auto_resume = cfg.load_from is None

# Train on the current CUDA device.
cfg.gpu_ids = [torch.cuda.current_device()]
cfg.device = 'cuda'

In [None]:
# Show the final config after all overrides above.
print(f'Config:\n{cfg.pretty_text}')

In [None]:
# dump config
# The final training config is written to <work_dir>/last_cfg.py; its path is
# recorded in CONFIG by the next cell.
config_file = osp.join(cfg.work_dir, "last_cfg.py")
cfg.dump(file=config_file)

In [None]:
# Record the dumped training config path and the work directory, then write
# the updated project config back to the config.json it was loaded from.
CONFIG.update(config_file=config_file, work_dir=cfg.work_dir)

with open(config_fn, 'w') as config_stream:
    json.dump(CONFIG, config_stream, indent=4)

In [None]:
# List the contents of the work directory.
!ls {cfg.work_dir}

In [None]:
#stop spark
# Stop the Spark session that was started with SinaraSpark.run_session().
SinaraSpark.stop_session()