In [None]:
# enable the IPython autoreload extension; mode 2 reloads imported modules before each cell execution
%load_ext autoreload
%autoreload 2

In [None]:
# specify substep parameters for interactive run
# this cell will be replaced during job run with the parameters from json within params subfolder
# keep only the substep_params assignment here: since the whole cell is replaced, anything else would be lost
substep_params={}

In [None]:
# load pipeline and step parameters - do not edit
# NOTE(review): pprint=True presumably pretty-prints the loaded parameters - confirm in the sinara docs
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)  # step_params["train_params"] is read later in this notebook

In [None]:
# define substep interface
# Declares the entities this notebook works with; their locations are obtained further down via
# substep.inputs(), substep.tmp_inputs(), substep.tmp_entities() and substep.outputs().
from sinara.substep import NotebookSubstep, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

substep.interface(
    inputs = 
    [
      { STEP_NAME: "data_load", ENTITY_NAME: "yolox_pth_pretrain_weights" }, # pretrain weights prepared by the data_load step
    ],
    # temporary results from the previous step
    tmp_inputs = 
    [
        { ENTITY_NAME: "obj_detect_train_work_dir" }  # temporary working dir for training (last_cfg.py is read from it)
    ],
    tmp_entities = 
    [
       { ENTITY_NAME: "yolox_pth_pretrain_weights" }, # temporary dir the pretrain weights are unpacked into
       { ENTITY_NAME: "obj_detect_inference_files"} # temporary dir where the object detector files are collected
    ],
    outputs = 
    [
        { ENTITY_NAME: "obj_detect_inference_files"} # stored object detector files (packed from the tmp dir at the end)
    ]
)

substep.print_interface_info()

# NOTE(review): presumably stops the notebook here when it is run in visualize mode - confirm in the sinara docs
substep.exit_in_visualize_mode()

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports are left at the place of their usage

import glob
import os.path as osp
import os

### Initializing modules 
import torch
import copy
import time

import mmcv
from mmengine.config import Config as MmengineConfig

import mmdet
from mmengine.runner import set_random_seed as mm_set_random_seed

import json

from mmengine.runner import Runner

from pathlib import Path
import shutil

import os.path as osp
import io

In [None]:
# Check that a CUDA device is available and report which device will be used.
# An explicit exception is raised instead of `assert`: assert statements are removed
# when Python runs with -O, which would silently disable this check.
if not torch.cuda.is_available():
    raise RuntimeError("Cuda not available")

device_id = torch.cuda.current_device()
device_name = torch.cuda.get_device_name(device_id)
print(f"{device_name=}")
print(f"{torch.cuda.device_count()=}")

In [None]:
# run spark
# Sinara imports are intentionally kept next to their usage (see the note in the imports cell)
from sinara.spark import SinaraSpark
from sinara.archive import SinaraArchive

spark = SinaraSpark.run_session(0)
# archive is used below to unpack files from the store to tmp dirs and to pack tmp dirs back to the store
archive = SinaraArchive(spark)
# last expression of the cell, so its return value (if any) is shown as the cell output
SinaraSpark.ui_url()

### Initializing obj_detector training

In [None]:
# resolve the entities declared in substep.interface()
tmp_inputs = substep.tmp_inputs()
tmp_entities = substep.tmp_entities()
data_load_inputs = substep.inputs(step_name = "data_load")

# training parameters of this step; train_params["SEED"] is used for seeding before training
train_params = step_params["train_params"]

In [None]:
# Load the mmengine config stored in the training work dir
mmengine_cfg_path = osp.join(tmp_inputs.obj_detect_train_work_dir, 'last_cfg.py')
mmengine_cfg = MmengineConfig.fromfile(mmengine_cfg_path)

# set random seeds
mm_set_random_seed(train_params["SEED"], deterministic=False)

# add pretrain weights to mmengine config before training
archive.unpack_files_from_store_to_tmp(store_path=data_load_inputs.yolox_pth_pretrain_weights, tmp_entity_dir=tmp_entities.yolox_pth_pretrain_weights)

# glob returns matches in arbitrary order, so sort them to make the selected file reproducible,
# and fail with a clear message (instead of a bare IndexError) when nothing was unpacked
yolox_pth_pretrain_weights = sorted(glob.glob(f"{tmp_entities.yolox_pth_pretrain_weights}/*.pth"))
if not yolox_pth_pretrain_weights:
    raise FileNotFoundError(
        f"No *.pth pretrain weights found in {tmp_entities.yolox_pth_pretrain_weights}"
    )
mmengine_cfg.load_from = yolox_pth_pretrain_weights[0]

### Start obj_detector training 

In [None]:
# build the mmengine runner from the prepared config (pretrain weights are set in load_from) and run training
runner = Runner.from_cfg(mmengine_cfg)
runner.train()

### Collecting obj_detect_inference_files

#### Collecting test image from a validation dataset

In [None]:
# Take the first image listed in the validation COCO annotations and copy it
# into the inference files dir as "test<original extension>".
tmp_entities = substep.tmp_entities()

# JSON text is UTF-8 by specification, so do not depend on the locale default encoding
val_ann_path = mmengine_cfg.val_dataloader.dataset.ann_file
with open(val_ann_path, encoding="utf-8") as ann_file:
    val_coco_annotations = json.load(ann_file)

# Explicit exceptions with messages instead of bare asserts: asserts are removed under
# `python -O` and give no hint about what went wrong; an empty "images" list is also caught here.
val_images = val_coco_annotations.get("images") if val_coco_annotations else None
if not val_images:
    raise ValueError(f"No images listed in validation annotations: {val_ann_path}")

src_test_image_file_name = osp.join(mmengine_cfg.val_dataloader.dataset.data_prefix.img,
                                    val_images[0]["file_name"]
                                   )
if not osp.exists(src_test_image_file_name):
    raise FileNotFoundError(f"Test image not found: {src_test_image_file_name}")

# keep the original extension so the copied file stays a valid image of the same format
test_image_file_extension = Path(src_test_image_file_name).suffix
dst_test_image_file_name = osp.join(tmp_entities.obj_detect_inference_files, f"test{test_image_file_extension}")

shutil.copy(src_test_image_file_name, dst_test_image_file_name)

#### Collecting train results
(weights, config, test image) for subsequent transfer to other components

During training, intermediate weights of the neural network may be saved (for example, at epochs 10, 20, 30, etc.),
so it does not make much sense to pass all of these intermediate files on to another step of the pipeline.
Therefore, we copy only the required weights and configs into a separate directory, and then copy the files from that directory to outputs.

In [None]:
# Copy the model config and the last and best model weights into the inference files dir
inference_files_dir = tmp_entities.obj_detect_inference_files

shutil.copy(mmengine_cfg.filename,
            osp.join(inference_files_dir, osp.basename(mmengine_cfg.filename))
           )

# <work_dir>/last_checkpoint is a text file whose content is the path of the latest checkpoint;
# strip surrounding whitespace so that a trailing newline cannot corrupt the path
with io.open(osp.join(mmengine_cfg.work_dir, "last_checkpoint")) as fd:
    last_checkpoint = fd.read().strip()
out_last_checkpoint = osp.join(inference_files_dir, "latest_checkpoint.pth")
shutil.copy(last_checkpoint, out_last_checkpoint)

# Build each destination from the file's base name instead of str.replace() on the full path:
# replace() substitutes every occurrence of work_dir in the string, which breaks for values such as "."
best_models = glob.glob(f"{mmengine_cfg.work_dir}/*best*.pth")
for fpath in best_models:
    shutil.copy(fpath, osp.join(inference_files_dir, osp.basename(fpath)))

#### Preparing the mmengine config for inference

In [None]:
# Blank out the training-time file locations (load_from, work dir, annotation files, image prefixes)
# in the config copy that sits in the inference files dir, then write the copy back in place.
# NOTE: mmengine_cfg is rebound to this copy and its work_dir becomes "", so the cells above
# that read mmengine_cfg.work_dir must not be re-run after this one without reloading the config.

inference_cfg_path = osp.join(tmp_entities.obj_detect_inference_files, "last_cfg.py")
mmengine_cfg = MmengineConfig.fromfile(inference_cfg_path)

mmengine_cfg.load_from = ""
mmengine_cfg.work_dir = ""

# the train dataset config is nested one level deeper (dataset.dataset) than the val/test ones
train_dataset_cfg = mmengine_cfg.train_dataloader.dataset.dataset
train_dataset_cfg.ann_file = ""
train_dataset_cfg.data_prefix = dict(img="")

# val and test share the same layout: a dataloader with a dataset, plus an evaluator
for split in ("val", "test"):
    split_dataset_cfg = getattr(mmengine_cfg, f"{split}_dataloader").dataset
    split_dataset_cfg.ann_file = ""
    split_dataset_cfg.data_prefix = dict(img="")
    getattr(mmengine_cfg, f"{split}_evaluator").ann_file = ""

mmengine_cfg.dump(file=inference_cfg_path)

### Save collected obj_detect_inference_files

In [None]:
# pack the files collected in the tmp inference files dir into the store path of the declared output entity
outputs = substep.outputs()
archive.pack_files_from_tmp_to_store(tmp_entity_dir=tmp_entities.obj_detect_inference_files, store_path=outputs.obj_detect_inference_files)

In [None]:
# stop spark
# (the session was started with SinaraSpark.run_session(0) earlier in this notebook)
SinaraSpark.stop_session()