In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# Substep parameters used for an interactive run.
# During a job run this cell is replaced with the parameters from the json in the params subfolder.
substep_params = {
    # zip archive with the COCO val2017 images
    "coco_datasets_images_url": "http://images.cocodataset.org/zips/val2017.zip",
    # zip archive with the COCO train/val 2017 annotations
    "coco_datasets_annotations_url": "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
    # YOLOX-S checkpoint (.pth) hosted on download.openmmlab.com (mmdetection)
    "yolox_pth_pretrain_weights": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth",
}

In [None]:
# load pipeline and step parameters - do not edit
# Both results are passed to NotebookSubstep(...) when the substep interface is defined below.
# NOTE(review): pprint=True presumably pretty-prints the loaded parameters - confirm in the Sinara docs
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports are left at the place of their usage
import os
import os.path as osp

In [None]:
# Define the substep interface: the temporary entities this notebook works with
# and the entities it stores as outputs.
from sinara.substep import (
    NotebookSubstep,
    ENV_NAME,
    PIPELINE_NAME,
    ZONE_NAME,
    STEP_NAME,
    RUN_ID,
    ENTITY_NAME,
    ENTITY_PATH,
    SUBSTEP_NAME,
)

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

substep.interface(
    tmp_entities=[
        {ENTITY_NAME: "downloaded_archives"},         # temporarily downloaded zip archives
        {ENTITY_NAME: "coco_datasets_images"},        # temporary images extracted from the downloaded_archives zip
        {ENTITY_NAME: "coco_datasets_annotations"},   # temporary annotations extracted from the downloaded_archives zip
        {ENTITY_NAME: "yolox_pth_pretrain_weights"},  # temporarily downloaded pretrain weights
    ],
    outputs=[
        {ENTITY_NAME: "coco_datasets_images"},        # images stored for use in the next steps
        {ENTITY_NAME: "coco_datasets_annotations"},   # annotations stored for use in the next steps
        {ENTITY_NAME: "yolox_pth_pretrain_weights"},  # pretrain weights stored for use in the next steps
    ],
)

substep.print_interface_info()

# NOTE(review): presumably ends execution here when the notebook is run in visualize mode - confirm in the Sinara docs
substep.exit_in_visualize_mode()

In [None]:
# run spark
from sinara.spark import SinaraSpark
from sinara.archive import SinaraArchive

# NOTE(review): the meaning of the 0 argument is not visible in this notebook - confirm against SinaraSpark.run_session
spark = SinaraSpark.run_session(0)
# archive is used at the end of the notebook to pack the tmp entities into the step outputs
archive = SinaraArchive(spark)
# last expression of the cell, so its return value is rendered as the cell output
SinaraSpark.ui_url()

### Loading coco_datasets_images and coco_datasets_annotations zip archives

In [None]:
# Temporary entities of this substep; their attributes are used below as directory paths
tmp_entities = substep.tmp_entities()

# Source URLs of the two COCO archives, taken from the substep parameters
coco_datasets_images_url, coco_datasets_annotations_url = (
    substep_params["coco_datasets_images_url"],
    substep_params["coco_datasets_annotations_url"],
)

In [None]:
# Download coco_datasets_images
!wget {coco_datasets_images_url} -O {osp.join(tmp_entities.downloaded_archives, osp.basename(coco_datasets_images_url))}

In [None]:
# Download annotation
!wget {coco_datasets_annotations_url} -O {osp.join(tmp_entities.downloaded_archives, osp.basename(coco_datasets_annotations_url))}

In [None]:
# unzip
!unzip -q {osp.join(tmp_entities.downloaded_archives, osp.basename(coco_datasets_images_url))} -d {tmp_entities.coco_datasets_images}
!unzip -q {osp.join(tmp_entities.downloaded_archives, osp.basename(coco_datasets_annotations_url))} -d {tmp_entities.coco_datasets_annotations}

#### Download the pretrained weights into the yolox_pth_pretrain_weights directory

In [None]:
yolox_pth_pretrain_weights_url = substep_params["yolox_pth_pretrain_weights"]
yolox_pth_pretrain_weights_path = osp.join(tmp_entities.yolox_pth_pretrain_weights, osp.basename(yolox_pth_pretrain_weights_url))
 
!wget {yolox_pth_pretrain_weights_url} -O {yolox_pth_pretrain_weights_path}

### Archiving coco_datasets_images, coco_datasets_annotations and yolox_pth_pretrain_weights for the next step

In [None]:
# Store the extracted COCO folders and the downloaded weights as outputs of step data_load.
# The code expects the archives to have been extracted into a "val2017" and an "annotations"
# sub-folder of the corresponding tmp entity, and packs those sub-folders.
tmp_dir_coco_datasets_images = osp.join(tmp_entities.coco_datasets_images, "val2017")
tmp_dir_coco_datasets_annotations = osp.join(tmp_entities.coco_datasets_annotations, "annotations")

outputs = substep.outputs()

# images
archive.pack_files_from_tmp_to_store(
    tmp_entity_dir=tmp_dir_coco_datasets_images,
    store_path=outputs.coco_datasets_images,
)
# annotations
archive.pack_files_from_tmp_to_store(
    tmp_entity_dir=tmp_dir_coco_datasets_annotations,
    store_path=outputs.coco_datasets_annotations,
)
# pretrain weights (the whole tmp entity directory)
archive.pack_files_from_tmp_to_store(
    tmp_entity_dir=tmp_entities.yolox_pth_pretrain_weights,
    store_path=outputs.yolox_pth_pretrain_weights,
)

In [None]:
# Stop spark
# Counterpart of the SinaraSpark.run_session(0) call made earlier in this notebook
SinaraSpark.stop_session()