In [1]:
# Unpack the gzip-compressed archive dataset.tar.gz into the current working directory.
# NOTE(review): presumably this produces the dataset/ tree that the later cells extend — confirm.
! tar -xzf dataset.tar.gz

In [2]:
import boto3
import os
import json
import cv2
import numpy as np
import yaml

In [3]:
from utils import utils

In [4]:
# --- Filesystem layout -------------------------------------------------------
UTILS_DIR_PATH = "utils/"
DATASET_PATH = "dataset/"
LABELS_INFO = f"{UTILS_DIR_PATH}labels.json"
DATA_CONFIG_PATH = "utils/data.yaml"
TMP_LABEL = "/tmp/ls-label.txt"

# --- Generation parameters ---------------------------------------------------
# Number of frames requested from each source video.
NB_FRAMES_PER_VIDEO = 20

# --- S3 settings -------------------------------------------------------------
# Credentials and endpoint are read from the environment; a variable that is
# not set yields None here rather than raising.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET")
AWS_S3_ENDPOINT = os.getenv("AWS_S3_ENDPOINT")

TARGET_BUCKET_NAME = "label-studio-sink"

# One session holding the credentials, and an S3 client bound to the
# configured endpoint.
session = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
client = session.client("s3", endpoint_url=AWS_S3_ENDPOINT)

In [5]:
def generate_dirs():
    """Create the dataset root plus a labels/ and images/ folder per split.

    The splits are train, test and val. Directories that already exist are
    left as they are (``exist_ok=True``), so the function can be re-run safely.
    """
    # Build the full list first; creation order is root, then labels/*, then images/*.
    directories = [DATASET_PATH]
    for kind in ("labels", "images"):
        for split_name in ("train", "test", "val"):
            directories.append(DATASET_PATH + f"{kind}/{split_name}")

    for directory in directories:
        os.makedirs(directory, exist_ok=True)

In [6]:
generate_dirs()
# JSON is UTF-8 by specification; do not rely on the platform default encoding.
with open(LABELS_INFO, encoding="utf-8") as f:
    label_info = json.load(f)

# Load every traffic-sign image exactly once, before touching the video.
# The sign images do not depend on the frame, so reading them inside the frame
# loop would decode each file once per frame. Loading up front also means a
# missing/unreadable image aborts the cell before any sample has been written.
traffic_signs = []  # entries: (label, label_id, image, (bbox_w, bbox_h))
for label, label_entry in label_info.items():
    label_id = label_entry["label_id"]
    for traffic_sign_info in label_entry["children"].values():
        image_path = UTILS_DIR_PATH + traffic_sign_info["path"]
        bbox_size = (int(traffic_sign_info["bbox_w"]), int(traffic_sign_info["bbox_h"]))
        # IMREAD_UNCHANGED keeps the file's channels as stored
        # (presumably to preserve an alpha channel — confirm against utils.insert_image).
        inserted_image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        if inserted_image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError(f"Error: Couldn't load the inserted image file: {image_path}")
        traffic_signs.append((label, label_id, inserted_image, bbox_size))

video_path = UTILS_DIR_PATH + "video/video1.mp4"
random_frames = utils.extract_random_frames(video_path, NB_FRAMES_PER_VIDEO)
nb_frames = len(random_frames)

# For every extracted frame, produce one sample per traffic sign.
for i, frame in enumerate(random_frames):
    # Only height and width are needed; any channel dimension is ignored.
    frame_height, frame_width = frame.shape[:2]
    for label, label_id, inserted_image, bbox_size in traffic_signs:
        # Work on copies so each sample starts from the pristine frame and sign image
        # (guards against the helper modifying its inputs in place — not verifiable from here).
        frame_copy, coordinates = utils.insert_image(
            frame.copy(), inserted_image.copy(), frame_width, frame_height, bbox_size
        )

        # Save labels and images in yolo format
        yolo_format = utils.to_yolo_format(frame_width, frame_height, coordinates)
        split = utils.determine_split(i, nb_frames)
        utils.add_to_yolo_dataset(frame_copy, split, label_id, label, yolo_format, DATASET_PATH)

In [7]:
# Pack the dataset directory into a gzip-compressed archive named dataset-full.tar.gz.
! tar -czf dataset-full.tar.gz dataset