In [1]:
import argparse
import os
import sys
import tempfile
from pathlib import Path

import cv2
import h5py
import numpy as np
from naoth.log import BoundingBox
from tqdm import tqdm

tools_path = "../"
sys.path.append(tools_path)
print(sys.path)

from tools import (
    download_from_minio,
    get_labelstudio_client,
    get_minio_client,
    get_postgres_cursor,
    load_image_as_yuv422_y_only_pil,
)

['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/stella/robocup/naoth-deeplearning/venv_tf/lib/python3.10/site-packages', '../']


In [2]:
def download_images_and_masks():
    sql_query = f"""SELECT ls_project_top, bucket_top FROM robot_logs WHERE ls_project_top::int = 469"""
    print(sql_query)
    pg_cur = get_postgres_cursor()
    pg_cur.execute(sql_query)
    rtn_val = pg_cur.fetchall()
    data = [x for x in rtn_val]

    mclient = get_minio_client()
    ls = get_labelstudio_client()
    print(data)
    for ls_prj, bucket_name in sorted(data):

        print(f"Working on project {ls_prj}")

        project = ls.get_project(ls_prj)
        tasks = project.get_labeled_tasks()

        # TODO download all files for a project
        # TODO move the files inside an h5 file in the current folder
        # TODO think about structure inside h5 file
        # TODO first draft I can make mask from bounding boxes like this: https://stackoverflow.com/questions/64195636/converting-bounding-box-regions-into-masks-and-saving-them-as-png-files

        download_folder = Path("./datasets")
        download_folder_images = download_folder / "images"
        Path(download_folder).mkdir(exist_ok=True, parents=True)
        for task_output in tasks:
            # label part 1
            #print(task_output)
            for anno in task_output["annotations"]:
                results = anno["result"]
                for result in results:
                    # ignore relations here
                    if result["type"] != "rectanglelabels":
                        continue
                    
                    actual_label = result["value"]["rectanglelabels"][0]
                    if not actual_label == "referee":
                        continue
                    # x,y,width,height are all percentages within [0,100]
                    x, y, width, height = (
                        result["value"]["x"],
                        result["value"]["y"],
                        result["value"]["width"],
                        result["value"]["height"],
                    )
                    img_width = result["original_width"]
                    img_height = result["original_height"]
                    # FIXME int might not be the best rounding method here - but off by one pixel is also not that bad
                    x_px = int(x / 100 * img_width)
                    y_px = int(y / 100 * img_height)
                    width_px = int(width / 100 * img_width)
                    height_px = int(height / 100 * img_height)
                    print(task_output, width_px, height_px)
                    # crop_img = img[top_left_y : bottom_right_y, top_left_x : bottom_right_x]
                    image_file_name = task_output["storage_filename"]
                    image_path = download_from_minio(
                        client=mclient,
                        bucket_name=bucket_name,
                        filename=image_file_name,
                        output_folder=download_folder_images,
                    )
                    img = cv2.imread(str(image_path))
                    crop_img = img[y_px : y_px + height_px, x_px : x_px + width_px]
                    output_name = Path(download_folder_images) / image_file_name
                    cv2.imwrite(str(output_name), crop_img)

In [5]:
download_images_and_masks()

SELECT ls_project_top, bucket_top FROM robot_logs WHERE ls_project_top::int = 469
[('469', 'fgbkngldutvkadpaxzwpjy')]
Working on project 469
{'id': 152748, 'predictions': [], 'annotations': [{'id': 106679, 'created_username': 'eliza bot bot@berlinunited.com, 2', 'created_ago': '2\xa0months, 3\xa0weeks', 'completed_by': 2, 'result': [{'id': 'FmdoTeSjVg', 'type': 'rectanglelabels', 'value': {'x': 62.87425149700595, 'y': 29.94011976047902, 'width': 10.479041916167683, 'height': 39.32135728542915, 'rotation': 0, 'rectanglelabels': ['referee']}, 'origin': 'manual', 'to_name': 'image', 'from_name': 'label', 'image_rotation': 0, 'original_width': 640, 'original_height': 480}], 'was_cancelled': False, 'ground_truth': False, 'created_at': '2024-04-25T12:23:04.669180Z', 'updated_at': '2024-07-16T12:45:39.750184Z', 'draft_created_at': None, 'lead_time': 1420.5700000000002, 'import_id': None, 'last_action': None, 'task': 152748, 'project': 469, 'updated_by': 1, 'parent_prediction': None, 'parent_a