In [2]:
import glob
import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
def open_camera():
    cap = cv.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        exit()
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        # if frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        # Our operations on the frame come here
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        # Display the resulting frame
        cv.imshow("frame", gray)
        if cv.waitKey(1) == ord("q"):
            break
    # When everything done, release the capture
    cap.release()
    cv.destroyAllWindows()


def play_video(video_input, annotation):
    print(f"Showing image annotation {annotation}")
    annotation_df = pd.read_csv(annotation, sep="\t", header=None, dtype=float)
    cap = cv.VideoCapture(video_input)
    frames = []
    idx = 0

    cv.startWindowThread()

    while cap.isOpened():
        ret, frame = cap.read()
        # if frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        frames.append(gray)
        try:
            x1, y1, width, height = [
                int(val) for val in annotation_df.iloc[idx].tolist()
            ]
            cv.rectangle(gray, (x1, y1), (x1 + width, y1 + height), (255, 0, 0), 2)
        except Exception as e:
            print(x1, y1, width, height, e)
        cv.imshow("frame", gray)
        idx += 1
        if cv.waitKey(1) == ord("q"):
            break
        # if cv.getWindowProperty('frame', cv.WND_PROP_VISIBLE) < 1:
        #     break
    cap.release()
    cv.destroyAllWindows()
    for i in range(10):
        cv.waitKey(1)


# for idx, (video_file, annotation) in enumerate(data):
#     if idx < 10:
#         continue
#     if idx > 100:
#         break
#     play_video(video_file, annotation)

In [18]:
import matplotlib.patches as patches


def get_pixels(img_file):
    img = Image.open(img_file)
    pixels = np.asarray(img)
    return pixels


def get_yolo_style_bbox_rect(bbox_coord_file, width, height):
    bbox_coord_df = pd.read_csv(bbox_coord_file, sep=" ", header=None)
    bbox_coord_df.columns = ["label", "x", "y", "w", "h"]

    rect_patch_list = []
    for idx, row in bbox_coord_df.iterrows():
        rel_x, rel_y, rel_bbox_width, rel_bbox_height = (
            row["x"],
            row["y"],
            row["w"],
            row["h"],
        )
        rect_patch_list.append(
            get_rect_box_yolo(
                rel_x, rel_y, rel_bbox_width, rel_bbox_height, width, height
            )
        )
    return rect_patch_list


def get_rect_box_yolo(rel_x, rel_y, rel_bbox_width, rel_bbox_height, width, height):
    xmin = width * (rel_x - rel_bbox_width / 2)
    ymin = height * (rel_y - rel_bbox_height / 2)
    patch_width = rel_bbox_width * width
    patch_height = rel_bbox_height * height
    # Create a Rectangle patch
    rect_patch = patches.Rectangle(
        (xmin, ymin),
        patch_width,
        patch_height,
        linewidth=1,
        edgecolor="r",
        facecolor="none",
    )

    return rect_patch


def get_rect_box(rel_xmin, rel_xmax, rel_ymin, rel_ymax, width, height):
    xmin = width * rel_xmin
    ymin = height * rel_ymin
    patch_width = (rel_xmax - rel_xmin) * width
    patch_height = (rel_ymax - rel_ymin) * height
    # Create a Rectangle patch
    rect_patch = patches.Rectangle(
        (xmin, ymin),
        patch_width,
        patch_height,
        linewidth=1,
        edgecolor="r",
        facecolor="none",
    )

    return rect_patch


def get_img_with_bounding_box(img_file, bbox_coord_file):
    img_basename = os.path.basename(img_file).removesuffix(".jpg")
    pixels = get_pixels(img_file)
    height, width, _ = pixels.shape
    bbox_coord_df = pd.read_csv(bbox_coord_file, sep=" ", header=None)

    rect_patch_list = []
    for idx, row in bbox_coord_df.iterrows():
        _, rel_xmin, rel_xmax, rel_ymin, rel_ymax = (
            row["1"],
            row["2"],
            row["3"],
            row["4"],
        )
        if show_img:
            rect_patch_list.append(
                get_rect_box(rel_xmin, rel_xmax, rel_ymin, rel_ymax, width, height)
            )

    if show_img:
        fig, ax = plt.subplots()
        ax.imshow(pixels)
        # Add the patch to the Axes
        for rect in rect_patch_list:
            ax.add_patch(rect)
        plt.show()

In [30]:
from PIL import Image

img_file = os.path.join(basedir, "obj_train_data/frame_000000.PNG")
label_file = os.path.join(basedir, "obj_train_data/frame_000000.txt")

In [58]:
import glob
import os

# basedir = "/Users/javkhlan-ochirganbat/temp/yellow_shrimps/"


def cleanup_no_label_img(basedir: str):
    for label_file in sorted(
        glob.glob(os.path.join(basedir, "obj_train_data/frame_*.txt"))
    ):
        img_file = label_file.replace(".txt", ".PNG")
        try:
            pd.read_csv(label_file)
        except pd.errors.EmptyDataError:
            # print(f"No annotation {label_file} removing image and label")
            os.remove(label_file)
            os.remove(img_file)

    # pixels = get_pixels(img_file)
    # bbox_rect_list = get_yolo_style_bbox_rect(label_file, pixels.shape[1], pixels.shape[0])

    # fig, ax = plt.subplots(figsize=(6, 8))
    # ax.imshow(pixels)
    # # Add the patch to the Axes
    # for rect in bbox_rect_list:
    #     ax.add_patch(rect)
    # plt.show()

In [101]:
import random
from dataclasses import dataclass


def mv_annot_img_and_label(prefix, img_file, label_file, dest_img_dir, dest_label_dir):
    new_label_file = os.path.basename(label_file).replace("frame", prefix)
    new_img_file = os.path.basename(img_file).replace("frame", prefix)
    shutil.copyfile(img_file, os.path.join(dest_img_dir, new_img_file))
    shutil.copyfile(label_file, os.path.join(dest_label_dir, new_label_file))


@dataclass
class CVATAnnot:
    video_name: str
    annot_dir: str
    downsample_percent: float = 1.0  # 1 for all image
    max_image_count: int = -1  # -1 for all image

    def __init__(
        self, video_name, annot_dir, downsample_percent=1, max_image_count=None
    ):
        self.video_name = video_name
        self.annot_dir = annot_dir
        total_img_cnt = len(os.listdir(annot_dir)) // 2
        if max_image_count and max_image_count < total_img_cnt:
            self.downsample_percent = np.round(max_image_count / total_img_cnt, 2)
            self.max_image_count = max_image_count
        else:
            self.downsample_percent = downsample_percent

    def output_fltrd_data(self, outdir: str):
        dest_img_dir = os.path.join(outdir, "train/images/shrimp/")
        dest_label_dir = os.path.join(outdir, "train/labels/shrimp/")
        for label_file in sorted(glob.glob(os.path.join(self.annot_dir, "*.txt"))):
            img_file = label_file.replace(".txt", ".PNG")
            if random.random() <= self.downsample_percent:
                mv_annot_img_and_label(self.video_name, img_file, label_file, dest_img_dir, dest_label_dir)


In [102]:
annot_basedirs = [
    "/Users/javkhlan-ochirganbat/temp/tet_visit_shrimp_1a/obj_train_data/",
    "/Users/javkhlan-ochirganbat/temp/tet_visit_shrimp_1b/obj_train_data/",
    "/Users/javkhlan-ochirganbat/temp/tet_visit_shrimp_1c/obj_train_data/",
    "/Users/javkhlan-ochirganbat/temp/yellow_shrimps/obj_train_data/",
]

cvat_annot_obj = {}
outdir = "/Users/javkhlan-ochirganbat/temp/shrimp_dataset_v2/"

for basedir in annot_basedirs:
    cleanup_no_label_img(basedir=basedir)
    vid_name = basedir.rstrip("/").split("/")[-2]
    cvat_obj = CVATAnnot(
        video_name=vid_name, annot_dir=basedir, max_image_count=500
    )
    cvat_annot_obj[vid_name] = cvat_obj
    cvat_obj.output_fltrd_data(outdir)


In [88]:
cvat_annot_obj["tet_visit_shrimp_1c"]

CVATAnnot(video_name='tet_visit_shrimp_1c', annot_dir='/Users/javkhlan-ochirganbat/temp/tet_visit_shrimp_1c/obj_train_data/', downsample_percent=0.3, max_image_count=500)

In [66]:
import shutil

for basedir in annot_basedirs:
    basename = os.path.basename(basedir.rstrip("/"))

    label_files = sorted(glob.glob(os.path.join(basedir, "obj_train_data/frame_*.txt")))
    for label_file in label_files:
        img_file = label_file.replace(".txt", ".PNG")

        new_label_file = os.path.basename(label_file).replace("frame", basename)
        new_img_file = os.path.basename(img_file).replace("frame", basename)

        shutil.copyfile(img_file, os.path.join(dest_img_dir, new_img_file))
        shutil.copyfile(label_file, os.path.join(dest_label_dir, new_label_file))
        # print(img_file, os.path.join(dest_img_dir, new_img_file))
        # print(label_file, os.path.join(dest_label_dir, new_label_file))

In [3]:
data = glob.glob("UOT100/*/")

video_input = "UOT100/ArmyDiver1/ArmyDiver1.mp4"
annotation = "UOT100/ArmyDiver1/groundtruth_rect.txt"

df = pd.read_csv(annotation, sep="\t", header=None)

labels_dict = {
    "turtle": 0,
    "human": 1,
    "fish": 2,
    "octopus": 3,
    "squid": 4,
    "whale": 5,
    "shark": 6,
    "manta": 7,
    "dive": 1,
    "dolphin": 8,
    "eel": 9,
    "shrimp": 10,
    "diving": 2,
}

In [6]:
import os
import random
import shutil


def create_annotated_images(video_folder, labels_dict, prefix, img_count=10):
    label_name = video_folder.lower()
    ground_truth = pd.read_csv(
        os.path.join(video_folder, "groundtruth_rect.txt"), sep="\t", header=None
    )
    detected_label = None
    for label, idx in labels_dict.items():
        if label in label_name:
            detected_label = str(idx)
            break

    if not detected_label:
        print(f"Label NOT FOUND for {video_folder}")
        return

    image_list = sorted(glob.glob(os.path.join(video_folder, "img/frame*")))
    if not image_list:
        print(f"IMAGES NOT FOUND for {video_folder}")
        return
    img_height, img_width = plt.imread(image_list[0]).shape[:2]
    random.shuffle(image_list)
    selected = image_list[:img_count]

    create_dataset = True
    for img in selected:
        base_name = os.path.basename(img)
        label_idx = int(base_name.removeprefix("frame").removesuffix(".jpg"))
        new_img_name = f"{prefix:08}{label_idx}"
        if label_idx >= ground_truth.shape[0]:
            continue
        x1, y1, rec_width, rec_height = [
            int(val) for val in ground_truth.loc[label_idx].tolist()
        ]
        norm_x1 = x1 / img_width
        norm_rec_width = rec_width / img_width
        norm_y1 = y1 / img_height
        norm_rec_height = rec_height / img_height
        bounding_rect = f"{norm_x1 + norm_rec_width / 2} {norm_y1 + norm_rec_height / 2} {norm_rec_width} {norm_rec_height}"

        new_img_path = f"/Users/javkhlan-ochirganbat/repos/machine-learning/ultralytics_demo/custom_dataset/UOT100Images/images/uot100/{new_img_name}.jpg"
        if create_dataset:
            shutil.copy(img, new_img_path)
            new_label_path = f"/Users/javkhlan-ochirganbat/repos/machine-learning/ultralytics_demo/custom_dataset/UOT100Images/labels/uot100/{new_img_name}.txt"
            with open(new_label_path, "w") as outfile:
                outfile.write(f"{detected_label} {bounding_rect}\n")
            # print(detected_label, new_img_path, label_idx, label_value)
    return detected_label


# create_annotated_images("UOT100/AntiguaTurtle", labels_dict=labels_dict, prefix=1)

In [3]:
# Set to True to generate annotated image

if False:
    for idx, folder in enumerate(sorted(glob.glob("UOT100/*"))):
        create_annotated_images(
            folder, labels_dict=labels_dict, prefix=idx, img_count=200
        )