In [1]:
import glob
import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def open_camera():
    """Show a live grayscale preview of the default camera until "q" is pressed.

    Opens capture device 0, converts each frame to grayscale and displays it in
    a window named "frame". The loop stops when "q" is pressed or when a frame
    cannot be read.

    Returns:
        None. If the camera cannot be opened a message is printed and the
        function simply returns, so the calling notebook keeps running.
    """
    cap = cv.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open camera")
        # Return instead of exit(): exiting from inside a notebook cell tears
        # down the whole interpreter session rather than just this call.
        cap.release()
        return

    try:
        while True:
            # Capture frame-by-frame; ret is False when no frame could be read.
            ret, frame = cap.read()
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            cv.imshow("frame", gray)
            if cv.waitKey(1) == ord("q"):
                break
    finally:
        # Always free the device and close the window, even if an OpenCV call
        # raised or the cell was interrupted.
        cap.release()
        cv.destroyAllWindows()

In [3]:
# Every sub-directory of the UOT100 folder (the trailing slash in the pattern
# restricts the match to directories).
data = glob.glob("UOT100/*/")

# A single sample sequence: its video file and its ground-truth rectangle file.
video_input, annotation = (
    "UOT100/ArmyDiver1/ArmyDiver1.mp4",
    "UOT100/ArmyDiver1/groundtruth_rect.txt",
)

# The annotation file is read as tab-separated values without a header row.
df = pd.read_csv(filepath_or_buffer=annotation, header=None, sep="\t")

In [4]:
# Keyword -> class id. create_annotated_images() looks for each keyword as a
# substring of the lower-cased folder name and stops at the first hit, so the
# insertion order below is significant.
# Several keywords deliberately or accidentally share an id:
#   "dive" -> 1, the same id as "human".
#   "diving" -> 2, the same id as "fish".
# NOTE(review): "diving" sharing the id of "fish" rather than "human" looks
# like a typo — confirm the intended class before training on these labels.
labels_dict = dict(
    turtle=0,
    human=1,
    fish=2,
    octopus=3,
    squid=4,
    whale=5,
    shark=6,
    manta=7,
    dive=1,
    dolphin=8,
    eel=9,
    shrimp=10,
    diving=2,
)

In [5]:
# Print "<class id>: <keyword>" for every entry, in dictionary order.
for keyword in labels_dict:
    print(f"{labels_dict[keyword]}: {keyword}")

0: turtle
1: human
2: fish
3: octopus
4: squid
5: whale
6: shark
7: manta
1: dive
8: dolphin
9: eel
10: shrimp
2: diving


In [6]:
import os
import random
import shutil


def create_annotated_images(
    video_folder,
    labels_dict,
    prefix,
    img_count=10,
    output_root="/Users/javkhlan-ochirganbat/repos/machine-learning/ultralytics_demo/custom_dataset/UOT100Images",
    create_dataset=True,
):
    """Copy a random sample of annotated frames from one sequence into a dataset.

    The class id is chosen by looking for each key of ``labels_dict`` as a
    substring of the lower-cased sequence folder name; the first matching key
    (in dictionary order) wins. For every sampled frame the image is copied to
    ``<output_root>/images/uot100/`` and a one-line label file
    ``"<class id> <x_center> <y_center> <width> <height>"`` is written to
    ``<output_root>/labels/uot100/``, with all four box values divided by the
    image width/height.

    Args:
        video_folder: Sequence directory containing ``groundtruth_rect.txt``
            (tab-separated ``x, y, width, height`` rows, no header) and an
            ``img/`` directory with files named ``frame<N>.<ext>``.
        labels_dict: Mapping of keyword -> integer class id.
        prefix: Integer identifying the sequence; zero-padded to 8 digits and
            prepended to the frame number to build the output file name.
        img_count: Maximum number of frames to export.
        output_root: Dataset root directory. Defaults to the original author's
            local path for backward compatibility; pass your own location.
        create_dataset: When False nothing is written (dry run).

    Returns:
        The detected class id as a string, or None when no label keyword
        matches or the sequence has no frames.
    """
    # Match against the sequence name only, so that keywords appearing in
    # parent directories of the path cannot select a class by accident.
    sequence_name = os.path.basename(os.path.normpath(video_folder)).lower()
    detected_label = next(
        (str(idx) for label, idx in labels_dict.items() if label in sequence_name),
        None,
    )
    if detected_label is None:
        print(f"Label NOT FOUND for {video_folder}")
        return None

    image_list = sorted(glob.glob(os.path.join(video_folder, "img/frame*")))
    if not image_list:
        print(f"IMAGES NOT FOUND for {video_folder}")
        return None

    # Only read the annotations once we know the sequence is usable.
    ground_truth = pd.read_csv(
        os.path.join(video_folder, "groundtruth_rect.txt"), sep="\t", header=None
    )
    # All frames of a sequence are assumed to share the size of the first one.
    img_height, img_width = plt.imread(image_list[0]).shape[:2]

    # Keep only frames that have a complete annotation row *before* sampling,
    # so that up to img_count images are actually exported.
    # NOTE(review): frame<N> is paired with row N (0-based) of the ground
    # truth — confirm this matches how the frames were extracted.
    candidates = []
    for img in image_list:
        stem, ext = os.path.splitext(os.path.basename(img))
        try:
            frame_idx = int(stem.removeprefix("frame"))
        except ValueError:
            continue  # not a numbered frame file
        if not 0 <= frame_idx < len(ground_truth):
            continue  # no annotation row for this frame
        row = ground_truth.iloc[frame_idx]
        if row.isna().any():
            continue  # annotation present but incomplete
        candidates.append((img, ext, frame_idx, row))

    sample_size = max(0, min(img_count, len(candidates)))
    selected = random.sample(candidates, sample_size)

    if create_dataset:
        images_dir = os.path.join(output_root, "images", "uot100")
        labels_dir = os.path.join(output_root, "labels", "uot100")
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)

        for img, ext, frame_idx, row in selected:
            x1, y1, rec_width, rec_height = (int(val) for val in row.tolist())
            norm_x1 = x1 / img_width
            norm_rec_width = rec_width / img_width
            norm_y1 = y1 / img_height
            norm_rec_height = rec_height / img_height
            # Top-left corner + size -> box centre + size, all normalized.
            bounding_rect = (
                f"{norm_x1 + norm_rec_width / 2} {norm_y1 + norm_rec_height / 2} "
                f"{norm_rec_width} {norm_rec_height}"
            )

            new_img_name = f"{prefix:08}{frame_idx}"
            shutil.copy(img, os.path.join(images_dir, f"{new_img_name}{ext}"))
            with open(os.path.join(labels_dir, f"{new_img_name}.txt"), "w") as outfile:
                outfile.write(f"{detected_label} {bounding_rect}\n")

    return detected_label


# create_annotated_images("UOT100/AntiguaTurtle", labels_dict=labels_dict, prefix=1)

In [7]:
# Export up to 200 annotated frames from every UOT100 entry; the position of
# the entry in the sorted listing becomes the file-name prefix.
for idx, folder in enumerate(sorted(glob.glob("UOT100/*"))):
    create_annotated_images(
        video_folder=folder,
        labels_dict=labels_dict,
        prefix=idx,
        img_count=200,
    )

Label NOT FOUND for UOT100/BallisticMissile1
Label NOT FOUND for UOT100/BallisticMissile2
Label NOT FOUND for UOT100/BoySwimming
Label NOT FOUND for UOT100/CenoteAngelita
IMAGES NOT FOUND for UOT100/CleverOctopus
Label NOT FOUND for UOT100/ClickerAndTarget
IMAGES NOT FOUND for UOT100/CoconutOctopus1
IMAGES NOT FOUND for UOT100/CoconutOctopus2
IMAGES NOT FOUND for UOT100/ColourChangingSquid
Label NOT FOUND for UOT100/CrabTrap
IMAGES NOT FOUND for UOT100/CrayFish
IMAGES NOT FOUND for UOT100/DeepSeaFish1
IMAGES NOT FOUND for UOT100/DeepSeaFish2
IMAGES NOT FOUND for UOT100/Diving360Degree1
IMAGES NOT FOUND for UOT100/Diving360Degree2
IMAGES NOT FOUND for UOT100/Diving360Degree3
IMAGES NOT FOUND for UOT100/DivingHuman
IMAGES NOT FOUND for UOT100/EelRedSeaReptile
IMAGES NOT FOUND for UOT100/FightingEels1
IMAGES NOT FOUND for UOT100/FightingEels2
IMAGES NOT FOUND for UOT100/FightingEels3
IMAGES NOT FOUND for UOT100/FishLittleMonster
IMAGES NOT FOUND for UOT100/FishLobsters2
IMAGES NOT FOUND f

In [None]:
def play_video(video_input, annotation):
    """Play a video in grayscale with its ground-truth rectangle drawn per frame.

    Row ``i`` of the annotation file supplies the rectangle for frame ``i`` of
    the video. Frames without a usable row (past the end of the file, or a row
    containing NaN) are still shown, just without a rectangle. Playback stops
    at the end of the stream or when "q" is pressed.

    Args:
        video_input: Path (or any source accepted by ``cv.VideoCapture``) of
            the video to play.
        annotation: Path to a tab-separated, header-less file whose rows are
            ``x, y, width, height``.

    Returns:
        None.
    """
    print(f"Showing image annotation {annotation}")
    annotation_df = pd.read_csv(annotation, sep="\t", header=None, dtype=float)
    cap = cv.VideoCapture(video_input)

    try:
        cv.startWindowThread()
        idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            # ret is False when no frame could be read (e.g. end of stream).
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            try:
                x1, y1, width, height = (
                    int(val) for val in annotation_df.iloc[idx].tolist()
                )
            except (IndexError, ValueError, OverflowError) as e:
                # IndexError: fewer rows than frames; ValueError/OverflowError:
                # NaN/inf or a malformed row. Report using only names that are
                # guaranteed to exist at this point.
                print(f"No usable annotation for frame {idx}: {e}")
            else:
                cv.rectangle(
                    gray, (x1, y1), (x1 + width, y1 + height), (255, 0, 0), 2
                )
            cv.imshow("frame", gray)
            idx += 1
            if cv.waitKey(1) == ord("q"):
                break
    finally:
        # Release the capture and close the window even if something failed.
        cap.release()
        cv.destroyAllWindows()
        # Pump the GUI event loop a few more times after destroyAllWindows(),
        # as the original code did, so the close request gets processed.
        for _ in range(10):
            cv.waitKey(1)


# `data` holds sequence directories (e.g. "UOT100/ArmyDiver1/"), not
# (video, annotation) pairs, so both paths are derived from each directory:
# the video is named after its folder and the annotations live in
# groundtruth_rect.txt, as for the ArmyDiver1 sample above.
# Only entries 10..100 of the sorted listing are played.
for idx, folder in enumerate(sorted(data)[10:101], start=10):
    sequence_name = os.path.basename(os.path.normpath(folder))
    video_file = os.path.join(folder, f"{sequence_name}.mp4")
    annotation = os.path.join(folder, "groundtruth_rect.txt")
    if not (os.path.isfile(video_file) and os.path.isfile(annotation)):
        print(f"Skipping {folder}: video or annotation file not found")
        continue
    play_video(video_file, annotation)