In [49]:
import cv2
import shutil
import numpy as np
import os
from ultralytics import YOLO

In [37]:
# source paths (an RGB frame and its depth map share the same file name, e.g. "28.png")
rgb_source_path = "/home/hassaan/Downloads/allFinal/"
dpt_source_path = "/home/hassaan/Downloads/DPT2/"

# destination paths
rgb_destination_path = "/home/hassaan/Downloads/rgb_split/"
dpt_destination_path = "/home/hassaan/Downloads/dataset/images/"
text_destination_path = "/home/hassaan/Downloads/dataset/labels/"


def _frame_number(file_name):
    """Return the integer before the first "." in file_name, or None if it is not an integer."""
    try:
        return int(file_name.split(".")[0])
    except ValueError:
        return None


# Keep only entries named "<number>.<ext>" and order them numerically.
# Anything else in the folder (hidden files, sub-directories, ...) used to make
# the integer sort key raise ValueError; such entries are now skipped.
rgb_files_list = sorted(
    (name for name in os.listdir(rgb_source_path) if _frame_number(name) is not None),
    key=_frame_number,
)

In [7]:
# Load both YOLOv8-medium checkpoints from the local "models" folder.
# object_model is the default detector of object_detection(); pose_model is
# loaded here but not referenced by the other cells shown in this notebook.
object_model = YOLO("models/yolov8m.pt")
pose_model = YOLO("models/yolov8m-pose.pt")

In [None]:
###########################################################
# utils


def object_detection(frame, show=False, model=object_model):
    """Run ``model`` on ``frame`` and collect the confident class-0 detections.

    Args:
        frame: image handed to ``model`` unchanged.
        show: when true, display ``frame`` in an OpenCV window named "C" and
            block until "q" is pressed. No boxes are drawn on the frame.
        model: detector called as ``model(frame)``. Each result must expose
            ``boxes`` whose items provide ``cls``, ``conf`` and ``xyxy``.

    Returns:
        ``(bboxes, confs)``. ``bboxes`` is a list of ``[x1, y1, x2, y2]``
        integer lists and ``confs`` holds the matching ``box.conf[0]`` values,
        in detection order.
    """
    # Class id 0 is treated as "person" (presumably the COCO label of the
    # default checkpoint - verify if another model is passed in).
    person_class_id = 0
    min_confidence = 0.4

    bboxes = []
    confs = []
    for result in model(frame):
        if result.boxes is None:
            continue
        for box in result.boxes:
            confidence = box.conf[0]
            if int(box.cls[0]) != person_class_id or not confidence > min_confidence:
                continue
            confs.append(confidence)
            # Move the tensor to host memory first: calling .numpy() directly
            # on a CUDA tensor raises, and .cpu() is a no-op on CPU tensors.
            bbox = box.xyxy[0].cpu().numpy().astype(int)
            print(bbox)
            bboxes.append(list(bbox))

    # Optional preview; blocks until the user presses "q".
    while show:
        cv2.imshow("C", frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break

    return bboxes, confs


# return bboxes, confs


def check_overlap_for_2(box1, box2):
    """Describe how two axis-aligned boxes overlap.

    Both boxes are ``(x_left, y_top, x_right, y_bottom)`` corner coordinates.

    Returns:
        ``(overlapping_corners, area)`` where

        * ``overlapping_corners`` is the larger of "corners of box1 lying in
          box2" and "corners of box2 lying in box1" (borders included), so the
          result does not depend on the argument order: 1 for a corner
          overlap, 2 for an edge overlap, 4 when one box contains the other
          and 0 when no corner of either box is inside the other.
        * ``area`` is the area of the intersection rectangle, 0 when the boxes
          are disjoint or only touch.
    """
    x1, y1, x2, y2 = box1
    a1, b1, a2, b2 = box2

    def corners_inside(inner, outer):
        # Count the four corners of `inner` that fall inside `outer`.
        in_left, in_top, in_right, in_bottom = inner
        out_left, out_top, out_right, out_bottom = outer
        return sum(
            1
            for corner_x in (in_left, in_right)
            for corner_y in (in_top, in_bottom)
            if out_left <= corner_x <= out_right and out_top <= corner_y <= out_bottom
        )

    overlapping_corners = max(
        corners_inside(box1, box2), corners_inside(box2, box1)
    )

    # Intersection rectangle. It is computed unconditionally because boxes can
    # overlap without any corner of box1 lying in box2 (box2 inside box1, or a
    # cross-shaped overlap); previously those cases were reported as area 0.
    overlap_width = min(x2, a2) - max(x1, a1)
    overlap_height = min(y2, b2) - max(y1, b1)
    area = 0
    if overlap_width > 0 and overlap_height > 0:
        area = overlap_width * overlap_height

    return overlapping_corners, area


# return overlapping_corners, area


def _split_at_column(depth_frame, rgb_frame, mid_x):
    """Cut both frames into a left and a right part at column ``mid_x``."""
    return (
        [depth_frame[:, :mid_x], depth_frame[:, mid_x:]],
        [rgb_frame[:, :mid_x], rgb_frame[:, mid_x:]],
    )


def _split_at_row(depth_frame, rgb_frame, mid_y):
    """Cut both frames into a top and a bottom part at row ``mid_y``."""
    return (
        [depth_frame[:mid_y, :], depth_frame[mid_y:, :]],
        [rgb_frame[:mid_y, :], rgb_frame[mid_y:, :]],
    )


def _crop_to_box(frame, box):
    """Return the part of ``frame`` covered by ``box`` = (x1, y1, x2, y2)."""
    x1, y1, x2, y2 = box
    # Image arrays are indexed [row, column], i.e. [y, x].
    return frame[y1:y2, x1:x2]


def split_people_bboxes(depth_frame, rgb_frame, bboxes, confs, index):
    """Split a depth/RGB frame pair into one section per detected person.

    Args:
        depth_frame: depth image, indexed ``[row, column, ...]``.
        rgb_frame: RGB image; the sections are cut with the same indices as
            the depth image.
        bboxes: list of ``[x1, y1, x2, y2]`` person boxes. Not modified.
        confs: detection confidences (currently unused).
        index: running dataset index (currently unused).

    Returns:
        ``(depth_sections, rgb_sections)``, two lists of equal length. Both are
        empty when the frame should be skipped: no person, more than two
        people, or a two-person layout that is not handled yet.
    """
    # An overlap below this fraction of either box counts as "small".
    overlap_fraction_limit = 0.3
    # Minimum difference of the mean depth values for one box to count as
    # clearly in front of the other.
    depth_margin = 20

    # no person: skip image
    if len(bboxes) == 0:
        print("CASE 4")
        return [], []

    # 1 person: the frames go into the dataset as they are
    if len(bboxes) == 1:
        print("CASE 1")
        return [depth_frame], [rgb_frame]

    # 3 or more people: not handled yet, skip image
    if len(bboxes) > 2:
        print("CASE 3")
        return [], []

    # 2 person
    print("CASE 2")
    bbox1, bbox2 = bboxes
    overlapping_corners, area = check_overlap_for_2(bbox1, bbox2)
    # Order by left edge without re-ordering the caller's list.
    left_box, right_box = sorted(bboxes, key=lambda box: box[0])

    # no overlap
    if area == 0:
        print("CASE 2.1")
        x1, y1, x2, y2 = left_box
        a1, b1, a2, b2 = right_box

        # boxes are side by side: cut vertically in the middle of the gap
        if x2 <= a1:
            print("CASE 2.1.1")
            return _split_at_column(depth_frame, rgb_frame, (x2 + a1) // 2)

        # boxes share columns, so one has to be above the other: cut
        # horizontally in the middle of the gap
        print("CASE 2.1.2")
        if y2 <= b1:  # left_box is above right_box
            print("CASE 2.1.2.1")
            return _split_at_row(depth_frame, rgb_frame, (y2 + b1) // 2)
        if b2 <= y1:  # left_box is below right_box
            print("CASE 2.1.2.2")
            return _split_at_row(depth_frame, rgb_frame, (b2 + y1) // 2)

        # Reported area is 0 but the boxes are not separated on either axis;
        # there is no gap to cut through, so skip the image.
        return [], []

    # overlap
    print("CASE 2.2")

    # corner overlap
    if overlapping_corners == 1:
        print("CASE 2.2.1")
        x1, y1, x2, y2 = bbox1
        a1, b1, a2, b2 = bbox2
        area_bbox1 = (x2 - x1) * (y2 - y1)
        area_bbox2 = (a2 - a1) * (b2 - b1)

        # small overlap (below the limit for at least one of the boxes):
        # cut vertically through the middle of the overlapping strip
        if (
            area < overlap_fraction_limit * area_bbox2
            or area < overlap_fraction_limit * area_bbox1
        ):
            print("CASE 2.2.1.1")
            mid_x = (left_box[2] + right_box[0]) // 2
            return _split_at_column(depth_frame, rgb_frame, mid_x)

        # large overlap: keep only the person that is clearly in front
        print("CASE 2.2.1.2")
        avg_bbox1 = np.average(_crop_to_box(depth_frame, bbox1))
        avg_bbox2 = np.average(_crop_to_box(depth_frame, bbox2))
        print(avg_bbox1, avg_bbox2)

        # NOTE(review): assumes larger depth-map values mean "closer to the
        # camera" - confirm against the depth estimator's output convention.
        if avg_bbox1 > avg_bbox2 + depth_margin:
            print("CASE 2.2.1.2.1")
            # use only box1
            return [_crop_to_box(depth_frame, bbox1)], [_crop_to_box(rgb_frame, bbox1)]

        if avg_bbox2 > avg_bbox1 + depth_margin:
            print("CASE 2.2.1.2.2")
            # use only box2
            return [_crop_to_box(depth_frame, bbox2)], [_crop_to_box(rgb_frame, bbox2)]

        # both boxes are similarly prominent and overlap heavily: skip image
        print("CASE 2.2.1.2.3")
        return [], []

    # edge overlap: not handled yet, skip image
    if overlapping_corners == 2:
        print("CASE 2.2.2")
        return [], []

    # any other overlap (e.g. one box inside the other): not handled yet
    print("CASE 2.2.3")
    return [], []

In [None]:
index = 0

# cv2.imwrite does not create missing folders, so make sure they exist.
os.makedirs(dpt_destination_path, exist_ok=True)
os.makedirs(rgb_destination_path, exist_ok=True)

for file in rgb_files_list:

    # define file path of rgb, depth image
    rgb_path = rgb_source_path + file  # /home/hassaan/Downloads/allFinal/0.png
    depth_path = dpt_source_path + file  # /home/hassaan/Downloads/DPT2/0.png

    # read image (cv2.imread returns None instead of raising when it fails)
    rgb_frame = cv2.imread(rgb_path)
    depth_frame = cv2.imread(depth_path)

    if rgb_frame is None or depth_frame is None:
        print(f"skipping {file}: could not read the RGB and/or depth image")
    else:
        bboxes, confs = object_detection(rgb_frame)

        split_result = split_people_bboxes(
            depth_frame=depth_frame,
            rgb_frame=rgb_frame,
            bboxes=bboxes,
            confs=confs,
            index=index,
        )
        # Treat "nothing returned" as "no sections" instead of failing on unpack.
        depth_sections, rgb_sections = (
            split_result if split_result is not None else ([], [])
        )

        if len(depth_sections) == len(rgb_sections):
            # each depth/RGB section pair is stored under the same index
            for depth_section, rgb_section in zip(depth_sections, rgb_sections):

                # write depth section to dataset/images/
                cv2.imwrite(f"{dpt_destination_path}{index}.png", depth_section)

                # write rgb section to rgb_split/
                cv2.imwrite(f"{rgb_destination_path}{index}.png", rgb_section)

                index += 1

    break  # temporary: only the first file is processed

In [None]:
index = 0

for file in rgb_files_list:

    rgb_path = rgb_source_path + file
    # /home/hassaan/Downloads/allFinal/0.png

    depth_path = dpt_source_path + file
    # /home/hassaan/Downloads/DPT2/0.png

    # TODO: decide whether depth maps should be approved interactively here or
    # whether the raw depth-map dataset should be pruned by hand beforehand.
    # For now every depth map is assumed to show a discernible human figure.

    # read image (cv2.imread returns None instead of raising when it fails)
    rgb_frame = cv2.imread(rgb_path)
    depth_frame = cv2.imread(depth_path)
    if rgb_frame is None or depth_frame is None:
        print(f"skipping {file}: could not read the RGB and/or depth image")
        continue

    bboxes, confs = object_detection(rgb_frame)

    split_result = split_people_bboxes(
        depth_frame=depth_frame,
        rgb_frame=rgb_frame,
        bboxes=bboxes,
        confs=confs,
        index=index,
    )
    # Treat "nothing returned" as "no sections" instead of failing on unpack.
    depth_sections, rgb_sections = split_result if split_result is not None else ([], [])

    if len(depth_sections) != len(rgb_sections):
        continue

    # A depth section and its RGB section share one index so the two output
    # folders stay aligned; the index advances once per pair.
    for depth_section, rgb_section in zip(depth_sections, rgb_sections):
        cv2.imwrite(f"{dpt_destination_path}{index}.png", depth_section)
        cv2.imwrite(f"{rgb_destination_path}{index}.png", rgb_section)
        index += 1
                
        

In [47]:
# test an image
file = "28.png"
index = 0

rgb_path = rgb_source_path + file  # /home/hassaan/Downloads/allFinal/0.png
depth_path = dpt_source_path + file  # /home/hassaan/Downloads/DPT2/0.png

# read image (cv2.imread returns None instead of raising when it fails)
rgb_frame = cv2.imread(rgb_path)
depth_frame = cv2.imread(depth_path)
if rgb_frame is None or depth_frame is None:
    raise FileNotFoundError(f"could not read {rgb_path} and/or {depth_path}")

bboxes, confs = object_detection(rgb_frame)

# Defaults keep the cell usable when the split fails; the failure is reported
# below without calling the function a second time.
depth_sections, rgb_sections = [], []
split_result = None
try:
    split_result = split_people_bboxes(
        depth_frame=depth_frame,
        rgb_frame=rgb_frame,
        bboxes=bboxes,
        confs=confs,
        index=index,
    )
    depth_sections, rgb_sections = split_result
except Exception as error:
    print(
        "bboxes",
        bboxes,
        "\nconfs",
        confs,
        "\nindex",
        index,
        "\nfunction return",
        split_result,
        "\nerror",
        repr(error),
    )

if len(depth_sections) == len(rgb_sections):
    # for each section pair (nothing is written when the lists are empty)
    for depth_section, rgb_section in zip(depth_sections, rgb_sections):

        # write depth section to dataset/images/
        cv2.imwrite(f"{dpt_destination_path}{index}.png", depth_section)

        # write rgb section to rgb_split/
        cv2.imwrite(f"{rgb_destination_path}{index}.png", rgb_section)

        index += 1

# number of section pairs written
print(index)


0: 384x640 2 persons, 2 handbags, 3 chairs, 1 potted plant, 1 tv, 1107.0ms
Speed: 2.6ms preprocess, 1107.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
[177 222 462 631]
[435  97 612 535]
CASE 2
CASE 2.2
CASE 2.2.1
CASE 2.2.1.1
2


In [30]:
# Inspect the depth-map path built from the current `file` value.
# NOTE(review): the recorded output below spells the folder "Downlaods", which
# does not match dpt_source_path as defined in the config cell - the output
# looks stale; re-run this cell after the config cell.
dpt_source_path + file

'/home/hassaan/Downlaods/DPT2/28.png'

In [54]:
# Scratch check: unpacking a three-item list into two targets raises
# ValueError (see the recorded error below), so the display line that
# follows is never reached.
[a, b] = [10, 11, 12]
a, b

ValueError: too many values to unpack (expected 2)