In [1]:
import cv2
import shutil
import numpy as np
import os
from ultralytics import YOLO

In [2]:
# source paths
rgb_source_path = "/home/hassaan/Downloads/train_2500/images_final/"
dpt_source_path = "/home/hassaan/Downloads/train_2500/dpt_final/"

# destination paths
rgb_destination_path = "/home/hassaan/Downloads/rgb_split/"
dpt_destination_path = "/home/hassaan/Downloads/dataset/images/"
text_destination_path = "/home/hassaan/Downloads/dataset/labels/"


rgb_files_list = os.listdir(rgb_source_path)
rgb_files_list.sort(key=lambda x: int(x.split(".")[0]))

In [3]:
pose_model = YOLO("models/yolov8m-pose.pt")
object_model = YOLO("models/yolov8m.pt")

In [4]:
# utils imported
from utils.box_splitting import split_people_bboxes
from utils.object_detection import object_detection
from utils.bbox_area import bbox_area
from utils.check_overlap import check_overlap_for_2

In [10]:
index = 0

for file in rgb_files_list[0:300]:
    rgb_path = rgb_source_path + file  # /home/hassaan/Downloads/allFinal/0.png
    depth_path = dpt_source_path + file  # /home/hassaan/Downloads/DPT2/0.png
    print(rgb_path, ": source")
    # read image
    rgb_frame = cv2.imread(rgb_path)
    depth_frame = cv2.imread(depth_path)

    bboxes, confs = object_detection(rgb_frame, model=object_model)
    print(bboxes)
    depth_sections, rgb_sections = split_people_bboxes(
        depth_frame=depth_frame, rgb_frame=rgb_frame, bboxes=bboxes, confs=confs
    )

    if len(depth_sections) == 0:
        print("ALL PERSON BBOXES ARE TINY")

    elif len(depth_sections) == len(rgb_sections):
        # for each section
        for n in range(len(depth_sections)):

            # write depth section to dataset/images/
            try:
                cv2.imwrite(
                    f"{dpt_destination_path}{index}.png", np.array(depth_sections[n])
                )
            except Exception as err:
                print(type(depth_sections[n]))
                print("#8#8#", err, "#8#8#")
                break

            # write rgb section to split_rgb/
            cv2.imwrite(f"{rgb_destination_path}{index}.png", np.array(rgb_sections[n]))
            print(f"{rgb_destination_path}{index}.png")
            index += 1
    print("index:", index)
    print("=" * 30)
    # if index > 30:
    #     break  # temporary

/home/hassaan/Downloads/train_2500/images_final/0.png : source



0: 384x640 8 persons, 1 tie, 1310.4ms
Speed: 4.5ms preprocess, 1310.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[[80, 64, 215, 446], [587, 45, 768, 488], [935, 42, 1199, 668], [272, 83, 458, 512]]
CASE 4
[[80, 64, 215, 446], [272, 83, 458, 512], [587, 45, 768, 488], [935, 42, 1199, 668]]
-20691 -142104 -275040 -52245 -204633 -73981
CASE 4.1
CASE 4.2
CASE 4.3
CASE 4.4
/home/hassaan/Downloads/rgb_split/0.png
/home/hassaan/Downloads/rgb_split/1.png
/home/hassaan/Downloads/rgb_split/2.png
/home/hassaan/Downloads/rgb_split/3.png
index: 4
/home/hassaan/Downloads/train_2500/images_final/1.png : source

0: 384x640 1 person, 922.0ms
Speed: 3.4ms preprocess, 922.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
[[530, 46, 806, 552]]
CASE 1
/home/hassaan/Downloads/rgb_split/4.png
index: 5
/home/hassaan/Downloads/train_2500/images_final/2.png : source

0: 480x640 1 person, 1210.6ms
Speed: 2.8ms preprocess, 1210.6ms inference, 2.6ms postprocess per image 

In [9]:
# test
bbox_area([766, 159, 930, 389])

37720

In [9]:
# test an image
file = "57.png"
index = 0

rgb_path = rgb_source_path + file  # /home/hassaan/Downloads/allFinal/0.png
depth_path = dpt_source_path + file  # /home/hassaan/Downloads/DPT2/0.png

# read image
rgb_frame = cv2.imread(rgb_path)
depth_frame = cv2.imread(depth_path)

bboxes, confs = object_detection(rgb_frame, object_model)

depth_sections, rgb_sections = split_people_bboxes(
    depth_frame=depth_frame, rgb_frame=rgb_frame, bboxes=bboxes, confs=confs
)

# try:
#     split_people_bboxes(
#         depth_frame=depth_frame, rgb_frame=rgb_frame, bboxes=bboxes, confs=confs
#     )
# except Exception as err:
#     print(err)


if len(depth_sections) == len(rgb_sections):

    # for each section
    for n in range(len(depth_sections)):

        # write depth section to dataset/images/
        cv2.imwrite(
            f"/home/hassaan/Downloads/dataset2/images/{index}.png",
            np.array(depth_sections[n]),
        )

        # write rgb section to split_rgb/
        cv2.imwrite(
            f"/home/hassaan/Downloads/rgb_split2/{index}.png",
            np.array(rgb_sections[n]),
        )

        print(f"{rgb_destination_path}{index}.png")
        index += 1
print(index)


0: 384x640 15 persons, 4 ties, 1271.6ms
Speed: 5.4ms preprocess, 1271.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
CASE 4
[[28, 11, 189, 561], [73, 150, 424, 659], [392, 160, 727, 663], [723, 174, 1089, 660]]
47676 -81403 -206658 15968 -145015 1944
CASE 4.3
CASE 4.4
/home/hassaan/Downloads/rgb_split/0.png
/home/hassaan/Downloads/rgb_split/1.png
2


In [7]:
# # test
# print(bboxes)
# # check_overlap_for_2(bboxes[0], bboxes[1])
# # bbox_area(bboxes[0]), bbox_area(bboxes[1])
# # check_overlap_for_2([996, 80, 1234, 1069], [1140, 94, 1474, 1068])
# bbox1, bbox2, bbox3, bbox4 = bboxes
# area12 = check_overlap_for_2(bbox1, bbox2)
# area13 = check_overlap_for_2(bbox1, bbox3)
# area14 = check_overlap_for_2(bbox1, bbox4)

# area23 = check_overlap_for_2(bbox2, bbox3)
# area24 = check_overlap_for_2(bbox2, bbox4)

# area34 = check_overlap_for_2(bbox3, bbox4)
# print(area12, area13, area14, area23, area24, area34)

[[316, 145, 640, 1068], [601, 120, 886, 1068], [996, 80, 1234, 1069], [1140, 94, 1474, 1068]]
35997 -328588 -461500 -104280 -240792 91556


In [8]:
# size1 = bbox_area(bbox1)
# size2 = bbox_area(bbox2)
# size3 = bbox_area(bbox3)
# size4 = bbox_area(bbox4)
# print("areas:", size1, size2, size3, size4)

areas: 299052 270180 235382 325316


In [52]:
# ###########################################################
# # utils

# # return bboxes, confs
# def object_detection(frame, show=False, model=object_model):

#     results = model(frame)
#     bboxes = []
#     confs = []
#     # Render the results
#     for result in results:
#         if result.boxes is not None:
#             for box in result.boxes:
#                 # label = model.names[int(box.cls[0])]
#                 confidence = box.conf[0]
#                 if int(box.cls[0]) == 0 and confidence > 0.4:
#                     confs.append(confidence)
#                     bbox = box.xyxy[0].numpy().astype(int)
#                     print(bbox)
#                     bboxes.append(list(bbox))
#                     x1, y1, x2, y2 = bbox
#                 # to make sure people with more than 3 images get used up in the image splitting part and dont get skipped.
#                 if len(bboxes) == 3:
#                     break
#                     # Draw bounding box and label
#                     # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#                     # cv2.putText(
#                     #     frame,
#                     #     f"{box.cls[0]} {confidence:.2f}",
#                     #     (x1, y1 - 10),
#                     #     cv2.FONT_HERSHEY_SIMPLEX,
#                     #     0.5,
#                     #     (0, 255, 0),
#                     #     2,
#                     # )
#     while show:
#         cv2.imshow("C", frame)
#         if cv2.waitKey(10) & 0xFF == ord("q"):
#             cv2.destroyAllWindows()
#             break

#     return bboxes, confs

# # return overlapping_corners, area
# def check_overlap_for_2(box1, box2):
#     # bbox is TL and BR corner coordinates
#     x1, y1, x2, y2 = box1
#     a1, b1, a2, b2 = box2

#     box1_corners = [
#         (x1, y1),  # TL
#         (x2, y1),  # TR
#         (x1, y2),  # BL
#         (x2, y2),  # BR
#     ]

#     while False:
#         corner_overlap_areas = {
#             "TL": ((a2 - x1) * (b2 - y1)),
#             "TR": ((x1 - a1) * (b2 - y2)),
#             "BL": ((a2 - x1) * (y2 - b1)),
#             "BR": ((x2 - a1) * (y2 - b1)),
#         }

#     overlapping_corners = 0
#     single_corener = ()  # returns corner for single corner case with small overlap
#     for corner in box1_corners:
#         x, y = corner
#         # check if x1,y1 is in box2
#         if (x >= a1 and x <= a2) and (y >= b1 and y <= b2):
#             overlapping_corners += 1
#             single_corener = (x, y)

#     area = 0
#     if overlapping_corners > 0:
#         # calculate the coordinates of the overlapping rectangle
#         x_left = max(x1, a1)
#         y_top = max(y1, b1)
#         x_right = min(x2, a2)
#         y_bottom = min(y2, b2)

#         # calculate overlap area
#         overlap_width = x_right - x_left
#         overlap_height = y_bottom - y_top
#         area = overlap_width * overlap_height

#     return overlapping_corners, area

# # return depth sections and rgb sections [], []
# def split_people_bboxes(depth_frame, rgb_frame, bboxes, confs, index):

#     # 1 person
#     if len(bboxes) == 1:
#         print("CASE 1")
#         # return depthmap of corresponding image as it is into dataset
#         return [depth_frame][rgb_frame]

#     # 2 person
#     elif len(bboxes) == 2:
#         print("CASE 2")
#         bbox1, bbox2 = bboxes
#         overlapping_corners, area = check_overlap_for_2(bbox1, bbox2)

#         # no overlap
#         if area == 0:
#             print("CASE 2.1")
#             # sort bboxes to organize by TL corner
#             bboxes.sort(key=lambda x: x[0])
#             box1, box2 = bboxes
#             x1, y1, x2, y2 = box1
#             a1, b1, a2, b2 = box2

#             # not right, above or below. :. box1 is to the left of box2
#             if x2 <= a1:
#                 print("CASE 2.1.1")
#                 mid_x = (x2 + a1) // 2

#                 depth_section1 = depth_frame[:mid_x, :]
#                 depth_section2 = depth_frame[mid_x:, :]

#                 rgb_section1 = rgb_frame[:mid_x, :]
#                 rgb_section2 = rgb_frame[mid_x:, :]

#             # box1 is above or below box2
#             else:
#                 print("CASE 2.1.2")
#                 if y2 >= b1:  # if above
#                     print("CASE 2.1.2.1")
#                     mid_y = (y2 + b1) // 2

#                 elif b2 >= y1:  # if below
#                     print("CASE 2.1.2.2")
#                     mid_y = (y1 + b2) // 2

#                 depth_section1 = depth_frame[:, :mid_y]
#                 depth_section2 = depth_frame[:, mid_y:]

#                 rgb_section1 = rgb_frame[:, :mid_y]
#                 rgb_section2 = rgb_frame[:, mid_y:]

#             return [depth_section1, depth_section2], [rgb_section1, rgb_section2]

#         # if overlap
#         elif area > 0:
#             print("CASE 2.2")
#             x1, y2, x2, y2 = bbox1
#             a1, b1, a2, b2 = bbox2
#             if confs[0] > confs[1] + 15:
#                 depth_section1 = depth_frame[y1:y2, x1:x2]
#                 rgb_section1 = rgb_frame[y1:y2, x1:x2]

#                 return [depth_section1], [rgb_section1]

#             elif confs[0] < confs[1] + 15:
#                 depth_section1 = depth_frame[b1:b2, a1:a2]
#                 rgb_section1 = rgb_frame[b1:b2, a1:a2]

#                 return [depth_section1], [rgb_section1]

#             else:
#                 return [], []

#             # corner overlap
#             if False and overlapping_corners == 1:
#                 print("CASE 2.2.1")
#                 x1, y1, x2, y2 = bboxes[0]
#                 a1, a2, b1, b2 = bboxes[1]
#                 # if overlap area is less than 30% area of bbox2
#                 if area < 0.3 * (a2 - a1) * (b2 - b1) or area < 0.3 * (x2 - x1) * (
#                     y2 - y1
#                 ):
#                     print("CASE 2.2.1.1")
#                     bboxes.sort(key=lambda x: x[0])
#                     box1, box2 = bboxes

#                     mid_x = (box1[2] + box2[0]) // 2

#                     depth_section1 = depth_frame[:, :mid_x]
#                     depth_section2 = depth_frame[:, mid_x:]

#                     rgb_section1 = rgb_frame[:, :mid_x]
#                     rgb_section2 = rgb_frame[:, mid_x:]

#                     return [depth_section1, depth_section2], [
#                         rgb_section1,
#                         rgb_section2,
#                     ]

#                 # if overlap area is more than 30% area of bbox2
#                 else:
#                     print("CASE 2.2.1.2")
#                     box1, box2 = bboxes
#                     # box1 is significantly more recognizable than box2
#                     crop_bbox1 = depth_frame[y1:y2, x1:x2]
#                     crop_bbox2 = depth_frame[b1:b2, a1:a2]

#                     # box1 is in front of box2
#                     avg_bbox1 = np.average(crop_bbox1)
#                     avg_bbox2 = np.average(crop_bbox1)

#                     print(avg_bbox1, avg_bbox1)

#                     if avg_bbox1 > avg_bbox2 - 20:
#                         print("CASE 2.2.1.2.1")
#                         # use only box1
#                         depth_section1 = depth_frame[
#                             box1[0] : box1[2], box1[1] : box1[3]
#                         ]
#                         rgb_section1 = rgb_frame[box1[1] : box1[3], box1[0] : box1[2]]

#                         return [depth_section1], [rgb_section1]

#                     # box2 is in front of box1
#                     elif avg_bbox2 > avg_bbox1 - 20:
#                         print("CASE 2.2.1.2.2")
#                         # use only box1
#                         depth_section1 = depth_frame[
#                             box2[1] : box2[3], box2[0] : box2[2]
#                         ]
#                         rgb_section1 = rgb_frame[box2[1] : box2[3], box2[0] : box2[2]]

#                         return [depth_section1], [rgb_section1]

#                     else:
#                         print("CASE 2.2.1.2.3")
#                         # both box are similarly recognizable and have significant overlap
#                         while False:
#                             depth_section1 = depth_frame[
#                                 box1[1] : box1[3], box1[0] : box1[2]
#                             ]
#                             depth_section2 = depth_frame[
#                                 box2[1] : box2[3], box2[0] : box2[2]
#                             ]

#                             rgb_section1 = rgb_frame[
#                                 box1[1] : box1[3], box1[0] : box1[2]
#                             ]
#                             rgb_section2 = rgb_frame[
#                                 box2[1] : box2[3], box2[0] : box2[2]
#                             ]

#                             return [depth_section1, depth_section2], [
#                                 rgb_section1,
#                                 rgb_section2,
#                             ]
#                         return [], []
#             # edge overlap
#             elif False and overlapping_corners == 2:
#                 print("CASE 2.2.2")

#                 if area < 0.3 * (a2 - a1) * (b2 - b1) or area < 0.3 * (x2 - x1) * (
#                     y2 - y1
#                 ):
#                     pass
#             # full image overlap
#             elif False:
#                 print("CASE 2.2.3")

#     # 3 person
#     elif len(bboxes) == 3:
#         print("CASE 3")
#         bbox1, bbox2, bbox3 = bboxes

#         x1, y1, x2, y2 = bbox1
#         a1, b1, a2, b2 = bbox2
#         p1, q1, p2, q2 = bbox3

#         # size is bbox area
#         size1 = (x2 - x1) * (y2 - y1)
#         size2 = (a2 - a1) * (b2 - b1)
#         size3 = (p2 - p1) * (q2 - q1)

#         # result lists
#         depth_sections, rgb_sections = [], []

#         corners12, area12 = check_overlap_for_2(bbox1, bbox2)
#         corners13, area13 = check_overlap_for_2(bbox1, bbox3)
#         corners23, area23 = check_overlap_for_2(bbox2, bbox3)

#         # if bbox1
#         if area12 < 0.1 * size1 and area13 < 0.1 * size1:
#             depth1 = depth_frame[y1:y2, x1:x2]
#             rgb1 = rgb_frame[y1:y2, x1:x2]

#             if size1 > 0.15 * depth_frame:
#                 depth_sections.append(depth1)
#                 rgb_sections.append(rgb1)

#         elif area12 < 0.1 * size2 and area23 < 0.1 * size2:
#             depth1 = depth_frame[b1:b2, a1:a2]
#             rgb1 = rgb_frame[b1:b2, a1:a2]

#             if size2 > 0.15 * depth_frame:
#                 depth_sections.append(depth1)
#                 rgb_sections.append(rgb1)

#         elif area13 < 0.1 * size3 and area23 < 0.1 * size3:
#             depth1 = depth_frame[q1:q2, p1:p2]
#             rgb1 = rgb_frame[q1:q2, p1:p2]
#             #
#             if size3 > 0.15 * depth_frame:
#                 depth_sections.append(depth1)
#                 rgb_sections.append(rgb1)
#             pass

#         return depth_sections, rgb_sections

#     # impossible because bboxes  are not appended in object detection after first 3 identified people
#     elif False and len(bboxes) > 3:
#         return [], []

#     # no person, also impossible
#     elif False and bboxes == []:
#         print("CASE 4")
#         return [], []
#     pass