In [1]:
import cv2
import os
import math
import mmcv
import pandas as pd
import torch
from time import sleep
import numpy as np
from matplotlib import pyplot as plt
from ultralytics import YOLO
from ultralytics.yolo.utils.plotting import Annotator

from mmpose.apis import inference_top_down_pose_model, init_pose_model, vis_pose_result
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Execute the project's helper notebooks. `apply_mask` and `ObjectHandler` are
# used further down without a visible import, so they are presumably defined
# by these notebooks -- TODO confirm which notebook provides which name.
%run utils/superimposition/functions.ipynb
%run utils/extraction/objClass.ipynb



In [2]:
class Configuration:
    """Settings for processing one recording: stream choice, codec, paths, models.

    Args:
        main_img: Name of the primary image stream (the notebook passes "RGB").
        seg_net: Path to the segmentation network weights.
        video_format: FourCC characters; unpacked into ``cv2.VideoWriter_fourcc``.
        fps: Frame rate kept for the output video writer.
        file_name: Name of the recording folder inside ``videos/``.
        pose_config: Pose-model config file. Defaults to the HRNet-W32 COCO
            256x192 config that was previously hard-coded.
        pose_checkpoint: Pose-model checkpoint path or URL. Defaults to the
            matching HRNet-W32 checkpoint that was previously hard-coded.

    Attributes:
        fourcc: Codec code built from ``video_format``.
        main_path: ``videos/<file_name>/``.
        processed_path: ``videos/<file_name>/processed/``.
    """

    DEFAULT_POSE_CONFIG = "utils/skeleton/hrnet_w32_coco_256x192.py"
    DEFAULT_POSE_CHECKPOINT = "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth"

    def __init__(self, main_img, seg_net, video_format, fps, file_name,
                 *, pose_config=None, pose_checkpoint=None):
        self.main_img = main_img
        self.seg_net = seg_net
        self.fourcc = cv2.VideoWriter_fourcc(*video_format)
        self.fps = fps
        self.main_path = f"videos/{file_name}/"
        self.processed_path = self.main_path + "processed/"
        # None means "use the default", so existing five-argument callers are unaffected.
        self.pose_config = self.DEFAULT_POSE_CONFIG if pose_config is None else pose_config
        self.pose_checkpoint = self.DEFAULT_POSE_CHECKPOINT if pose_checkpoint is None else pose_checkpoint

# List available video files.
# os.listdir returns entries in arbitrary order, so sort them (case-insensitively)
# to keep the index-based selection below reproducible across runs and machines.
file_list = sorted(os.listdir("videos"), key=str.lower)
selected_file = file_list[2]

print(file_list, "\nSelected File:", selected_file)

# Create a configuration object
config = Configuration(main_img="RGB", seg_net="utils/segmentation/yolov8n-seg.pt", video_format="mp4v", fps=10, file_name=selected_file)

['2022-12-01_18-03-33', '2022-12-02_13-05-04', '2023-01-24_15-28-00', '2023-01-28_15-50-17', 'combined_outisde_13.gif', 'newStereo', 'outside_test', 'SII'] 
Selected File: 2023-01-24_15-28-00


In [3]:
# Initialize the segmentation and pose models
seg_model = YOLO(config.seg_net)
pose_model = init_pose_model(config.pose_config, config.pose_checkpoint, device)

# Create an object handler and a dictionary for holding images
handler = ObjectHandler()
images = {"RGB": None, "Thermal": None, "Depth": None}

# Set the input video path (the stream the segmentation/tracking model runs on)
path = f"{config.processed_path}{config.main_img}_sii.mp4"

# Open the input videos
rgb_video = cv2.VideoCapture(f"{config.processed_path}RGB_sii.mp4")
thermal_video = cv2.VideoCapture(f"{config.processed_path}Thermal_sii.mp4")
depth_video = cv2.VideoCapture(f"{config.processed_path}Depth_sii.mp4")

# Initialize the output video writer
# NOTE(review): the output size is fixed at 512x512 -- confirm the processed
# streams really have that size.
track_out = cv2.VideoWriter(f"{config.processed_path}track.mp4", config.fourcc, config.fps, (512, 512))

# Use the first GPU for YOLO only when one exists; otherwise run on the CPU
# instead of failing on a hard-coded device index.
seg_device = 0 if torch.cuda.is_available() else "cpu"

frame_count = 0
try:
    # Fail loudly if a stream cannot be opened instead of silently processing nothing
    unopened = [name for name, capture in (("RGB", rgb_video), ("Thermal", thermal_video), ("Depth", depth_video))
                if not capture.isOpened()]
    if unopened:
        raise IOError(f"Could not open input video(s): {', '.join(unopened)}")

    # Process the input videos frame by frame
    for result in seg_model.track(source=path, stream=True, verbose=False, classes=[0, 2], device=seg_device):

        # Read the frames from input videos
        ret, images["RGB"] = rgb_video.read()
        ret1, images["Thermal"] = thermal_video.read()
        ret2, images["Depth"] = depth_video.read()
        if not (ret and ret1 and ret2):
            break

        # Process the depth image: normalize, convert to 8-bit gray, equalize, colorize
        stereo_frame_mapped = cv2.normalize(images["Depth"], None, 255, 0, cv2.NORM_INF, cv2.CV_32F)
        stereo_frame_mapped = cv2.convertScaleAbs(stereo_frame_mapped)
        stereo_frame_mapped = cv2.cvtColor(stereo_frame_mapped, cv2.COLOR_BGR2GRAY)
        stereo_frame_mapped = cv2.equalizeHist(stereo_frame_mapped)
        stereo_frame_mapped = cv2.applyColorMap(stereo_frame_mapped, cv2.COLORMAP_JET)
        images["Color_Depth"] = stereo_frame_mapped

        # If bounding boxes are detected and the tracker assigned IDs to them
        if result.boxes and result.boxes.id is not None:

            # Collect Properties of Objects
            classes = result.boxes.cls
            masks = result.masks.data.cpu()
            boxes_array = np.array(result.boxes.xyxy.cpu())
            track_ids = result.boxes.id.cpu()
            frame_num = np.ones_like(classes) * frame_count
            object_results = [{'class': cls.item(), 'track_id': track_id.item(), 'bbox': bbox, 'mask': mask, 'frame_num': frame}
                              for cls, track_id, bbox, mask, frame in zip(classes, track_ids, boxes_array, masks, frame_num)]

            # Apply masks to the images
            images["Superimposed"] = apply_mask(images.copy(), masks, config.main_img)

            # Perform pose estimation
            object_results = inference_top_down_pose_model(pose_model, images[config.main_img], object_results, format='xyxy')[0]

            # Add object results to the handler
            handler.append_objects(images, object_results)
            images["Superimposed"] = vis_pose_result(pose_model, images["Superimposed"], object_results, bbox_color=(0, 0, 0))

            # Initialize annotator for visualizing the results
            annotator = Annotator(images["Superimposed"])

            # Annotate the results with bounding boxes and track IDs
            for obj in object_results:
                color = handler.object_container[obj["track_id"]].color_id
                annotator.box_label(obj["bbox"], label=f'{obj["track_id"]}', color=color)

            # Get the final annotated frame
            frame = annotator.result()

            # Display the images
            for label, image in images.items():
                cv2.imshow(label, image)

            # Display the YOLO result
            cv2.imshow("YOLO", frame)

            # Write the output frame to the video
            track_out.write(images["Superimposed"])

        # Count every frame that was read, not only those with detections, so
        # frame_num stays aligned with the position in the input videos.
        frame_count += 1

        # Break the loop if 'q' is pressed (polled on every frame so the key
        # also works while nothing is being detected)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the inputs and the output video and close all windows, even if
    # processing stopped early or raised.
    for capture in (rgb_video, thermal_video, depth_video):
        capture.release()
    track_out.release()
    cv2.destroyAllWindows()

load checkpoint from http path: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth


In [4]:
_=handler.create_dataframe()

In [6]:
display(_)

Unnamed: 0,frame_num,track_id,class,bbox,color,temperature,spatials,keypoints
0,0.0,1,0,"[182.50389, 222.00867, 216.63762, 330.8199]","[6, 2, 0]",70.749798,"[-1.4470411539646868, -0.5234926974657135, 9.0...","[[209.66556, 231.25336, 0.7894298], [209.66556..."
1,0.0,2,0,"[39.979027, 221.66737, 65.53487, 300.42285]","[6, 5, 1]",60.338636,"[-8.19005841147257, -0.20330192733994118, 14.2...","[[56.98698, 228.35852, 0.6702892], [55.448788,..."
2,0.0,3,0,"[494.0433, 224.4111, 511.827, 280.3014]","[18, 26, 53]",94.372960,"[4.029627597555326, 0.0594608254326273, 5.7647...","[[498.84164, 232.43442, 0.84143305], [499.3874..."
3,0.0,4,0,"[431.9463, 223.09326, 450.54474, 275.00488]","[68, 88, 108]",92.652116,"[15.114705920896274, 0.56714587231576, 28.8235...","[[440.4851, 229.02457, 0.7544142], [441.49896,..."
4,0.0,5,0,"[93.43863, 232.28745, 115.77039, 303.86487]","[0, 0, 0]",70.993255,"[-4.235241204558135, -0.33782710340719735, 9.8...","[[111.94399, 238.36876, 0.77869993], [111.9439..."
...,...,...,...,...,...,...,...,...
148,21.0,4,0,"[429.81064, 223.36285, 447.5812, 275.0819]","[50, 69, 86]",96.779810,"[15.150051895550941, 0.5620337280101515, 29.29...","[[436.92816, 229.27216, 0.78384405], [437.9383..."
149,21.0,5,0,"[95.26263, 228.11926, 116.58164, 304.20755]","[2, 1, 0]",69.352697,"[-4.048438826909992, -0.2741634619563722, 9.52...","[[112.23805, 238.29904, 0.5146969], [110.00891..."
150,21.0,6,0,"[112.469986, 221.05827, 144.4051, 312.67932]","[16, 18, 21]",65.322577,"[-3.4410723424685177, -0.2931924152257727, 9.5...","[[130.67438, 229.73721, 0.80112725], [130.6743..."
151,21.0,7,0,"[405.3918, 227.14967, 423.87692, 274.89917]","[145, 179, 208]",92.205695,"[11.728330008018611, 0.3678604722557693, 26.11...","[[411.1371, 231.20653, 0.79912597], [412.0697,..."


In [9]:
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# Track whose recorded spatial coordinates are plotted
track_id = 2

# assumes the stored spatials form an (n_samples, 3+) array -- TODO confirm
obj_spatials = np.array(handler.object_container[track_id].properties['spatials']).transpose(1, 0)
x, y, z = obj_spatials[0], obj_spatials[1], obj_spatials[2]

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# Label the curve with what it actually shows instead of demo placeholder text
ax.plot(x, y, z, label=f'track {track_id}')

ax.set_title(f'Spatial coordinates of track {track_id}')
ax.set_xlabel('spatial X')
ax.set_ylabel('spatial Y')
ax.set_zlabel('spatial Z')

ax.legend()

plt.show()