In [10]:
import torch
import cv2
import numpy as np
import pyrealsense2 as rs
import pandas as pd

In [11]:

class ObjectDection:
    """Run YOLOv5 object detection on a frame source and show annotated frames.

    Frames come either from an OpenCV-readable source (``cv2.VideoCapture``)
    or, when ``isRealsense`` is true, from an Intel RealSense pipeline that
    also supplies a depth image used to annotate each detection.
    """

    def __init__(self,file1,file2=None,isRealsense=False):
        """Store the sources and load the detection model.

        Args:
            file1: Source handed to ``cv2.VideoCapture`` when not using RealSense.
            file2: Optional second source; stored as ``right_camera`` but not
                read by any method of this class.
            isRealsense: When true, frames are read from a RealSense pipeline
                instead of ``file1``.
        """
        self.video = file1
        self.right_camera = file2
        self.isRealsense = isRealsense
        self.model = self.load_model()

    def load_model(self):
        """Load and return the ``yolov5s`` model through ``torch.hub``."""
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
        return model

    def camera_setup(self):
        """Configure and start a RealSense pipeline with depth + color streams.

        Returns:
            The started ``rs.pipeline``; the caller is responsible for
            calling ``stop()`` on it.
        """
        pipeline = rs.pipeline()
        config = rs.config()
        # Resolving the config up front surfaces a missing/unsupported device
        # before any stream is requested.
        config.resolve(rs.pipeline_wrapper(pipeline))

        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

        # Start streaming
        pipeline.start(config)
        return pipeline

    def capture(self):
        """Read frames, detect objects, annotate and display them in a window.

        The loop ends when Esc or ``q`` is pressed, or (for a
        ``cv2.VideoCapture`` source) as soon as a frame cannot be read, e.g.
        at the end of a video file. The capture device / pipeline and the
        OpenCV windows are released even if an exception interrupts the loop.
        """
        pipeline = None
        cap = None
        if self.isRealsense:
            pipeline = self.camera_setup()
        else:
            # Only open the OpenCV source when it is actually used.
            cap = cv2.VideoCapture(self.video)

        try:
            while True:
                if pipeline is not None:
                    # get_frame returns (ok, depth_image, color_image).
                    ret, depth_image, color_image = self.get_frame(pipeline)
                    if not ret:
                        # Incomplete frameset: wait for the next one.
                        continue
                else:
                    ret, color_image = cap.read()
                    depth_image = None
                    if not ret:
                        # End of stream or unreadable source: stop instead of
                        # spinning forever on failed reads.
                        break

                result_object = self.detect_object(color_image)
                cords = self.write_result(result_object, color_image)
                # A NumPy array has no unambiguous truth value, so compare
                # against None explicitly.
                if depth_image is not None:
                    self.write_depth(color_image, depth_image, cords)

                cv2.imshow("frame", color_image)
                key = cv2.waitKey(1) & 0xFF
                if key == 27 or key == ord("q"):
                    break
        finally:
            print("Finish")
            if cap is not None:
                cap.release()
            if pipeline is not None:
                pipeline.stop()
            cv2.destroyAllWindows()

    def detect_object(self,color_frame):
        """Run the model on one frame and return the raw model results.

        Args:
            color_frame: Image array in OpenCV channel order (BGR, channels
                on the last axis).

        Returns:
            Whatever ``self.model`` returns for the frame.
        """
        # The YOLOv5 hub model treats ndarray input as RGB, while OpenCV and
        # the bgr8 RealSense stream deliver BGR, so flip the channel axis.
        rgb_frame = color_frame[..., ::-1]
        results = self.model(rgb_frame)
        return results

    def detect_depth(self,depth_image,point):
        """Return the depth value stored at ``point`` of ``depth_image``.

        Args:
            depth_image: 2-D (or higher) array indexed as ``[row, column]``.
            point: ``(x, y)`` pixel coordinate; it is clamped to the image
                bounds so a box corner on the border neither wraps around
                (negative index) nor raises ``IndexError``.

        Returns:
            The array element at the clamped ``[y, x]`` position.
        """
        height, width = depth_image.shape[:2]
        x = min(max(int(point[0]), 0), width - 1)
        y = min(max(int(point[1]), 0), height - 1)
        return depth_image[y, x]

    def __call__(self):
        """Start the capture/detection loop (same as calling ``capture``)."""
        self.capture()

    def get_frame(self,pipeline):
        """Fetch one frameset from a RealSense pipeline.

        Args:
            pipeline: Object providing ``wait_for_frames()``.

        Returns:
            ``(True, depth_image, color_image)`` as arrays on success, or
            ``(False, None, None)`` when either frame is missing. Note the
            order: depth comes before color.
        """
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()
        # Validate before touching the frame data.
        if not depth_frame or not color_frame:
            return False, None, None

        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())
        return True, depth_image, color_image

    def write_result(self,results,frame,confidence_threshold=0.25):
        """Draw a box and label on ``frame`` for every confident detection.

        Args:
            results: Model output exposing ``pandas().xyxy[0]``; each row is
                read positionally as x_min, y_min, x_max, y_max, confidence,
                <unused>, label.
            frame: Image that is annotated in place.
            confidence_threshold: Detections whose confidence is not strictly
                greater than this value are skipped.

        Returns:
            List of ``(x_min, y_min)`` integer corners, one per drawn box.
        """
        detections = results.pandas().xyxy[0].to_numpy()
        green = (0, 255, 0)
        cords = []
        for row in detections:
            if not row[4] > confidence_threshold:
                continue
            x_min, y_min, x_max, y_max = (int(value) for value in row[:4])
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), green, 2)
            cv2.putText(frame, str(row[6]), (x_min, y_min),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2, cv2.LINE_AA)
            cords.append((x_min, y_min))
        return cords

    def write_depth(self,frame,depth_image,cords):
        """Write the depth value found at each coordinate onto ``frame``.

        Args:
            frame: Image that is annotated in place.
            depth_image: Depth array sampled through ``detect_depth``.
            cords: Iterable of ``(x, y)`` pixel coordinates.
        """
        # NOTE(review): the value is drawn as stored in the depth image; its
        # physical unit depends on the device depth scale - confirm before
        # presenting it as a distance. The depth and color streams are also
        # not aligned here, so the sampled pixel is only approximate.
        for x, y in cords:
            distance = self.detect_depth(depth_image, (x, y))
            # cv2.putText requires a str; draw below the box corner so the
            # text does not overlap the class label written at (x, y).
            cv2.putText(frame, str(distance), (x, y + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)


In [12]:
# Build the detector for the local clip, then invoke it to start the
# capture/display loop (press Esc or "q" in the window to stop).
object_detect = ObjectDection(file1="video.mp4")
object_detect()

Using cache found in C:\Users\admin/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-7-1 Python-3.10.9 torch-1.12.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
