In [None]:
import cv2
import pypylon.pylon as py
import torch
import time
import numpy as np
from ultralytics import YOLO

# ---------------------------------------------------------------------------
# One-time setup: depth model, detector, ROI polygon and camera.
# The models are loaded BEFORE the camera is opened so that a failure while
# fetching/loading weights does not leave the camera device open.
# ---------------------------------------------------------------------------

# MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
model_type = "MiDaS_small"
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Run the depth model on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
midas.eval()  # inference mode: no dropout / batch-norm updates

# Pre-processing pipeline (resize + normalize) matching the small model
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.small_transform

# Load YOLOv8 (nano) object detector
yolo_model = YOLO('yolov8n.pt')

# ROI polygon used to mask the frame before detection. The vertices are
# fractions of a 1920x1200 frame; float coordinates are truncated to int32.
roi_vertices = np.array(
    [[
        (0.3 * 1920, 1200),
        (0.52 * 1920, 0.5 * 1200),
        (0.6 * 1920, 0.495 * 1200),
        (0.9 * 1920, 1200),
    ]],
    dtype=np.int32,
)

# Initialize the first Pylon camera found and request RGB8 frames.
# The feature is set through `.Value`: assigning to `icam.PixelFormat`
# directly triggered a warning on that line in earlier runs of this cell
# (NOTE(review): presumably pypylon's direct-assignment deprecation - confirm).
icam = py.InstantCamera(py.TlFactory.GetInstance().CreateFirstDevice())
icam.Open()
icam.PixelFormat.Value = "RGB8"


# Main loop: grab a frame, estimate relative depth with MiDaS, detect objects
# inside the ROI with YOLOv8, and signal "Apply Brake" when any detection
# contains a normalized depth value above BRAKE_THRESHOLD. Press ESC to quit.
#
# NOTE(review): the depth map is min-max normalized per frame, so values are
# only relative within a single frame. MiDaS is understood to predict inverse
# relative depth (larger = closer) - confirm before relying on the threshold.
BRAKE_THRESHOLD = 0.6
ESC_KEY = 27

roi_mask = None  # built lazily; only depends on the frame shape

try:
    while True:
        img = icam.GrabOne(4000)
        img_rgb = img.Array  # RGB, because the camera PixelFormat is RGB8
        frame = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)  # BGR for OpenCV/YOLO

        start = time.perf_counter()

        # Depth estimation. The MiDaS hub transforms expect an RGB image, so
        # the camera's RGB array is used rather than the BGR `frame`.
        with torch.no_grad():
            prediction = midas(transform(img_rgb).to(device))
        depth_map = prediction.squeeze().cpu().numpy()
        depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX)

        # Colorized copy for display; all drawing goes here so the numeric
        # depth_map is never overwritten by box outlines.
        depth_map_color = cv2.applyColorMap((depth_map * 255).astype(np.uint8), cv2.COLORMAP_JET)

        frame_h, frame_w = frame.shape[:2]
        depth_h, depth_w = depth_map.shape[:2]

        # Mask the frame and apply YOLOv8 only within ROI
        if roi_mask is None or roi_mask.shape != frame.shape:
            roi_mask = np.zeros_like(frame)
            cv2.fillPoly(roi_mask, roi_vertices, (255, 255, 255))
        frame_roi = cv2.bitwise_and(frame, roi_mask)

        # Ultralytics treats numpy inputs as BGR (OpenCV convention), so the
        # masked BGR frame is passed as-is without a colour conversion.
        yolo_outputs = yolo_model.predict(frame_roi)

        # Flag to determine if brake needs to be applied
        apply_brake = False

        for output in yolo_outputs:
            # One [x_min, y_min, x_max, y_max] entry per detection, in frame pixels
            for coordinates in output.boxes.xyxy.tolist():
                x_min, y_min, x_max, y_max = map(int, coordinates)

                # Convert bounding box coordinates to depth map dimensions
                x_min_depth = int(x_min * depth_w / frame_w)
                y_min_depth = int(y_min * depth_h / frame_h)
                x_max_depth = int(x_max * depth_w / frame_w)
                y_max_depth = int(y_max * depth_h / frame_h)

                # Read the depth values BEFORE any drawing touches the maps
                depth_values = depth_map[y_min_depth:y_max_depth, x_min_depth:x_max_depth]

                # Draw bounding box on the normal frame and on the depth view
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.rectangle(depth_map_color, (x_min_depth, y_min_depth), (x_max_depth, y_max_depth), (0, 255, 0), 2)

                # A very small box can scale down to an empty slice; skip it
                if depth_values.size > 0:
                    max_depth = np.max(depth_values)
                    print(f"Maximum depth within bounding box: {max_depth}")

                    # Apply brake if the maximum depth within the bounding box exceeds a threshold
                    if max_depth > BRAKE_THRESHOLD:
                        apply_brake = True
                        print("Apply Brake")

        # Create an image (white background) for displaying brake information
        brake_info_img = np.full((100, 800, 3), 255, dtype=np.uint8)
        brake_info_text = "Apply Brake" if apply_brake else "No Brake Applied"
        cv2.putText(brake_info_img, brake_info_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Guard against a zero elapsed time on very coarse clocks
        totalTime = time.perf_counter() - start
        fps = 1 / totalTime if totalTime > 0 else 0.0

        # Annotate and downscale both views for display
        cv2.putText(frame, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
        cv2.polylines(frame, roi_vertices, True, (0, 255, 0), 2)  # Draw ROI on the normal frame
        depth_map_vis = cv2.resize(depth_map_color, (960, 600))
        frame_viz = cv2.resize(frame, (960, 600))

        # Display frames
        cv2.imshow('Depth Estimation', depth_map_vis)
        cv2.imshow('Normal Frame', frame_viz)
        cv2.imshow('Brake Info', brake_info_img)

        # ESC exits the loop; mask to the low byte for platform-independent codes
        key = cv2.waitKey(1) & 0xFF
        if key == ESC_KEY:
            break
finally:
    # Always release the camera and close windows, even if a grab or
    # inference step raised.
    icam.Close()
    cv2.destroyAllWindows()


Using cache found in C:\Users\Vijay Meher/.cache\torch\hub\intel-isl_MiDaS_master


Loading weights:  None


Using cache found in C:\Users\Vijay Meher/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master
Using cache found in C:\Users\Vijay Meher/.cache\torch\hub\intel-isl_MiDaS_master
  icam.PixelFormat = "RGB8"



0: 416x640 (no detections), 46.6ms
Speed: 7.6ms preprocess, 46.6ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 45.0ms
Speed: 5.0ms preprocess, 45.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 40.2ms
Speed: 5.1ms preprocess, 40.2ms inference, 0.9ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 39.8ms
Speed: 4.9ms preprocess, 39.8ms inference, 1.7ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 37.7ms
Speed: 4.2ms preprocess, 37.7ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 20.1ms
Speed: 4.0ms preprocess, 20.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 20.6ms
Speed: 4.9ms preprocess, 20.6ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 (no detections), 20.0ms
Speed: 4.0ms preprocess, 20.0ms i