## YOLO Testing
This notebook is for testing purposes. It runs YOLO with different class configurations and uses the detections for simple position and distance estimates.

In [None]:
import clip
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Build the detector, give it a chair vocabulary and persist the result.
# Alternative checkpoint tried earlier: "../models/yolov8s-worldv2.pt"
weights_path = "../models/yolov8x-worldv2.pt"
chair_prompts = ["red chair", "blue chair", "black chair"]
# NOTE(review): the export name says "-s-" although the "x" checkpoint is
# loaded above -- confirm the file name is intentional.
# Alternative export name tried earlier: "yolo-world-s-cabinet.pt"
export_path = "../models/yolo-world2-s-experiment2.pt"

model = YOLO(weights_path)
model.set_classes(chair_prompts)
model.save(export_path)

# Device index handed to the model when running inference.
device = 0

In [None]:
# Swap the detector vocabulary for handle/cabinet related prompts.
handle_prompts = [
    "door handle",
    "handle",
    "cabinet",
    "cabinet handle",
    "drawer handle",
    "appliance handle",
    "knob",
    "furniture handle",
    "cupboard handle",
]
model.set_classes(handle_prompts)

In [None]:
# Run the detector on one test image, mark the centre of each matching box
# and draw the offset from the image centre to the first of them.
IMAGE_PATH = "../../Images/cabinet1_rezise.jpg"
img = cv2.imread(IMAGE_PATH)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than with an
# AttributeError on img.copy() below.
if img is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# Resize a copy to a fixed width while keeping the aspect ratio.
img_copy = img.copy()
target_width = 640
scaling_factor = target_width / img_copy.shape[1]

img_copy = cv2.resize(img_copy, (target_width, int(img_copy.shape[0] * scaling_factor)))

# (x, y) pixel coordinates of the centre of the resized image.
MIDDLE_OF_IMAGE = (img_copy.shape[1] // 2, img_copy.shape[0] // 2)

# GET RESULTS FROM MODEL #
results = model(img_copy, device=device, verbose=False, conf=0.5)
detected_image = results[0].plot()

boxes = results[0].boxes  # Get boxes
box_centers = []
for box in boxes:
    x1, y1, x2, y2 = box.xyxy[0].tolist()
    print(f'confidence of box: {box.conf[0]}')

    # Class id (index in your set_classes)
    cls_id = int(box.cls[0])
    cls_name = results[0].names[cls_id]
    # NOTE(review): none of the class lists passed to set_classes in this
    # notebook contains the exact label "chair" (only "red chair" etc. and
    # the handle prompts), so this branch presumably never runs and
    # box_centers stays empty -- confirm which label is intended.
    if cls_name == "chair":
        center_x = int((x1 + x2) / 2)
        center_y = int((y1 + y2) / 2)
        # Plot center point onto image
        cv2.circle(detected_image, (center_x, center_y), 8, (0, 255, 0), -1)
        box_centers.append((center_x, center_y))
# Gets vector from middle of image to the first box center
if box_centers:
    vector_to_box = (box_centers[0][0] - MIDDLE_OF_IMAGE[0], box_centers[0][1] - MIDDLE_OF_IMAGE[1])
    print(f"Vector to first box: {vector_to_box}")
    # Plot line from middle of image to the first box center
    cv2.line(detected_image, MIDDLE_OF_IMAGE, box_centers[0], (255, 0, 0), 2)

cv2.imshow("Image", detected_image)

# Close when any key is pressed
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Reference measurements and camera parameters used for distance estimation.
G = 2.3  # object height in meters
g = 3.5  # distance from camera to object in meters
f = 1.8 / 1000  # focal length: 1.8 mm expressed in meters

pixel_width, pixel_height = 2534, 1426
focal_length = 23 / 1000  # focal length: 23 mm expressed in meters

# Height of the first detected box: bottom edge (y2) minus top edge (y1).
first_box_xyxy = boxes[0].xyxy[0]
object_height_in_image = first_box_xyxy[3] - first_box_xyxy[1]

# NOTE(review): presumably a 1/1.56" sensor format converted with the
# 16 mm-per-inch convention, giving the diagonal in mm -- confirm.
sensor_diagonal = 16/1.56

# Split the diagonal into height and width using the image aspect ratio:
# diagonal^2 = height^2 * (1 + aspect^2).
aspect_ratio = pixel_width / pixel_height
sensor_height = (sensor_diagonal**2 / (1 + aspect_ratio**2))**0.5
sensor_width = sensor_height * aspect_ratio

print(f"Sensor height: {sensor_height} mm")
print(f"Sensor width: {sensor_width} mm")

In [None]:
# Estimate the camera-to-object distance with the pinhole model:
#     Z = f_px * S_real / S_px
# where f_px is the focal length expressed in pixels of the same image the
# bounding box was measured in.
S_real = 2.3  # real object height in meters
# Convert to a plain float so the arithmetic and the printed value are
# ordinary numbers rather than tensor/array scalars.
S_px = float(object_height_in_image)
if S_px <= 0:
    raise ValueError(f"Bounding-box height must be positive, got {S_px}")
f_mm = 23  # focal length in mm

# NOTE(review): 6.2 is presumably the sensor height in mm; it differs from
# the sensor_height derived from sensor_diagonal elsewhere in this notebook
# -- confirm which value is correct.
SENSOR_HEIGHT_MM = 6.2

# The box height comes from the resized frame passed to the model (img_copy,
# 640 px wide), so the focal length must be expressed in pixels of that same
# frame. Using the full-resolution pixel_height (1426) here would mix two
# pixel scales and inflate the estimated distance.
f_px = f_mm * (img_copy.shape[0] / SENSOR_HEIGHT_MM)

Z_metric = (f_px * S_real) / S_px
print(f"Estimated distance to object: {Z_metric} meters")

In [None]:
# Report every detection held in the most recent inference result.
detection = results[0]
boxes = detection.boxes  # Boxes object
label_lookup = detection.names
for box in boxes:
    # Corner coordinates in xyxy order: [x1, y1, x2, y2]
    corners = box.xyxy[0].tolist()
    x1, y1, x2, y2 = corners

    # Confidence score and class id (index in your set_classes)
    conf, cls_id = float(box.conf[0]), int(box.cls[0])
    cls_name = label_lookup[cls_id]

    print(f"Detected {cls_name} with {conf:.2f} confidence at [{x1}, {y1}, {x2}, {y2}]")

In [None]:
# Print the midpoint of the box whose corners are currently stored in
# x1, y1, x2, y2. These names (and cls_name) are not assigned in this cell;
# they must already exist from a previously executed cell.
center_x, center_y = int((x1 + x2) / 2), int((y1 + y2) / 2)
print(f"Center of {cls_name}: ({center_x}, {center_y})")