In [15]:
# Local import so this cell also runs on a fresh kernel: it sits above the
# notebook's import cell, where the name YOLO would otherwise be undefined.
from ultralytics import YOLO

# Load a COCO-pretrained YOLOv8n model from the weights file "yolov8n.pt".
# Per the original note, this leaves a local yolov8n weights file that is
# reused later for prediction.
# NOTE(review): the second positional argument ("v8") is kept as-is; confirm it
# is still meaningful for the installed ultralytics version.
model = YOLO("yolov8n.pt", "v8")

# Display model information (optional)
model.info()

YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs


(129, 3157200, 0, 8.8575488)

In [1]:
import numpy as np
from ultralytics import YOLO
import random
import cv2
import pyttsx3  # This library will be used for voice notifications
import threading


In [2]:
# Create the shared text-to-speech engine used for the voice notifications
# and apply its speech settings.
engine = pyttsx3.init()
for _prop, _value in (
    ("rate", 150),   # speed of speech
    ("volume", 1),   # volume level (0.0 to 1.0)
):
    engine.setProperty(_prop, _value)

In [3]:
# Load the names of the COCO pre-trained classes, one per line. The position of
# a name in `class_list` is the class id used to label the bounding boxes.
# The `with` block closes the file even if reading fails.
with open("utils/cocoObjects.txt", "r", encoding="utf-8") as my_file:
    data = my_file.read()

# splitlines() (unlike split("\n")) does not produce a bogus empty class name
# when the file ends with a newline, and it also copes with "\r\n" endings.
class_list = data.splitlines()

In [4]:
# Sanity check: show the class names that were loaded from the COCO objects file.
print(class_list)

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [5]:
# One random colour per class, used when drawing that class's bounding boxes.
# The three channels are drawn in r, g, b order and stored reversed as (b, g, r).
Boxes = [
    tuple(reversed([random.randint(0, 255) for _channel in range(3)]))
    for _class_name in class_list
]

In [6]:
# Target frame dimensions (width, height) intended for resizing video frames
# to optimise the run.
# NOTE(review): no cell visible here applies these values — confirm they are still used.
frame_wid, frame_hyt = 1240, 920

In [12]:
# To run on a recorded video instead of the webcam, use:
# cap = cv2.VideoCapture("inference/videos/afriq0.MP4")

# Open the default camera (device index 0)
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Raise instead of calling exit(): in a notebook exit() shuts the session
    # down rather than reporting the problem in the cell output.
    cap.release()
    raise RuntimeError("Cannot open camera")

In [13]:
# Guards the speech engine: only one announcement may be spoken at a time.
_speech_lock = threading.Lock()


def _distance_message(class_name, distance):
    """Return the sentence to speak for an object `distance` meters away."""
    if distance < 1:
        return f"Warning! A {class_name} is in front of you, less than 1 meter away."
    if distance < 2:
        return f"A {class_name} is in front of you, less than 2 meters away."
    if distance < 3:
        return f"A {class_name} is in front of you, about 2 to 3 meters away."
    if distance < 4:
        return f"A {class_name} is in front of you, about 3 meters away."
    if distance < 5:
        return f"A {class_name} is in front of you, about 4 meters away."
    if distance < 6:
        return f"A {class_name} is in front of you, about 5 meters away."
    return f"A {class_name} is far away, more than 5 meters."


# Function to announce the distance and object
def announce_distance(class_name, distance, speech_engine=None):
    """Speak the object's name and distance range on a background thread.

    The speech engine is shared, and starting a second ``runAndWait()`` while
    one is still running fails. Because this function may be called for every
    detection of every frame, a request that arrives while another
    announcement is still being spoken is dropped instead of being queued.

    Args:
        class_name: Name of the detected object, inserted into the sentence.
        distance: Estimated distance to the object, in meters.
        speech_engine: Optional object exposing ``say(text)`` and
            ``runAndWait()``. Defaults to the notebook-level ``engine``.

    Returns:
        The started ``threading.Thread`` that speaks the announcement, or
        ``None`` when the request was dropped because speech is in progress.
    """
    # Resolve the engine before taking the lock so a missing global cannot
    # leave the lock held forever.
    tts = engine if speech_engine is None else speech_engine

    if not _speech_lock.acquire(blocking=False):
        return None

    def speak():
        try:
            tts.say(_distance_message(class_name, distance))
            # Run the speech engine
            tts.runAndWait()
        finally:
            _speech_lock.release()

    # Run the speaking function in a separate thread
    worker = threading.Thread(target=speak)
    try:
        worker.start()
    except RuntimeError:
        # The thread never ran, so it cannot release the lock itself.
        _speech_lock.release()
        raise
    return worker

In [None]:
# Real widths of common objects (meters). Classes missing from this table are
# still detected and drawn, but their distance is shown as "Distance: ? m".
REAL_WIDTHS = {
    "person": 0.5,   # Average shoulder width of a person
    "car": 1.8,      # Average width of a car
    "bottle": 0.07,  # Average width of a bottle
    "cup": 0.07,
}

# Focal length used by the estimate below:
#     distance = real_width * FOCAL_LENGTH / width_in_pixels
# You need to calculate this once using a known object at a fixed distance.
FOCAL_LENGTH = 462

# Load YOLO model
# NOTE(review): this path differs from the "yolov8n.pt" loaded in the first
# cell — confirm which weights file is intended.
model = YOLO("weights/yolov8n.pt", "v8")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame. Exiting ...")
            break

        # Draw on a copy: measurements for every box are then taken from the
        # untouched camera pixels, not from rectangles/text of earlier boxes.
        annotated = frame.copy()

        # Perform object detection using YOLO model
        detect_params = model.predict(source=[frame], conf=0.45, save=False)
        # Move the results to host memory before converting; .numpy() alone
        # fails when inference ran on a GPU.
        DP = detect_params[0].cpu().numpy()
        boxes = DP.boxes

        if len(DP) == 0:
            # Hint shown while nothing is detected. Kept ASCII-only because
            # cv2.putText replaces characters its font cannot render with '?'.
            cv2.putText(annotated, "Montrez un objet pour detecter...", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        for i in range(len(DP)):
            box = boxes[i]
            clsID = int(box.cls[0])
            conf = float(box.conf[0])  # model confidence as a 0-1 fraction
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            class_name = class_list[clsID]

            # ===================== DISTANCE ESTIMATION =====================
            distance_text = "Distance: ? m"
            object_width_real = REAL_WIDTHS.get(class_name)
            object_roi = frame[y1:y2, x1:x2]

            # Skip the contour work when the class has no known width, and
            # guard against a zero-area box (cvtColor fails on an empty image).
            if object_width_real and object_roi.size > 0:
                gray = cv2.cvtColor(object_roi, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)
                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                if contours:
                    largest_contour = max(contours, key=cv2.contourArea)
                    rect = cv2.minAreaRect(largest_contour)
                    object_width_pixels = min(rect[1])  # Use the smaller dimension (width)

                    if object_width_pixels > 0:
                        # Calculate the distance based on object width in pixels
                        distance = (object_width_real * FOCAL_LENGTH) / object_width_pixels
                        distance_text = f"Distance: {round(distance, 2)} m"

                        # Announce the distance for this object
                        announce_distance(class_name, distance)

            # ===================== DRAW =====================
            cv2.rectangle(annotated, (x1, y1), (x2, y2), Boxes[clsID], 3)

            # Line 1: Object name + confidence, converted to a percentage to
            # match the "%" suffix.
            cv2.putText(annotated, f"{class_name} {conf * 100:.1f}%", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

            # Line 2: Distance
            cv2.putText(annotated, distance_text, (x1, y1 + 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        # Display frame
        cv2.imshow("ObjectDetection", annotated)

        # Quit if 'q' is pressed or window is closed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        if cv2.getWindowProperty("ObjectDetection", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

##### the nearest object
##### other objects
##### Road Sign Detection (https://www.kaggle.com/datasets/andrewmvd/road-sign-detection)
##### traffic lights  (https://github.com/daved01/cocoTraffic)