In [1]:
# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import imutils
import time
import cv2
import os
import cvzone

In [2]:
# Load YOLOv3 weights, configuration and class labels.
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from one configurable base directory so the notebook runs elsewhere.
weights_path = r"C:\Users\ragus\Spoon_detection_yolov3\spoon_detector\yolov3.weights"
config_path = r"C:\Users\ragus\Spoon_detection_yolov3\spoon_detector\yolov3.cfg"
names_path = r"C:\Users\ragus\Spoon_detection_yolov3\spoon_detector\coco.names"

try:
    net = cv2.dnn.readNet(weights_path, config_path)
except cv2.error as e:
    print(f"Error loading YOLOv3 model: {e}")
    # Re-raise so the notebook stops here: continuing would leave `net`
    # undefined and the detection cells would fail later with a NameError
    # that hides the real cause.
    raise
else:
    print("YOLOv3 model loaded successfully!")

# Load COCO class labels (customize this based on your dataset).
# One label per line; the line position is the class id used for lookups.
try:
    with open(names_path, "r") as f:
        classes = [line.strip() for line in f]
except FileNotFoundError:
    print("Error: 'coco.names' file not found. Please download it or specify the correct path.")
    # Same reasoning as above: `classes` is required by draw_prediction().
    raise
else:
    print("COCO class labels loaded successfully!")

# Function to get output layer names
def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here, the network returned by
            ``cv2.dnn.readNet``).

    Returns:
        list: Layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    layer_names = net.getLayerNames()
    # The reported indices are 1-based. Their container shape differs between
    # OpenCV releases (flat vs. one index per row), so flatten before indexing
    # to accept both layouts.
    out_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(index) - 1] for index in out_indices]

# Function to draw bounding box and label
def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw a bounding box and its class label onto ``img``.

    Args:
        img: Image handed to ``cv2.rectangle`` / ``cv2.putText``; the drawing
            is applied to this object, nothing is returned.
        class_id: Index into the module-level ``classes`` list.
        confidence: Detection confidence. Not rendered; kept in the signature
            so existing callers keep working.
        x, y: Top-left corner of the box, in pixels.
        x_plus_w, y_plus_h: Bottom-right corner of the box, in pixels.
    """
    label = str(classes[class_id])
    color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2

    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, thickness)

    # The label normally sits 10 px up and to the left of the box corner.
    # For boxes touching the top or left edge that point lies outside the
    # image, so clamp it: x to the left border, y to one text height so the
    # glyphs stay inside the frame.
    (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
    label_origin = (max(x - 10, 0), max(y - 10, text_height))
    cv2.putText(img, label, label_origin, font, font_scale, color, thickness)


YOLOv3 model loaded successfully!
COCO class labels loaded successfully!


In [3]:
# Open the video source and keep the started stream in `vs`; the detection
# loop reads frames from it and stops it when finished.
vs = VideoStream(src=0).start()  # source index 0: the webcam
# Alternative source, left for reference (path_to_video_file is not defined here):
# vs = VideoStream(path_to_video_file).start()  # for video file
# Wait two seconds before the first read.
time.sleep(2.0)  # allow the camera or video file to warm up

In [4]:
# Detection thresholds. The same confidence cut-off is used both when
# collecting candidate boxes and inside non-maxima suppression.
CONFIDENCE_THRESHOLD = 0.5  # adjust confidence threshold as needed
NMS_THRESHOLD = 0.4

# The output layer names do not change between frames, so resolve them once
# instead of on every iteration.
output_layer_names = get_output_layers(net)

try:
    while True:
        frame = vs.read()
        if frame is None:
            # No image was delivered (for example the camera could not be
            # opened or the stream ended); frame.shape below would fail.
            print("No frame received from the video stream; stopping.")
            break

        frame = imutils.resize(frame, width=800)  # resize if necessary
        height, width = frame.shape[:2]

        # YOLO input preprocessing
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        # Forward-pass timing; the values are recorded but not used in this cell.
        start = time.time()
        layer_outputs = net.forward(output_layer_names)
        end = time.time()

        # Process detections
        boxes = []
        confidences = []
        class_ids = []

        for output in layer_outputs:
            # Each row: four normalized box values (center x, center y,
            # width, height), one value that is not used here, then one score
            # per class.
            class_scores = output[:, 5:]
            best_class = np.argmax(class_scores, axis=1)
            best_score = class_scores[np.arange(class_scores.shape[0]), best_class]

            # Select the confident rows with one vectorized comparison so the
            # Python-level loop only visits detections that are kept.
            for row in np.flatnonzero(best_score > CONFIDENCE_THRESHOLD):
                detection = output[row]

                # Scale the normalized box to pixel units of the resized frame.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Convert from box center to top-left corner.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(best_score[row]))
                class_ids.append(int(best_class[row]))

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(
            boxes,
            confidences,
            score_threshold=CONFIDENCE_THRESHOLD,
            nms_threshold=NMS_THRESHOLD,
        )

        if len(indices) > 0:
            # Normalize to a flat array so any sequence type returned by
            # NMSBoxes can be iterated the same way.
            for i in np.asarray(indices).flatten():
                x, y, w, h = boxes[i]
                draw_prediction(frame, class_ids[i], confidences[i], x, y, x + w, y + h)

        # Display the resulting frame
        cv2.imshow('Object Detection', frame)

        # Press 'q' in the display window to leave the loop.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Clean up, including when the loop ends through an exception or a kernel
    # interrupt, so the camera and the display window are always released.
    cv2.destroyAllWindows()
    vs.stop()  # if using VideoStream
