In [1]:
import tensorflow as tf
import cv2
import math
from ultralytics import YOLO
import tkinter as tk

In [2]:
# Report the first GPU that TensorFlow can see, or say so when there is none.
# NOTE(review): this only reflects TensorFlow's view of the hardware; confirm
# separately which device the YOLO model loaded in a later cell runs on.
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
  details = tf.config.experimental.get_device_details(gpu_devices[0])
  gpu_name = details.get('device_name', 'Unknown GPU')
else:
  # Always bind gpu_name so the print below cannot raise NameError (or show a
  # stale value from a previous run) on a machine without a visible GPU.
  gpu_name = 'No GPU detected'
print(gpu_name)

NVIDIA GeForce RTX 4050 Laptop GPU


In [3]:
# model
# Loaded before the camera is opened so that a failure here (e.g. missing
# weights file) does not leave the webcam held by this kernel.
model = YOLO("yolo-Weights/yolov8n.pt")

# webcam: device index 0, requesting a 640x480 stream
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail fast with a clear message instead of letting later cells/loops
    # run against a capture object that cannot deliver frames.
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0)")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # property id 3
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # property id 4

# Object class labels, indexed by the integer class id of a detection:
# classNames[i] is the text printed/drawn for class id i (80 entries, ids 0-79).
classNames = [
    # ids 0-9
    "person", "bicycle", "car", "motorbike", "aeroplane",
    "bus", "train", "truck", "boat", "traffic light",
    # ids 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # ids 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # ids 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # ids 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # ids 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "sofa", "pottedplant", "bed",
    # ids 60-69
    "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # ids 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


# Text style for the class labels; defined once because it is the same for
# every detection in every frame.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

# Live detection loop: read a frame, run the model on it, draw a box and a
# label for each detection, and show the annotated frame. Ends when 'q' is
# pressed in the window or the camera stops delivering frames.
try:
    while True:
        success, img = cap.read()
        if not success:
            # A failed read gives no usable frame; stop here instead of
            # passing it on to the model and the drawing/imshow calls.
            print("Failed to read a frame from the webcam; stopping.")
            break

        results = model(img, stream=True)

        for r in results:
            boxes = r.boxes

            for box in boxes:
                # bounding box corners, converted to int pixel coordinates
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # put box in cam
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # confidence, rounded up to two decimals
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)

                # class name; fall back to the raw id if the model reports a
                # class id outside the hard-coded label list
                cls = int(box.cls[0])
                label = classNames[cls] if 0 <= cls < len(classNames) else str(cls)
                print("Class name -->", label)

                # label text anchored at the box's (x1, y1) corner
                org = (x1, y1)
                cv2.putText(img, label, org, font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Runs on normal exit, on any exception and on a kernel interrupt, so the
    # camera is always freed and the preview window closed.
    cap.release()
    cv2.destroyAllWindows()


Confidence ---> 0.82
Class name --> person
0: 480x640 1 person, 381.4ms
Speed: 14.1ms preprocess, 381.4ms inference, 14.9ms postprocess per image at shape (1, 3, 480, 640)

Confidence ---> 0.86
Class name --> person
0: 480x640 1 person, 71.1ms
Speed: 0.0ms preprocess, 71.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

Confidence ---> 0.87
Class name --> person
0: 480x640 1 person, 73.3ms
Speed: 0.0ms preprocess, 73.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

Confidence ---> 0.87
Class name --> person
Confidence ---> 0.3
Class name --> teddy bear
0: 480x640 1 person, 1 teddy bear, 67.4ms
Speed: 0.0ms preprocess, 67.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

Confidence ---> 0.88
Class name --> person
Confidence ---> 0.32
Class name --> teddy bear
0: 480x640 1 person, 1 teddy bear, 81.4ms
Speed: 0.0ms preprocess, 81.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

Confidence ---> 0.87
Class na