In [1]:
import cv2
import math
from ultralytics import YOLO
from IPython.display import display
import ipywidgets as widgets

# Load the YOLO model
model = YOLO("yolo-Weights/yolov8n.pt")

# Object classes
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

# Pre-defined list of correct predictions (for demonstration purposes)
# In real-world usage, this list would be dynamically provided
correct_predictions = ["cell phone"]  # Example list

# Function to process and display the video frames
def capture_and_detect():
    # Start the built-in camera
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    # Widget to display the live video feed
    image_widget = widgets.Image(format='jpeg')
    display(image_widget)
    cellphone_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture image")
                break

            results = model(frame, stream=True)

            for r in results:
                boxes = r.boxes
                for box in boxes:
                    # Bounding box
                    x1, y1, x2, y2 = box.xyxy[0]
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                    # Draw bounding box
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

                    # Confidence
                    confidence = math.ceil((box.conf[0] * 100)) / 100

                    # Class name
                    cls = int(box.cls[0])
                    class_name = classNames[cls]
                    # Check if the predicted class is correct
                    classification = 1 if class_name in correct_predictions else 0
                    if class_name == "cell phone":
                        cellphone_count += 1

                    # Print the result in the console
                    print(f"Class name: {class_name}, Confidence: {confidence}, Classification: {classification}")

                    # Object details
                    org = (x1, y1 - 10)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    fontScale = 0.5
                    color = (255, 0, 0)
                    thickness = 2

                    cv2.putText(frame, f"{class_name} {confidence}", org, font, fontScale, color, thickness)

            # Encode the frame in JPEG format
            _, jpeg = cv2.imencode('.jpeg', frame)
            image_widget.value = jpeg.tobytes()
            print(f"Total 'cell phone' predictions: {cellphone_count}")

            # Update the display
            #display(image_widget)

            # Exit on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                  break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Call the function to start the live detection
capture_and_detect()


SyntaxError: invalid decimal literal (3867949252.py, line 75)