In [None]:
import cv2
from ultralytics import YOLO

# Load the detection model. The weights file is a YOLO11 checkpoint
# ("yolo11x.pt"); only the display window below is titled "YOLOv8 Webcam".
model = YOLO("yolo11x.pt")

# Open the default webcam; replace 0 with another index if you have multiple cameras.
cap = cv2.VideoCapture(0)

# Raise instead of calling exit(): exit() is an interactive-shell helper and is
# not a reliable way to abort a notebook cell or a program.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

# The try/finally guarantees the camera and the preview window are released
# even when the loop is stopped by an exception or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame from webcam.")
            break

        # Run predictions on the current frame; show=False because the
        # annotated frame is displayed manually below.
        results = model.predict(source=frame, show=False)

        # Draw the predictions of the (single) result onto a copy of the frame.
        annotated_frame = results[0].plot()

        cv2.imshow("YOLOv8 Webcam", annotated_frame)

        # Stop when 'q' is pressed in the preview window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [1]:
import cv2
from ultralytics import YOLO
import time
from twilio.rest import Client
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Twilio credentials are read from the environment so they never appear in
# the notebook itself.
ACCOUNT_SID = os.getenv("ACCOUNT_SID")
AUTH_TOKEN = os.getenv("AUTH_TOKEN")

# WhatsApp sender and recipient addresses. Each can be overridden through an
# environment variable of the same name (e.g. in the .env file); the previous
# hard-coded values remain the defaults so existing setups keep working.
TWILIO_WHATSAPP_NUMBER = os.getenv("TWILIO_WHATSAPP_NUMBER", 'whatsapp:+14155238886')
YOUR_WHATSAPP_NUMBER = os.getenv("YOUR_WHATSAPP_NUMBER", 'whatsapp:+14704627175')

# Initialize Twilio client
client = Client(ACCOUNT_SID, AUTH_TOKEN)

# Function to send a WhatsApp message
def send_whatsapp_message(message_body):
    """Send ``message_body`` as a WhatsApp message via the Twilio client.

    The message goes from ``TWILIO_WHATSAPP_NUMBER`` to
    ``YOUR_WHATSAPP_NUMBER`` using the module-level ``client``.

    Args:
        message_body: Text of the message to send.

    Returns:
        None. A confirmation line is printed once the create call returns;
        this only shows the call did not raise, it does not check delivery.

    Raises:
        Any exception raised by ``client.messages.create`` propagates to the
        caller unchanged.
    """
    client.messages.create(
        from_=TWILIO_WHATSAPP_NUMBER,
        body=message_body,
        to=YOUR_WHATSAPP_NUMBER,
    )
    print(f"Message sent: {message_body}")

# Load the detection model. "yolo11x.pt" is a YOLO11 checkpoint (the display
# window below is merely titled "YOLOv8 Webcam"). If inference is too slow for
# real-time use, swap in a smaller checkpoint here.
model = YOLO("yolo11x.pt")

# Open the webcam
cap = cv2.VideoCapture(0)

# Raise instead of calling exit(): exit() is an interactive-shell helper and is
# not a reliable way to abort a notebook cell or a program.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

# Optional: lower the capture resolution to reduce latency
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# State for the FPS overlay and for throttling alerts
prev_time = time.time()
message_sent_time = 0  # Time the most recent message was sent (0 = none yet)
cooldown_seconds = 10  # Seconds after the last message before classes may alert again
detected_classes = set()  # Classes already alerted on in the current cooldown window
alert_classes = ("banana", "apple")  # Class names that trigger a WhatsApp message

# The try/finally guarantees the camera and the preview window are released
# even when the loop is stopped by an exception (e.g. a failed message send)
# or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame from webcam.")
            break

        # Run predictions on the current frame, keeping detections with
        # confidence >= 0.5; show=False because the frame is displayed manually.
        results = model.predict(source=frame, conf=0.5, show=False)

        # Draw the predictions of the (single) result onto a copy of the frame.
        annotated_frame = results[0].plot()

        # Send one message per alert class until the cooldown resets the set.
        for detection in results[0].boxes:
            class_name = model.names[int(detection.cls)]  # Map class id to its name
            if class_name in alert_classes and class_name not in detected_classes:
                send_whatsapp_message(f"{class_name.capitalize()} detected!")
                detected_classes.add(class_name)
                message_sent_time = time.time()  # Restart the shared cooldown

        # Once the cooldown since the last message has elapsed, allow every
        # class to trigger a message again.
        if time.time() - message_sent_time > cooldown_seconds:
            detected_classes.clear()

        # Frames per second from the time between consecutive iterations;
        # guard against a zero interval so the division cannot fail.
        curr_time = time.time()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_time = curr_time

        # Display FPS on the frame
        cv2.putText(annotated_frame, f"FPS: {int(fps)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Display the frame with the predictions
        cv2.imshow("YOLOv8 Webcam", annotated_frame)

        # Stop when 'q' is pressed in the preview window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()





0: 384x640 2 persons, 2 dogs, 1 chair, 1 couch, 2 laptops, 220.5ms
Speed: 1.7ms preprocess, 220.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 196.7ms
Speed: 1.2ms preprocess, 196.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 dogs, 2 chairs, 1 couch, 2 laptops, 225.8ms
Speed: 1.3ms preprocess, 225.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 dogs, 1 chair, 1 couch, 2 laptops, 1 mouse, 246.6ms
Speed: 1.2ms preprocess, 246.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)



2025-01-17 16:13:12.242 Python[28874:8627736] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-17 16:13:12.242 Python[28874:8627736] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 1 mouse, 265.0ms
Speed: 2.5ms preprocess, 265.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 2 chairs, 1 couch, 2 laptops, 1 mouse, 228.4ms
Speed: 1.2ms preprocess, 228.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 1 mouse, 232.4ms
Speed: 1.2ms preprocess, 232.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 1 mouse, 230.7ms
Speed: 1.3ms preprocess, 230.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 1 mouse, 193.0ms
Speed: 1.0ms preprocess, 193.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 dog, 1 chair, 1 couch, 2 laptops, 1 mouse, 187.6ms
Speed: 1.3ms preprocess, 187.6ms inference, 0.6ms post

KeyboardInterrupt: 