In [1]:
import cv2
import os
import numpy as np
from ultralytics import YOLO
import csv
import socket
import threading

model = YOLO('yolo11n.pt')
cap = cv2.VideoCapture(0)

save_objects = ["cell phone", "remote", "keyboard"]
csv_filename = "detected_objects.csv"

# TCP Server to send detected object names and receive task data
def handle_client(client_socket):
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture image. Exiting...")
            break

        results = model.predict(source=frame)
        annotated_frame = results[0].plot()

        detected_objects = []

        for detection in results[0].boxes:
            class_id = int(detection.cls[0])
            class_name = results[0].names[class_id]
            confidence = float(detection.conf[0])
            x1, y1, x2, y2 = detection.xywh[0]

            if class_name in save_objects:
                with open(csv_filename, mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([class_name, confidence, x1, y1, x2, y2])
                detected_objects.append(class_name)

        if detected_objects:
            # Send the detected object names as a message to Unity
            client_socket.sendall(",".join(detected_objects).encode())

        cv2.imshow("Real-Time Object Detection", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    client_socket.close()

# TCP Server Setup
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('localhost', 8765))
server.listen(5)
print("Waiting for connection...")

def start_server():
    while True:
        client_socket, addr = server.accept()
        print(f"Connection from {addr}")
        threading.Thread(target=handle_client, args=(client_socket,)).start()

# Start the server in a separate thread
threading.Thread(target=start_server, daemon=True).start()

# Wait forever for the server to keep running
while True:
    pass


Waiting for connection...
Connection from ('127.0.0.1', 55930)

Connection from ('127.0.0.1', 55988)

0: 480x640 1 person, 5959.8ms
Speed: 170.1ms preprocess, 5959.8ms inference, 763.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 5921.0ms
Speed: 171.1ms preprocess, 5921.0ms inference, 682.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 6194.9ms
Speed: 169.0ms preprocess, 6194.9ms inference, 966.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 6041.4ms
Speed: 185.7ms preprocess, 6041.4ms inference, 965.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 surfboard, 5837.3ms
Speed: 104.3ms preprocess, 5837.3ms inference, 823.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6200.0ms
Speed: 159.4ms preprocess, 6200.0ms inference, 512.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 5854.6ms
Speed: 141.7ms preprocess, 5854.6ms inferenc

Exception in thread Thread-6 (handle_client):
Traceback (most recent call last):
  File "c:\Python312\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Python312\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\OMAR RAYYAN\AppData\Local\Temp\ipykernel_61088\1971038315.py", line 42, in handle_client
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine


0: 480x640 2 persons, 1 cell phone, 6294.8ms
Speed: 201.7ms preprocess, 6294.8ms inference, 938.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 5986.8ms
Speed: 108.9ms preprocess, 5986.8ms inference, 716.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 5694.8ms
Speed: 78.1ms preprocess, 5694.8ms inference, 587.2ms postprocess per image at shape (1, 3, 480, 640)



KeyboardInterrupt: 

0: 480x640 1 person, 1639.5ms
Speed: 186.7ms preprocess, 1639.5ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 165.9ms
Speed: 2.5ms preprocess, 165.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 132.3ms
Speed: 3.5ms preprocess, 132.3ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 240.8ms
Speed: 4.0ms preprocess, 240.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 125.9ms
Speed: 3.0ms preprocess, 125.9ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 118.2ms
Speed: 2.0ms preprocess, 118.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cup, 245.9ms
Speed: 2.5ms preprocess, 245.9ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 120.4ms
Speed: 2.5ms preprocess, 120.4ms inference, 1.8ms postprocess pe

In [None]:
import cv2
import os
import numpy as np
from ultralytics import YOLO
import csv
import socket
import threading

# Load the YOLO model
model = YOLO('yolo11n.pt')

# Open the webcam
cap = cv2.VideoCapture(0)

# Objects to detect
save_objects = ["cell phone", "remote", "keyboard"]
csv_filename = "detected_objects.csv"

# TCP Server to send detected object names and receive task data
def handle_client(client_socket):
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture image. Exiting...")
            break

        # Run object detection
        results = model.predict(source=frame)
        annotated_frame = results[0].plot()

        detected_objects = []

        # Loop through the detections and filter for the objects we care about
        for detection in results[0].boxes:
            class_id = int(detection.cls[0])
            class_name = results[0].names[class_id]
            confidence = float(detection.conf[0])
            x1, y1, x2, y2 = detection.xywh[0]

            if class_name in save_objects:
                with open(csv_filename, mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([class_name, confidence, x1, y1, x2, y2])
                detected_objects.append(class_name)

        if detected_objects:
            # Send the detected object names as a message to Unity
            client_socket.sendall(",".join(detected_objects).encode())

        # Display the annotated frame
        cv2.imshow("Real-Time Object Detection", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    client_socket.close()

# TCP Server Setup
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('localhost', 8765))
server.listen(5)
print("Waiting for connection...")

def start_server():
    while True:
        client_socket, addr = server.accept()
        print(f"Connection from {addr}")
        threading.Thread(target=handle_client, args=(client_socket,)).start()

# Start the server in a separate thread
threading.Thread(target=start_server, daemon=True).start()

# Keep the server running indefinitely
while True:
    pass


Waiting for connection...
Connection from ('127.0.0.1', 56175)

0: 480x640 1 person, 5951.6ms
Speed: 138.4ms preprocess, 5951.6ms inference, 704.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cup, 1 fork, 5465.5ms
Speed: 128.6ms preprocess, 5465.5ms inference, 591.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 fork, 5196.6ms
Speed: 139.7ms preprocess, 5196.6ms inference, 448.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cup, 5875.5ms
Speed: 122.4ms preprocess, 5875.5ms inference, 809.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 traffic light, 1 cup, 1 fork, 5705.2ms
Speed: 125.5ms preprocess, 5705.2ms inference, 793.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 cup, 1 cell phone, 5372.0ms
Speed: 172.1ms preprocess, 5372.0ms inference, 684.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 cup, 1 cell phone, 5339