In [1]:
import cv2
import numpy as np
import pyfirmata
import threading
import time
from deep_sort_realtime.deepsort_tracker import DeepSort

# Cấu hình camera
cap = cv2.VideoCapture(0)
ws, hs = 1280, 720
cap.set(3, ws)
cap.set(4, hs)

if not cap.isOpened():
    print("Camera couldn't Access!!!")
    exit()

# Cấu hình Arduino và Servo
port = "COM3"
board = pyfirmata.Arduino(port)
servo_x = board.get_pin('d:9:s')
servo_y = board.get_pin('d:10:s')

servoPos = [90, 90]  # Góc mặc định
selected_id = None   # ID của đối tượng được chọn

# Khởi tạo DeepSORT
tracker = DeepSort(max_age=30, n_init=3, nms_max_overlap=1.0)

# Load mô hình YOLO
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

with open("coco.names", "r") as f:
    classes = f.read().strip().split("\n")

detected_objects = []
frame_skip = 3  # Chạy YOLO mỗi 3 frame để giảm tải
frame_count = 0

def yolo_detection():
    global detected_objects, frame_count
    while True:
        success, img = cap.read()
        if not success:
            continue

        frame_count += 1
        if frame_count % frame_skip != 0:
            time.sleep(0.01)
            continue

        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)

        detected_objects.clear()
        for output in detections:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > 0.5 and class_id == 0:
                    center_x, center_y, w, h = (detection[:4] * np.array([ws, hs, ws, hs])).astype(int)
                    x, y = center_x - w // 2, center_y - h // 2
                    detected_objects.append((x, y, w, h, confidence, class_id))
        
        time.sleep(0.01)

def click_event(event, x, y, flags, param):
    global selected_id
    if event == cv2.EVENT_LBUTTONDOWN:
        for track in tracker.tracker.tracks:
            if not track.is_confirmed():
                continue
            x1, y1, x2, y2 = track.to_tlbr()
            if x1 <= x <= x2 and y1 <= y <= y2:
                selected_id = track.track_id
                print(f"Đã chọn đối tượng ID: {selected_id}")

cv2.namedWindow("Image")
cv2.setMouseCallback("Image", click_event)

yolo_thread = threading.Thread(target=yolo_detection, daemon=True)
yolo_thread.start()

while True:
    success, img = cap.read()
    if not success:
        break

    detections_for_tracker = [((x, y, x+w, y+h), conf, class_id) 
                              for (x, y, w, h, conf, class_id) in detected_objects]
    tracks = tracker.update_tracks(detections_for_tracker, frame=img)

    for track in tracks:
        if not track.is_confirmed():
            continue

        x1, y1, x2, y2 = track.to_tlbr()
        track_id = track.track_id

        if track_id == selected_id:
            obj_center_x = (x1 + x2) // 2
            obj_center_y = (y1 + y2) // 2

            offset_x = obj_center_x - ws // 2
            offset_y = obj_center_y - hs // 2

            sensitivity = 0.05
            servoPos[0] -= offset_x * sensitivity / (ws // 2) * 45
            servoPos[1] -= offset_y * sensitivity / (hs // 2) * 45

            servoPos[0] = max(0, min(180, servoPos[0]))
            servoPos[1] = max(0, min(180, servoPos[1]))

            servo_x.write(servoPos[0])
            servo_y.write(servoPos[1])

            print(f"Servo X: {servoPos[0]:.2f}, Servo Y: {servoPos[1]:.2f}")

        color = (0, 255, 0) if track_id == selected_id else (255, 0, 0)
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)
        cv2.putText(img, f'ID {track_id}', (int(x1), int(y1) - 10),
                    cv2.FONT_HERSHEY_PLAIN, 2, color, 2)

    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


  from .autonotebook import tqdm as notebook_tqdm


Đã chọn đối tượng ID: 2
Servo X: 89.50, Servo Y: 88.55
Servo X: 89.01, Servo Y: 87.10
Servo X: 88.48, Servo Y: 85.67
Servo X: 87.95, Servo Y: 84.21
Servo X: 87.42, Servo Y: 82.79
Servo X: 86.96, Servo Y: 81.39
Servo X: 86.53, Servo Y: 80.00
Servo X: 86.08, Servo Y: 78.58
Servo X: 85.66, Servo Y: 77.16
Servo X: 84.94, Servo Y: 75.36
Servo X: 84.12, Servo Y: 73.42
Servo X: 83.26, Servo Y: 71.44
Servo X: 82.40, Servo Y: 69.42
Servo X: 81.50, Servo Y: 67.38
Servo X: 80.63, Servo Y: 65.36
Servo X: 79.73, Servo Y: 63.30
Servo X: 78.81, Servo Y: 61.19
Servo X: 77.85, Servo Y: 59.04
Servo X: 76.87, Servo Y: 56.85
Servo X: 75.86, Servo Y: 54.61
Servo X: 74.83, Servo Y: 52.33
Servo X: 73.77, Servo Y: 50.01
Servo X: 72.68, Servo Y: 47.65
Servo X: 71.56, Servo Y: 45.24
Servo X: 70.41, Servo Y: 42.79
Servo X: 69.24, Servo Y: 40.30
Servo X: 68.04, Servo Y: 37.76
Servo X: 66.82, Servo Y: 35.19
Servo X: 65.56, Servo Y: 32.57
Servo X: 64.28, Servo Y: 29.91
Servo X: 62.98, Servo Y: 27.20
Servo X: 61.64,

KeyboardInterrupt: 

In [None]:
import cv2
import numpy as np

import threading
import time
from deep_sort_realtime.deepsort_tracker import DeepSort

# Cấu hình camera
cap = cv2.VideoCapture(0)
ws, hs = 1280, 720
cap.set(3, ws)
cap.set(4, hs)

if not cap.isOpened():
    print("Camera couldn't Access!!!")
    exit()


servoPos = [90, 90]  # Góc mặc định
selected_id = None   # ID của đối tượng được chọn

# Khởi tạo DeepSORT
tracker = DeepSort(max_age=30, n_init=3, nms_max_overlap=1.0)

# Load mô hình YOLO
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

with open("coco.names", "r") as f:
    classes = f.read().strip().split("\n")

detected_objects = []
frame_skip = 3  # Chạy YOLO mỗi 3 frame để giảm tải
frame_count = 0

def yolo_detection():
    global detected_objects, frame_count
    while True:
        success, img = cap.read()
        if not success:
            continue

        frame_count += 1
        if frame_count % frame_skip != 0:
            time.sleep(0.01)
            continue

        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)

        detected_objects.clear()
        for output in detections:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > 0.5 and class_id == 0:
                    center_x, center_y, w, h = (detection[:4] * np.array([ws, hs, ws, hs])).astype(int)
                    x, y = center_x - w // 2, center_y - h // 2
                    detected_objects.append((x, y, w, h, confidence, class_id))
        
        time.sleep(0.01)

def click_event(event, x, y, flags, param):
    global selected_id
    if event == cv2.EVENT_LBUTTONDOWN:
        for track in tracker.tracker.tracks:
            if not track.is_confirmed():
                continue
            x1, y1, x2, y2 = track.to_tlbr()
            if x1 <= x <= x2 and y1 <= y <= y2:
                selected_id = track.track_id
                print(f"Đã chọn đối tượng ID: {selected_id}")

cv2.namedWindow("Image")
cv2.setMouseCallback("Image", click_event)

yolo_thread = threading.Thread(target=yolo_detection, daemon=True)
yolo_thread.start()

while True:
    success, img = cap.read()
    if not success:
        break

    detections_for_tracker = [((x, y, x+w, y+h), conf, class_id) 
                              for (x, y, w, h, conf, class_id) in detected_objects]
    tracks = tracker.update_tracks(detections_for_tracker, frame=img)

    for track in tracks:
        if not track.is_confirmed():
            continue

        x1, y1, x2, y2 = track.to_tlbr()
        track_id = track.track_id

        if track_id == selected_id:
            obj_center_x = (x1 + x2) // 2
            obj_center_y = (y1 + y2) // 2

            offset_x = obj_center_x - ws // 2
            offset_y = obj_center_y - hs // 2

            sensitivity = 0.05
            servoPos[0] -= offset_x * sensitivity / (ws // 2) * 45
            servoPos[1] -= offset_y * sensitivity / (hs // 2) * 45

            servoPos[0] = max(0, min(180, servoPos[0]))
            servoPos[1] = max(0, min(180, servoPos[1]))


            print(f"Servo X: {servoPos[0]:.2f}, Servo Y: {servoPos[1]:.2f}")

        color = (0, 255, 0) if track_id == selected_id else (255, 0, 0)
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)
        cv2.putText(img, f'ID {track_id}', (int(x1), int(y1) - 10),
                    cv2.FONT_HERSHEY_PLAIN, 2, color, 2)

    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import cv2
import numpy as np
import pyfirmata
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from filterpy.kalman import KalmanFilter

# Kết nối Arduino
port = "COM3"
board = pyfirmata.Arduino(port)
servo_pinX = board.get_pin('d:9:s')
servo_pinY = board.get_pin('d:10:s')

# Load YOLO model
model = YOLO("model.pt")

# Deep SORT Tracker
tracker = DeepSort(max_age=600, max_iou_distance=0.5)

# Kalman Filter
kf = KalmanFilter(dim_x=4, dim_z=2)
kf.F = np.array([[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]])
kf.H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
kf.P *= 1000
kf.x = np.array([0, 0, 0, 0])

frame_step = 5  #số frame giữa các lần YOLO chạy 
ws, hs = 1280, 720  # Kích thước khung hình
selected_id = None  # ID của đối tượng đang theo dõi

# Mở Camera
cap = cv2.VideoCapture(0)

frame_count = 0  # Đếm số frame
detections = []  # Lưu kết quả YOLO giữa các lần chạy

# Hàm kiểm tra IoU để tránh trùng ID
def iou(box1, box2):
    x1, y1, x2, y2 = box1
    x1_, y1_, x2_, y2_ = box2
    xi1, yi1, xi2, yi2 = max(x1, x1_), max(y1, y1_), min(x2, x2_), min(y2, y2_)
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2_ - x1_) * (y2_ - y1_)
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0

# Sự kiện click chuột để chọn đối tượng
cv2.namedWindow("Tracking")
def select_object(event, x, y, flags, param):
    global selected_id
    if event == cv2.EVENT_LBUTTONDOWN:
        for track in param:
            x1, y1, x2, y2 = map(int, track.to_ltrb())
            if x1 <= x <= x2 and y1 <= y <= y2:
                selected_id = track.track_id
                print(f"Chọn đối tượng ID: {selected_id}")

cv2.setMouseCallback("Tracking", select_object, param=[])

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame_count += 1

    if frame_count % frame_step == 0:
        results = model(frame)
        new_detections = []
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0].item()
            class_id = int(box.cls[0].item())
            if class_id == 0:  # Chỉ giữ lại người
                new_detections.append(([x1, y1, x2 - x1, y2 - y1], confidence, class_id))
        detections = new_detections

    # Cập nhật tracker
    tracks = tracker.update_tracks(detections, frame=frame)
    cv2.setMouseCallback("Tracking", select_object, param=tracks)

    for track in tracks:
        if not track.is_confirmed():
            continue
        track_id = track.track_id
        x1, y1, x2, y2 = map(int, track.to_ltrb())
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if selected_id is not None and track_id == selected_id:
            if frame_count % frame_step == 0:
                kf.update([cx, cy])
            else:
                kf.predict()
            predicted = kf.x[:2]
            servo_x = np.clip(np.interp(predicted[0], [0, ws], [180, 0]), 0, 180)
            servo_y = np.clip(np.interp(predicted[1], [0, hs], [180, 0]), 0, 180)
            servo_pinX.write(servo_x)
            servo_pinY.write(servo_y)

        color = (0, 255, 0) if track_id == selected_id else (255, 0, 0)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, f"ID {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 391.8ms
Speed: 6.7ms preprocess, 391.8ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 345.1ms
Speed: 3.8ms preprocess, 345.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 409.9ms
Speed: 3.8ms preprocess, 409.9ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 458.6ms
Speed: 4.7ms preprocess, 458.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 380.4ms
Speed: 4.9ms preprocess, 380.4ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 338.1ms
Speed: 5.0ms preprocess, 338.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 351.1ms
Speed: 2.7ms preprocess, 351.1ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 tie, 375.9ms
Speed: 2.7ms preprocess, 375.9ms inference, 2.3ms postprocess 