## Follow with HAARCASCADE

In [8]:
import cv2

# Load the Haar cascade used for detection.
# Pick one of the cascades below depending on what should be tracked.
# cascade_path = cv2.data.haarcascades + "haarcascade_fullbody.xml"
# cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
cascade_path = cv2.data.haarcascades + "haarcascade_upperbody.xml"

detector = cv2.CascadeClassifier(cascade_path)
if detector.empty():
    # Fail early with a readable message instead of an opaque OpenCV
    # assertion raised later from detectMultiScale().
    raise IOError(f"Could not load Haar cascade from {cascade_path}")

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

# Horizontal dead zone, in pixels, around the frame center. The target must be
# further than this from the center before a turn is suggested.
THRESHOLD = 50

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        detections = detector.detectMultiScale(frame_gray, scaleFactor=1.1, minNeighbors=5)

        frame_center_x = frame.shape[1] // 2
        direction = "Centro"

        # Only the first detection is used, to keep the example simple.
        if len(detections) > 0:
            x, y, w, h = detections[0]

            # Draw the detection.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Horizontal center of the detected object.
            person_center_x = x + w // 2

            # Decide which way the camera should turn; inside the dead zone
            # `direction` keeps its default value.
            offset = person_center_x - frame_center_x
            if offset > THRESHOLD:
                direction = "Girar Derecha"
            elif offset < -THRESHOLD:
                direction = "Girar Izquierda"

        # Overlay the suggested direction on the image.
        cv2.putText(frame, f"Direccion: {direction}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Show the annotated frame.
        cv2.imshow("Detección", frame)

        # Press "q" to quit.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the windows, even when the cell is
    # interrupted or an exception is raised inside the loop.
    cap.release()
    cv2.destroyAllWindows()

## Follow with YOLOv4-Tiny

Ejemplo: ejecutar YOLO de forma sencilla, sin GPU (solo CPU).

In [None]:
import cv2

# Detection settings.
MIN_CONFIDENCE = 0.5       # minimum class score for a detection to be accepted
CENTER_TOLERANCE_PX = 50   # horizontal dead zone (pixels) around the frame center

# Load the class labels from coco.names (one label per line).
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Load the YOLOv4-Tiny network and force CPU inference.
net = cv2.dnn.readNetFromDarknet("yolov4-tiny.cfg", "yolov4-tiny.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Names of the output layers; getUnconnectedOutLayers() indices are 1-based.
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without this guard a failed read leaves `frame` as None and the
            # next line crashes with AttributeError on `frame.shape`.
            break
        height, width = frame.shape[:2]

        # Build the input blob and run a forward pass.
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        outputs = net.forward(output_layers)

        person_detected = False
        direction = "Centro"

        for output in outputs:
            for detection in output:
                # Entries from index 5 onwards are the per-class scores.
                scores = detection[5:]
                class_id = int(scores.argmax())
                confidence = scores[class_id]

                if confidence > MIN_CONFIDENCE and classes[class_id] == "person":
                    # The first four entries are scaled by the frame size to get
                    # the box center and dimensions in pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Top-left corner of the box.
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.putText(frame, "Persona", (x, y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                    # Centering logic: compare the person's center with the
                    # frame center; inside the dead zone `direction` keeps its
                    # default value.
                    frame_center_x = width // 2
                    offset = center_x - frame_center_x

                    if offset > CENTER_TOLERANCE_PX:
                        direction = "Girar Derecha"
                    elif offset < -CENTER_TOLERANCE_PX:
                        direction = "Girar Izquierda"

                    # Only the first accepted person is tracked.
                    person_detected = True
                    break
            if person_detected:
                break

        # Overlay the suggested direction.
        cv2.putText(frame, f"Direccion: {direction}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        cv2.imshow("YOLOv4-Tiny Detección", frame)

        # Press "q" to quit.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the windows, even when the cell is
    # interrupted or an exception is raised inside the loop.
    cap.release()
    cv2.destroyAllWindows()


- Solo detectar una sola persona (la primera que encuentre).
- Dibujar el bounding box solo para esa persona.
- Ejecutar YOLO solo cada X fotogramas (para ahorrar CPU).
- Dibujar una señal de "objetivo" sobre la persona detectada.

In [7]:
import cv2

# Detection settings used by the capture loop in this cell.
FRAME_SKIP = 10 # Run the detector only on every FRAME_SKIP-th frame to reduce CPU load
# Minimum class score a "person" detection must exceed to be accepted.
CONFIDENCE_THRESHOLD = 0.5

# Draws a "target" overlay (concentric circles plus a crosshair) on the image.
def draw_target_overlay(frame, center_x, center_y, color=(0, 0, 255), thickness=2,
                        radii=(10, 20, 30), arm_length=50):
    """Draw a target marker on ``frame`` centered at ``(center_x, center_y)``.

    The marker is a set of concentric circles and a crosshair made of one
    vertical and one horizontal line. ``frame`` is modified in place.

    Args:
        frame: Image passed straight through to ``cv2.circle`` / ``cv2.line``.
        center_x: X coordinate of the marker center, in pixels.
        center_y: Y coordinate of the marker center, in pixels.
        color: Drawing color; defaults to ``(0, 0, 255)`` (red in BGR).
        thickness: Line thickness for the circles and the crosshair.
        radii: Radius of each concentric circle, drawn in the given order.
        arm_length: Distance from the center to each end of the crosshair.

    Returns:
        None.
    """
    center = (center_x, center_y)

    # Concentric circles.
    for radius in radii:
        cv2.circle(frame, center, radius, color, thickness)

    # Vertical crosshair line.
    cv2.line(frame, (center_x, center_y - arm_length), (center_x, center_y + arm_length),
             color, thickness)
    # Horizontal crosshair line.
    cv2.line(frame, (center_x - arm_length, center_y), (center_x + arm_length, center_y),
             color, thickness)


# Load the class labels from coco.names (one label per line).
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Load the YOLOv4-Tiny network and force CPU inference.
net = cv2.dnn.readNetFromDarknet("yolov4-tiny.cfg", "yolov4-tiny.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Names of the output layers; getUnconnectedOutLayers() indices are 1-based.
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]


# Video capture from the default camera.
cap = cv2.VideoCapture(0)

# Last accepted person box as (x, y, w, h); None while nobody is detected.
# It is reused on the frames where the detector is skipped.
person_box = None

frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Run the detector only on every FRAME_SKIP-th frame.
        if frame_count % FRAME_SKIP == 0:

            height, width = frame.shape[:2]
            # Start from "no person" with the target at the frame center; both
            # are overwritten below when a person is found.
            person_box = None
            center_x = int(width // 2)
            center_y = int(height // 2)

            # Pre-processing and forward pass.
            blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            outputs = net.forward(output_layers)

            for output in outputs:
                for detection in output:
                    # Entries from index 5 onwards are the per-class scores.
                    scores = detection[5:]
                    class_id = int(scores.argmax())
                    confidence = scores[class_id]

                    if classes[class_id] == "person" and confidence > CONFIDENCE_THRESHOLD:
                        # The first four entries are scaled by the frame size to
                        # get the box center and dimensions in pixels.
                        center_x = int(detection[0] * width)
                        center_y = int(detection[1] * height)
                        w = int(detection[2] * width)
                        h = int(detection[3] * height)

                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        person_box = (x, y, w, h)
                        break  # stop after the first detected person
                if person_box is not None:
                    break  # leave the loop over outputs as well

        # Draw the box and the target marker while a person is being tracked.
        # center_x / center_y are always set here: person_box can only become
        # non-None inside the detection branch above, which assigns them first.
        if person_box is not None:
            x, y, w, h = person_box
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, "STUPID HUMAN", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Target marker at the center of the detected person.
            # cv2.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)
            draw_target_overlay(frame, center_x, center_y)

        cv2.imshow("Detection", frame)
        # ESC quits. The key code is masked with 0xFF, as in the other cells,
        # so platforms that set extra high bits still match.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Always free the camera and close the windows, even when the cell is
    # interrupted or an exception is raised inside the loop.
    cap.release()
    cv2.destroyAllWindows()

Enviar comandos de movimiento suavizado (smooth) a los servos

In [None]:
# import cv2
# # import serial ## UNCOMMENT THIS LINE IF YOU WANT TO USE ARDUINO
# import time
# import numpy as np

# # Configura el puerto serial para Arduino
# # arduino = serial.Serial('COM3', 9600)  # Cambia por tu puerto ## UNCOMMENT THIS LINE IF YOU WANT TO USE ARDUINO
# time.sleep(2)

# # Carga nombres de clases
# with open("coco.names", "r") as f:
#     classes = [line.strip() for line in f.readlines()]

# # Carga modelo YOLOv4-Tiny
# net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# layer_names = net.getLayerNames()
# output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# cap = cv2.VideoCapture(0)

# frame_count = 0

# # Configuración de la detección
# DETECT_EVERY_N_FRAMES = 10 # Sólo detectar cada X frames para mejorar rendimiento
# CONFIDENCE_THRESHOLD = 0.5

# # Variables para suavizado
# last_center_x = None
# last_center_y = None
# smoothing_factor = 0.2  # entre 0 y 1, menor es más lento

# def map_range(value, leftMin, leftMax, rightMin, rightMax):
#     leftSpan = leftMax - leftMin
#     rightSpan = rightMax - rightMin
#     valueScaled = float(value - leftMin) / float(leftSpan)
#     return rightMin + (valueScaled * rightSpan)

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     height, width, _ = frame.shape
#     center_x = None
#     center_y = None

#     frame_count += 1
#     if frame_count % DETECT_EVERY_N_FRAMES == 0:
#         # Crear blob y pasar por la red
#         blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416,416), swapRB=True, crop=False)
#         net.setInput(blob)
#         outs = net.forward(output_layers)

#         for out in outs:
#             for detection in out:
#                 scores = detection[5:]
#                 class_id = np.argmax(scores)
#                 confidence = scores[class_id]
#                 if confidence > CONFIDENCE_THRESHOLD and classes[class_id] == "person":
#                     centerX = int(detection[0] * width)
#                     centerY = int(detection[1] * height)
#                     w = int(detection[2] * width)
#                     h = int(detection[3] * height)
#                     x = int(centerX - w / 2)
#                     y = int(centerY - h / 2)
#                     person_box = [x, y, w, h]

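#         # Non-max suppression to remove overlapping detections.
#         # NOTE(review): `person_boxes` and `confidences` are never defined in this cell
#         # (the loop above only assigns `person_box`), so this line raises NameError if
#         # the cell is uncommented. Collect every box and its confidence in lists first.
#         indexes = cv2.dnn.NMSBoxes(person_boxes, confidences, 0.5, 0.4)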

#         if len(indexes) > 0:
#             # Tomamos la detección con mayor confianza (primer índice)
#             i = indexes[0]
#             x, y, w, h = person_boxes[i]
#             center_x = x + w // 2
#             center_y = y + h // 2
#             # Dibuja el cuadro si hay una persona detectada
#             cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
#             cv2.putText(frame, "STUPID HUMAN", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

#     # Suavizado de la posición de la cara detectada
#     if center_x is not None and center_y is not None:
#         if last_center_x is None or last_center_y is None:
#             smoothed_x = center_x
#             smoothed_y = center_y
#         else:
#             smoothed_x = int(last_center_x + smoothing_factor * (center_x - last_center_x))
#             smoothed_y = int(last_center_y + smoothing_factor * (center_y - last_center_y))

#         last_center_x, last_center_y = smoothed_x, smoothed_y

#         # Mapear la posición a ángulos servo
#         servo_1_angle = int(map_range(smoothed_x, 0, width, 0, 180))
#         servo_2_angle = int(map_range(smoothed_y, 0, height, 0, 180))

#         command = f"{servo_1_angle},{servo_2_angle}\n"
#         # arduino.write(command.encode()) ## UNCOMMENT THIS LINE IF YOU WANT TO USE ARDUINO

#         # Dibujar detección y punto centrado suavizado
#         cv2.putText(frame, f"Servo1: {servo_1_angle}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
#         cv2.putText(frame, f"Servo2: {servo_2_angle}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
#         draw_target_overlay(frame, smoothed_x, smoothed_y)

#     cv2.imshow("YOLO Person Tracking", frame)

#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()
# # arduino.close() ## UNCOMMENT THIS LINE IF YOU WANT TO USE ARDUINO
