In [1]:
# Initial things to use the drone - new features

# from djitellopy import tello
# from time import sleep

# me = tello.Tello()
# To connect 
# me.connect()

# To start the video stream
# me.streamon()

# To takeoff
# me.takeoff()

# To control movement
# me.send_rc_control(left/right, forward/backward, up/down, yaw_velocity)

# To wait for 5 seconds before the next command
# (sleep comes from the time module imported above, not from the Tello object)
# sleep(5)

# To land
# me.land()

## Actual run: the approach I found to be the best solution

Using:

- cv2 to capture the image
- YOLO to detect the person - include this in the literature review
- https://docs.ultralytics.com/models/yolov8/ 


In [1]:
# This is more accurate - it tracks every part of a person

import cv2
from ultralytics import YOLO
import math
import logging

# Proportional gains that scale the image-space error into a position update.
# These values need to be fine tuned
K_x = 0.05 # Left/Right movement scale
K_y = 0.1  # Forward/Backward movement scale
K_z = 0.02 # Up/Down movement scale

# Load the YOLOv8 model ("yolov8n.pt" is the smallest YOLOv8 variant).
# Logging is silenced because otherwise every processed frame prints lines such as:
#   0: 384x640 1 person, 71.6ms
#   Speed: 9.2ms preprocess, 71.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
logging.getLogger("ultralytics").setLevel(logging.WARNING)
model = YOLO("yolov8n.pt", verbose = False)

# Drone position estimate, starting from hard-coded values (x=0, y=0, z=2).
# No real drone is queried in this cell; the position is only updated numerically.
drone_x, drone_y, drone_z = 0, 0, 2

# Centre and bounding-box area of the person at the last accepted update.
last_position = None
last_area = None
# Minimum centre displacement (in pixels) before the position is updated again.
pixel_threshold = 100

# Open webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even if
# the loop is interrupted (e.g. KeyboardInterrupt from stopping the cell).
try:
    # While the webcam is open
    while cap.isOpened():
        success, frame = cap.read()
        # Check the read result BEFORE touching the frame: on failure the frame
        # is not a valid image and frame.shape would raise.
        if not success:
            break

        # Define screen center
        frame_center_x = frame.shape[1] // 2  # Middle of frame
        frame_center_y = frame.shape[0] // 2

        # Inference on the frame (verbose=False keeps per-frame output quiet)
        results = model(frame, verbose=False)

        # Lists to hold the centre and the area of every accepted person detection
        myPersonList = []
        myPersonListArea = []

        for r in results:
            for box in r.boxes:
                # Get class ID
                cls = int(box.cls[0])
                # Confidence score
                conf = box.conf[0].item()
                # Class 0 = "person", confidence > 80%
                if cls == 0 and conf > 0.8:
                    # Get bounding box coordinates
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    # Center of bounding box
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                    bbox_height = y2 - y1
                    # Calculate area
                    area = (x2 - x1) * bbox_height

                    # Draw the bounding box; its size drives forward/backward movement
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    # Draw the centre point; this is what the drone will follow
                    cv2.circle(frame, (cx, cy), 5, (0, 255, 0), cv2.FILLED)
                    # Append the area and the center of the circle
                    myPersonList.append((cx, cy))
                    myPersonListArea.append(area)

        # Select the largest detected person
        if myPersonList:
            i = myPersonListArea.index(max(myPersonListArea))
            person_x, person_y = myPersonList[i]
            current_area = myPersonListArea[i]

            # current_area > 0 also protects the division by current_area below.
            if last_position is not None and last_area is not None and current_area > 0:
                # Euclidean distance (in pixels) the centre moved since the last accepted update
                dx = person_x - last_position[0]
                dy = person_y - last_position[1]
                distance_moved = math.sqrt(dx**2 + dy**2)

                # Only react to movements larger than the threshold so that small
                # jitters do not cause constant position changes.
                # NOTE(review): only the centre displacement gates the update; a pure
                # change in bounding-box area (person moving straight toward or away
                # from the camera) does not trigger it.
                if distance_moved > pixel_threshold:
                    # Left/Right Movement (X-axis)
                    drone_x += K_x * (person_x - frame_center_x)
                    # Forward/Backward Movement (Y-axis): ratio of previous to current box area
                    drone_y += K_y * ((last_area / current_area) - 1)
                    # Up/Down Movement (Z-axis)
                    drone_z += K_z * (frame_center_y - person_y)
                    print(f'Area: {myPersonListArea[i]}, Center: {myPersonList[i]}')
                    print(f"Person moved {distance_moved:.2f} pixels, updating movement.")
                    print(f"New Drone Position: X={drone_x:.2f}, Y={drone_y:.2f}, Z={drone_z:.2f}")
                    # A discrete alternative (move left/right when the centre is more than
                    # 50 px off the frame centre; move forward below an area of 5000 and
                    # backward above 15000) is not used in this version.

                    # Update last position
                    last_position = (person_x, person_y)
                    last_area = current_area
            # Track the initial position of the person - assuming this is the space
            # you want to have between the person and the drone
            else:
                print(f'Area: {myPersonListArea[i]}, Center: {myPersonList[i]}')
                # First detection, initialize last position
                last_position = (person_x, person_y)
                last_area = current_area

        cv2.imshow("Person Detection (YOLOv8)", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break  # Press 'q' to exit
finally:
    cap.release()
    cv2.destroyAllWindows()


Area: 602928, Center: (800, 393)


KeyboardInterrupt: 

PROBLEMS :
- If there are multiple people it might change the person it is tracking (Maybe for now I will test it with one person)
- It is difficult to determine how far the drone should move for a given detection, and I am not sure whether the current calculation is correct


Considerations -- things to check 
- Latency: Ensure minimal delay between detecting the target (person or red spot) and sending commands.
- Safety: Test in a controlled environment to ensure predictable movements.
- Camera Feed Access: If using the drone’s camera feed, ensure you can stream it to your processing device.


# Following a Person