Two-phase strategy: (1) capture an image and detect the path; (2) switch to a live video feed and track the AprilTag. The first phase does not use live video; instead, it looks for two concurrent images.

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import grey_dilation
from pupil_apriltags import Detector

# Side length, in pixels, of one pathfinding grid cell; used as the default
# cell size by pixel_to_grid / grid_to_pixel and as the downsampling factor
# for the obstacle mask.
CELL_SIZE = 20
# Shared AprilTag detector, configured for the tag36h11 family.
DETECTOR = Detector(families='tag36h11')


In [2]:
def pixel_to_grid(pixel, cell_size=CELL_SIZE):
    """Map an ``(x, y)`` pixel coordinate to its ``(row, col)`` grid cell.

    Args:
        pixel: Indexable pair whose element 0 is x and element 1 is y.
        cell_size: Side length of a grid cell in pixels.

    Returns:
        ``(row, col)`` obtained by floor-dividing y and x by ``cell_size``.
    """
    row = pixel[1] // cell_size
    col = pixel[0] // cell_size
    return (row, col)

def grid_to_pixel(grid, cell_size=CELL_SIZE):
    """Map a ``(row, col)`` grid cell to the ``(x, y)`` pixel at its centre.

    Args:
        grid: Indexable pair whose element 0 is the row and element 1 the column.
        cell_size: Side length of a grid cell in pixels.

    Returns:
        ``(x, y)`` pixel coordinate offset by half a cell from the cell origin.
    """
    half_cell = cell_size // 2
    x = grid[1] * cell_size + half_cell
    y = grid[0] * cell_size + half_cell
    return (x, y)

def resize_for_pathfinding(mask, factor):
    """Downsample an obstacle mask into a coarse binary occupancy grid.

    Each output cell summarises one ``factor`` x ``factor`` block of ``mask``
    and is marked occupied when *any* pixel in that block is positive.
    Nearest-neighbour resizing looks at only a single pixel per block, so an
    obstacle that does not cover that pixel would vanish from the grid; the
    block reduction used here is conservative instead.

    Args:
        mask: Array whose first two axes are (height, width); positive values
            mark obstacle pixels.
        factor: Integer block size in pixels (the grid cell size).

    Returns:
        ``np.uint8`` array of shape ``(height // factor, width // factor)``
        (plus any trailing axes of ``mask``) holding 1 for occupied cells and
        0 for free cells. Rows/columns that do not fill a whole block at the
        bottom/right edge are dropped.
    """
    mask = np.asarray(mask)
    n_rows = mask.shape[0] // factor
    n_cols = mask.shape[1] // factor

    # Trim the ragged edge so the image tiles exactly into factor-sized blocks.
    trimmed = mask[:n_rows * factor, :n_cols * factor]
    blocks = trimmed.reshape(n_rows, factor, n_cols, factor, *mask.shape[2:])

    # Axes 1 and 3 index the pixels inside each block.
    occupied = (blocks > 0).any(axis=(1, 3))
    return occupied.astype(np.uint8)

def inflate_obstacles(grid, inflation_radius=1):
    """Grow obstacles in ``grid`` by ``inflation_radius`` cells in every direction.

    Args:
        grid: 2-D occupancy grid to dilate.
        inflation_radius: Number of cells to grow each obstacle by; the
            dilation window is ``2 * inflation_radius + 1`` cells on a side.

    Returns:
        The grey-dilated grid, same shape as ``grid``.
    """
    window = 2 * inflation_radius + 1
    footprint_size = (window, window)
    return grey_dilation(grid, size=footprint_size)


In [3]:
def detect_start_end_obstacles(image):
    """Locate the start marker, end marker and obstacles by colour.

    The image is converted from BGR to HSV and thresholded three times:
    green for the start, blue for the end, orange for obstacles.

    Args:
        image: BGR image (it is passed through ``cv2.COLOR_BGR2HSV``).

    Returns:
        ``(start, end, obstacle_mask)`` where ``start`` and ``end`` are the
        ``(cx, cy)`` pixel centroids of the largest green / blue contour, or
        ``None`` when no usable contour exists, and ``obstacle_mask`` is the
        raw orange threshold mask.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # HSV (lower, upper) thresholds for each colour of interest.
    start_mask = cv2.inRange(hsv_image, (40, 40, 40), (80, 255, 255))     # green
    goal_mask = cv2.inRange(hsv_image, (100, 150, 0), (140, 255, 255))    # blue
    obstacle_mask = cv2.inRange(hsv_image, (5, 150, 150), (20, 255, 255)) # orange

    def find_centroid(mask):
        """Return the integer centroid of the largest external contour, or None."""
        blobs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not blobs:
            return None
        moments = cv2.moments(max(blobs, key=cv2.contourArea))
        # A zero m00 moment means the centroid is undefined for this contour.
        if moments["m00"] == 0:
            return None
        centre_x = int(moments["m10"] / moments["m00"])
        centre_y = int(moments["m01"] / moments["m00"])
        return (centre_x, centre_y)

    start_centroid = find_centroid(start_mask)
    goal_centroid = find_centroid(goal_mask)
    return start_centroid, goal_centroid, obstacle_mask


In [4]:
def heuristic(a, b):
    """Return the Manhattan (L1) distance between grid cells ``a`` and ``b``."""
    return abs(a[0] - b[0]) + abs(a[1] - b[1])


def astar(grid, start, end):
    """Find a shortest 4-connected path from ``start`` to ``end`` with A*.

    Args:
        grid: 2-D array indexed ``[row][col]``; cells equal to 1 are obstacles.
        start: ``(row, col)`` cell to start from. It is not itself checked
            against ``grid``, so a start that sits on an (inflated) obstacle
            can still be left.
        end: ``(row, col)`` goal cell.

    Returns:
        List of ``(row, col)`` cells from ``start`` to ``end`` inclusive, or
        an empty list when no path exists.
    """
    import heapq

    if start == end:
        return [start]

    n_rows, n_cols = grid.shape[0], grid.shape[1]

    def in_bounds(cell):
        return 0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols

    # A goal that is outside the grid or on an obstacle can never be entered,
    # so answer immediately instead of flooding every reachable cell first.
    if not in_bounds(end) or grid[end[0]][end[1]] == 1:
        return []

    steps = ((0, 1), (1, 0), (-1, 0), (0, -1))
    closed = set()
    came_from = {}
    g_score = {start: 0}
    open_heap = [(heuristic(start, end), start)]

    while open_heap:
        _, current = heapq.heappop(open_heap)

        if current == end:
            # Walk the parent links back to the start, then flip the order.
            path = [current]
            while current in came_from:
                current = came_from[current]
                path.append(current)
            path.reverse()
            return path

        # The heap may hold stale entries for a cell that was re-queued with a
        # better score; the cell only needs to be expanded once.
        if current in closed:
            continue
        closed.add(current)

        tentative_g = g_score[current] + 1
        for d_row, d_col in steps:
            neighbor = (current[0] + d_row, current[1] + d_col)
            if not in_bounds(neighbor) or grid[neighbor[0]][neighbor[1]] == 1:
                continue
            # The Manhattan heuristic is consistent on a unit-cost grid, so a
            # closed cell already has its final score.
            if neighbor in closed:
                continue
            if tentative_g < g_score.get(neighbor, float('inf')):
                came_from[neighbor] = current
                g_score[neighbor] = tentative_g
                priority = tentative_g + heuristic(neighbor, end)
                heapq.heappush(open_heap, (priority, neighbor))

    return []


In [5]:
# === Phase 1: grab frames until a start -> end path can be planned ===
# Consecutive failed reads tolerated before giving up; without a bound the
# loop spins forever when the camera is missing or disconnected.
MAX_FAILED_READS = 100

cap = cv2.VideoCapture(1)  # or 0 depending on your webcam
path = []
path_pixels = []
frame = None

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the camera; check the device index.")

    failed_reads = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                raise RuntimeError(
                    f"Camera returned no frame {MAX_FAILED_READS} times in a row."
                )
            continue
        failed_reads = 0

        start, end, obstacles = detect_start_end_obstacles(frame)
        if start is None or end is None:
            continue  # both markers must be visible before planning

        start_g = pixel_to_grid(start)
        end_g = pixel_to_grid(end)
        obs_grid = resize_for_pathfinding(obstacles, CELL_SIZE)
        inflated = inflate_obstacles(obs_grid, inflation_radius=1)
        path = astar(inflated, start_g, end_g)
        if path:
            path_pixels = [grid_to_pixel(pt) for pt in path]
            break  # STOP after first valid path found
finally:
    # Release the device even if planning raises or the cell is interrupted.
    cap.release()

# === Draw the path on the image ===
output = frame.copy()

# Draw the path with circles
for pt in path_pixels:
    cv2.circle(output, pt, 2, (0, 0, 0), -1)  # Black dots

# Start and end markers
if path_pixels:
    cv2.circle(output, path_pixels[0], 5, (0, 255, 0), -1)   # Green = Start
    cv2.circle(output, path_pixels[-1], 5, (255, 0, 0), -1)  # Blue = End

# Show image with matplotlib (side-by-side original and path)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Original Image")
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.axis("off")

plt.subplot(1, 2, 2)
plt.title("Path Detected")
plt.imshow(cv2.cvtColor(output, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.show()


KeyboardInterrupt: 

In [None]:
def detect_apriltag(frame, target_id=0):
    """Return the integer pixel centre of the AprilTag with ``target_id``.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        target_id: Tag id to look for.

    Returns:
        Tuple of ints built from the first matching detection's ``center``,
        or ``None`` when no detection carries ``target_id``.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    matching = (tag for tag in DETECTOR.detect(grayscale) if tag.tag_id == target_id)
    first_match = next(matching, None)
    if first_match is None:
        return None
    return tuple(int(coord) for coord in first_match.center)

In [None]:

# === Phase 2: live feed with the planned path and the tracked robot overlaid ===
cap = cv2.VideoCapture(1)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        robot_pos = detect_apriltag(frame)
        display = frame.copy()

        # Draw path
        for pt in path_pixels:
            cv2.circle(display, pt, 2, (0, 0, 0), -1)

        # Robot position
        if robot_pos:
            cv2.circle(display, robot_pos, 6, (0, 0, 255), -1)

        # Start & End points
        if path_pixels:
            cv2.circle(display, path_pixels[0], 5, (0, 255, 0), -1)   # Start (Green)
            cv2.circle(display, path_pixels[-1], 5, (255, 0, 0), -1)  # End (Blue)

        cv2.imshow("Robot Tracking with AprilTag", display)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including when the loop is
    # interrupted or a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()