In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

Base Code (cleaned up version)

In [4]:
def initialize_point():
    """Let the user pick one point on the live camera feed with a left-click.

    Opens camera 0, shows the stream in a window named "Camera" together with
    an on-screen prompt, and stops as soon as the user left-clicks or presses
    ESC.

    Returns:
        tuple: ``(old_frame, old_points, ix, iy)``.
            ``old_frame`` is the grayscale conversion of the last frame read.
            ``old_points`` is a float32 array reshaped to ``(-1, 1, 2)`` that
            holds the clicked pixel.
            ``ix`` / ``iy`` are the clicked coordinates; both are ``-1`` when
            ESC was pressed before any click.

    Raises:
        RuntimeError: if the camera does not deliver a frame.
    """
    ix, iy = -1, -1  # -1 marks "nothing selected yet"
    point_selected = False

    def onMouse(event, x, y, flags, params):
        nonlocal ix, iy, point_selected
        if event == cv2.EVENT_LBUTTONDOWN:
            ix, iy = x, y
            point_selected = True

    cap = cv2.VideoCapture(0)
    try:
        cv2.namedWindow("Camera")
        cv2.setMouseCallback("Camera", onMouse)

        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                raise RuntimeError("Could not read a frame from camera 0")

            # All overlays go on a copy: `frame` becomes the optical-flow
            # reference below and must not contain drawn pixels.
            preview = frame.copy()
            if not point_selected:
                cv2.putText(preview, "Select a point to track (left-click)", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 225, 0), 2)
            cv2.imshow("Camera", preview)

            key = cv2.waitKey(1)
            if key == 27 or point_selected:  # ESC or a completed selection
                break

        old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        old_points = np.array([[ix, iy]], dtype="float32").reshape(-1, 1, 2)
    finally:
        # Release the camera and close the window even if something failed.
        cap.release()
        cv2.destroyAllWindows()

    return old_frame, old_points, ix, iy

def track_point(old_frame, old_points, initial_x, initial_y):
    """Follow one point on the live camera feed using pyramidal Lucas-Kanade flow.

    Reads frames from camera 0 until ESC is pressed or no frame is delivered.
    For every frame the point is re-estimated with
    ``cv2.calcOpticalFlowPyrLK``; when the status flag reports success, a green
    dot plus the initial and live coordinates are drawn on the displayed frame.

    Args:
        old_frame: grayscale reference frame the point was selected on.
        old_points: float32 point array passed to ``calcOpticalFlowPyrLK``;
            only the first point is drawn.
        initial_x: x coordinate of the originally selected point (display only).
        initial_y: y coordinate of the originally selected point (display only).
    """
    # Loop-invariant settings, built once.
    lk_criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 15, 0.08)
    font_scale = 0.7
    font_color = (0, 225, 0)
    font_thickness = 1

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, frame2 = cap.read()
            if not ok or frame2 is None:  # camera stopped delivering frames
                break

            new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

            new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                                 None, maxLevel=1,
                                                                 criteria=lk_criteria)

            tracked = status is not None and status.ravel()[0] == 1
            if tracked:
                x, y = new_points.ravel()[:2]

                # Blend a filled green dot into the frame at the new position.
                mask = np.zeros_like(frame2)
                cv2.circle(mask, (int(x), int(y)), 5, (0, 255, 0), -1)
                combined = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

                cv2.putText(combined, f"Initial Coordinates: ({initial_x}, {initial_y})", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

                live_x, live_y = int(x), int(y)
                cv2.putText(combined, f"Live Coordinates: ({live_x}, {live_y})", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

                cv2.imshow("Camera", combined)

                # Only a successful estimate becomes the next starting position.
                old_points = new_points.copy()
            else:
                # Keep the window live while the point is lost; the last good
                # position stays in old_points for the next attempt.
                cv2.imshow("Camera", frame2)

            # new_frame is a fresh array every iteration, so no copy is needed.
            old_frame = new_frame

            key = cv2.waitKey(1)
            if key == 27:  # ESC key to exit
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    old_frame, old_points, initial_x, initial_y = initialize_point()
    if initial_x == -1 and initial_y == -1:
        # initialize_point() reports (-1, -1) when ESC was pressed before any
        # click; there is nothing meaningful to track in that case.
        print("No point selected; tracking skipped.")
    else:
        print("Selected Point Coordinates: ", initial_x, initial_y)
        track_point(old_frame, old_points, initial_x, initial_y)

Selected Point Coordinates:  240 314


Three Points Tracked

In [7]:
def initialize_points(num_points):
    """Let the user pick ``num_points`` points on the live camera feed.

    Opens camera 0 and shows the stream in a window named "Camera" with a
    prompt and a red marker on every point chosen so far.  The loop ends when
    ``num_points`` left-clicks were collected or ESC is pressed.

    Args:
        num_points: number of points to collect; must be at least 1.

    Returns:
        tuple: ``(old_frame, old_points, points)``.
            ``old_frame`` is the grayscale conversion of the last frame read.
            ``old_points`` is a float32 array reshaped to ``(-1, 1, 2)``.
            ``points`` is the list of clicked ``(x, y)`` tuples; it holds fewer
            than ``num_points`` entries when ESC ended the selection early.

    Raises:
        ValueError: if ``num_points`` is smaller than 1.
        RuntimeError: if the camera does not deliver a frame.
    """
    if num_points < 1:
        raise ValueError("num_points must be at least 1")

    points = []

    def onMouse(event, x, y, flags, params):
        # Clicks beyond the requested count are ignored.  The window is closed
        # by the main loop, never from inside its own callback.
        if event == cv2.EVENT_LBUTTONDOWN and len(points) < num_points:
            points.append((x, y))

    font_scale = 0.7
    font_color = (0, 225, 0)
    font_thickness = 2

    cap = cv2.VideoCapture(0)
    try:
        cv2.namedWindow("Camera")
        cv2.setMouseCallback("Camera", onMouse)

        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                raise RuntimeError("Could not read a frame from camera 0")

            # Overlays go on a copy: `frame` becomes the optical-flow reference
            # and markers drawn on it would sit exactly on the tracked pixels.
            preview = frame.copy()
            cv2.putText(preview, f"Select {num_points} points to track (left-click)", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

            for point in points:
                cv2.circle(preview, point, 5, (0, 0, 255), -1)  # red marker per selected point

            cv2.imshow("Camera", preview)

            key = cv2.waitKey(1)
            if key == 27 or len(points) >= num_points:  # ESC or selection complete
                break

        old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        old_points = np.array(points, dtype="float32").reshape(-1, 1, 2)
    finally:
        # Release the camera and close the window even if something failed.
        cap.release()
        cv2.destroyAllWindows()

    return old_frame, old_points, points

def track_points(old_frame, old_points, num_points, points):
    """Follow several points on the live camera feed using Lucas-Kanade flow.

    Reads frames from camera 0 until ESC is pressed or no frame is delivered.
    Every point whose status flag reports success gets a green dot at its new
    position and a text line showing its originally selected coordinates.

    Args:
        old_frame: grayscale reference frame the points were selected on.
        old_points: float32 point array passed to ``calcOpticalFlowPyrLK``.
        num_points: number of points to draw; capped at the number of points
            actually available.
        points: list of the originally selected ``(x, y)`` tuples (display only).
    """
    if len(old_points) == 0:
        return  # nothing was selected, so there is nothing to track

    # Never index past what was actually selected (selection may have been
    # aborted before num_points clicks).
    count = min(num_points, len(old_points), len(points))

    # Loop-invariant settings, built once.
    lk_criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 15, 0.08)
    font_scale = 0.7
    font_color = (0, 225, 0)
    font_thickness = 1

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, frame2 = cap.read()
            if not ok or frame2 is None:  # camera stopped delivering frames
                break

            new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

            new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                                 None, maxLevel=1,
                                                                 criteria=lk_criteria)

            tracked = status.ravel() == 1
            shown = [i for i in range(count) if tracked[i]]

            if shown:
                # Draw every dot on ONE mask and blend once, so the frame is
                # dimmed a single time regardless of how many points there are.
                mask = np.zeros_like(frame2)
                for i in shown:
                    x, y = new_points[i].ravel()
                    cv2.circle(mask, (int(x), int(y)), 5, (0, 255, 0), -1)
                frame2 = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

                # Labels are drawn after the blend so none of them is faded.
                for i in shown:
                    cv2.putText(frame2, f"Point {i + 1} Coordinates: ({points[i][0]}, {points[i][1]})",
                                (10, 30 + i * 30),
                                cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

            cv2.imshow("Camera", frame2)

            # Successful estimates become the next starting positions; lost
            # points keep their last good position.
            old_points = np.where(tracked.reshape(-1, 1, 1), new_points, old_points)
            # new_frame is a fresh array every iteration, so no copy is needed.
            old_frame = new_frame

            key = cv2.waitKey(1)
            if key == 27:  # ESC key to exit
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    num_points = 3
    old_frame, old_points, points = initialize_points(num_points)
    print("Selected Point Coordinates: ", points)
    if len(points) < num_points:
        # Selection was ended with ESC before all points were clicked; the
        # tracker indexes num_points entries, so do not start it.
        print("Selection incomplete; tracking skipped.")
    else:
        track_points(old_frame, old_points, num_points, points)

Selected Point Coordinates:  [(478, 161), (426, 148), (400, 169)]


Improving Accuracy of Base Code

In [9]:
def initialize_point():
    """Let the user pick one point on the live camera feed with a left-click.

    Opens camera 0, shows the stream in a window named "Camera" together with
    an on-screen prompt, and stops as soon as the user left-clicks or presses
    ESC.

    Returns:
        tuple: ``(old_frame, old_points, ix, iy)``.
            ``old_frame`` is the grayscale conversion of the last frame read.
            ``old_points`` is a float32 array reshaped to ``(-1, 1, 2)`` that
            holds the clicked pixel.
            ``ix`` / ``iy`` are the clicked coordinates; both are ``-1`` when
            ESC was pressed before any click.

    Raises:
        RuntimeError: if the camera does not deliver a frame.
    """
    ix, iy = -1, -1  # -1 marks "nothing selected yet"
    point_selected = False

    def onMouse(event, x, y, flags, params):
        nonlocal ix, iy, point_selected
        if event == cv2.EVENT_LBUTTONDOWN:
            ix, iy = x, y
            point_selected = True

    cap = cv2.VideoCapture(0)
    try:
        cv2.namedWindow("Camera")
        cv2.setMouseCallback("Camera", onMouse)

        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                raise RuntimeError("Could not read a frame from camera 0")

            # All overlays go on a copy: `frame` becomes the optical-flow
            # reference below and must not contain drawn pixels.
            preview = frame.copy()
            if not point_selected:
                cv2.putText(preview, "Select a point to track (left-click)", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 225, 0), 2)
            cv2.imshow("Camera", preview)

            key = cv2.waitKey(1)
            if key == 27 or point_selected:  # ESC or a completed selection
                break

        old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        old_points = np.array([[ix, iy]], dtype="float32").reshape(-1, 1, 2)
    finally:
        # Release the camera and close the window even if something failed.
        cap.release()
        cv2.destroyAllWindows()

    return old_frame, old_points, ix, iy

def track_point(old_frame, old_points, initial_x, initial_y):
    """Follow one point on the live camera feed using pyramidal Lucas-Kanade flow.

    Variant that calls ``cv2.calcOpticalFlowPyrLK`` with ``maxLevel=3`` and the
    termination criteria (30 iterations, epsilon 0.01).  Reads frames from
    camera 0 until ESC is pressed or no frame is delivered; when the status
    flag reports success, a green dot plus the initial and live coordinates
    are drawn on the displayed frame.

    Args:
        old_frame: grayscale reference frame the point was selected on.
        old_points: float32 point array passed to ``calcOpticalFlowPyrLK``;
            only the first point is drawn.
        initial_x: x coordinate of the originally selected point (display only).
        initial_y: y coordinate of the originally selected point (display only).
    """
    # Increase the number of pyramid levels for more accurate tracking
    max_level = 3
    # Increase the termination criteria values for accuracy; built once because
    # they do not change between frames.
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01)

    font_scale = 0.7
    font_color = (0, 225, 0)
    font_thickness = 1

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, frame2 = cap.read()
            if not ok or frame2 is None:  # camera stopped delivering frames
                break

            new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

            new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                                 None, maxLevel=max_level,
                                                                 criteria=criteria)

            tracked = status is not None and status.ravel()[0] == 1
            if tracked:
                x, y = new_points.ravel()[:2]

                # Blend a filled green dot (radius 15) into the frame.
                mask = np.zeros_like(frame2)
                cv2.circle(mask, (int(x), int(y)), 15, (0, 255, 0), -1)
                combined = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

                cv2.putText(combined, f"Initial Coordinates: ({initial_x}, {initial_y})", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

                live_x, live_y = int(x), int(y)
                cv2.putText(combined, f"Live Coordinates: ({live_x}, {live_y})", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

                cv2.imshow("Camera", combined)

                # Only a successful estimate becomes the next starting position.
                old_points = new_points.copy()
            else:
                # Keep the window live while the point is lost; the last good
                # position stays in old_points for the next attempt.
                cv2.imshow("Camera", frame2)

            # new_frame is a fresh array every iteration, so no copy is needed.
            old_frame = new_frame

            key = cv2.waitKey(1)
            if key == 27:  # ESC key to exit
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    old_frame, old_points, initial_x, initial_y = initialize_point()
    if initial_x == -1 and initial_y == -1:
        # initialize_point() reports (-1, -1) when ESC was pressed before any
        # click; there is nothing meaningful to track in that case.
        print("No point selected; tracking skipped.")
    else:
        print("Selected Point Coordinates: ", initial_x, initial_y)
        track_point(old_frame, old_points, initial_x, initial_y)

Selected Point Coordinates:  452 178


Feature Detector using Shi-Tomasi (Images)

In [2]:
# Raw string: the backslashes of the Windows path must not be parsed as
# escape sequences.
image_path = r"G:\THESIS\SampleImages\SMTracking_LongSleeve.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {image_path}")

grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Perform Shi-Tomasi corner detection
max_corners = 100
quality_level = 0.01
min_distance = 10

corners = cv2.goodFeaturesToTrack(grayscale, maxCorners=max_corners, qualityLevel=quality_level,
                                  minDistance=min_distance)

# np.int0 was a deprecated alias of np.intp; convert explicitly instead.
# goodFeaturesToTrack yields None when no corner passes the quality test.
if corners is None:
    corners = np.empty((0, 1, 2), dtype=np.intp)
else:
    corners = corners.astype(np.intp)

# The markers never change, so draw them once instead of on every loop pass.
for corner in corners:
    x, y = corner.ravel()
    cv2.circle(image, (int(x), int(y)), 3, (0, 255, 0), -1)

# Keep the window up until ESC is pressed.
while True:
    cv2.imshow('Shi-Tomasi Corner Detection', image)

    if cv2.waitKey(1) == 27:
        break


cv2.destroyAllWindows()


  corners = np.int0(corners)
