In [1]:
import cv2
import numpy as np
import time
import matplotlib.pyplot as plt

Base Code (cleaned up version)

In [81]:
def initialize_point():
    # Point is initialized by clicking with the mouse
    ix, iy = -1, -1  # Initialize coordinates
    prompt_shown = False

    def onMouse(event, x, y, flags, params):
        nonlocal ix, iy, prompt_shown
        if event == cv2.EVENT_LBUTTONDOWN:
            ix, iy = x, y
            prompt_shown = True

    cv2.namedWindow("Camera")
    cv2.setMouseCallback("Camera", onMouse)

    cap = cv2.VideoCapture(0)
    old_frame = None  # Initialize old_frame

    while True:
        _, frame = cap.read()

        if not prompt_shown:
            font_scale = 0.7
            font_color = (0, 225, 0)
            font_thickness = 2
            cv2.putText(frame, "Select a point to track (left-click)", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)

        cv2.imshow("Camera", frame)

        # Display coordinates on the camera frame
        if ix != -1 and iy != -1:
            cv2.circle(frame, (ix, iy), 5, (0, 225, 0), -1)

        key = cv2.waitKey(1)
        if key == 27 or prompt_shown:
            break

    old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    old_points = np.array([[ix, iy]], dtype="float32").reshape(-1, 1, 2)
    cap.release()
    cv2.destroyAllWindows()

    return old_frame, old_points, ix, iy

def track_point(old_frame, old_points, initial_x, initial_y):
    cap = cv2.VideoCapture(0)

    while True:
        _, frame2 = cap.read()

        if frame2 is None:  # Check if frame2 is valid
            break

        new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

        new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                             None, maxLevel=1,
                                                             criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                                                                       15, 0.08))

        # Draw a slightly bigger dot at the new point
        if status.ravel()[0] == 1:  # Check if the point is successfully tracked
            x, y = new_points.ravel()
            mask = np.zeros_like(frame2)
            cv2.circle(mask, (int(x), int(y)), 5, (0, 255, 0), -1)  # Increase the radius to make it bigger
            combined = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

            # Display coordinates of the selected point and tracking result
            font_scale = 0.7
            font_color = (0, 225, 0)
            font_thickness = 1
            cv2.putText(combined, f"Initial Coordinates: ({initial_x}, {initial_y})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)
            
            # Calculate and display live coordinates of the tracked point
            live_x, live_y = int(x), int(y)
            cv2.putText(combined, f"Live Coordinates: ({live_x}, {live_y})", (10, 60), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)
            
            cv2.imshow("Camera", combined)

        old_frame = new_frame.copy()
        old_points = new_points.copy()

        key = cv2.waitKey(1)
        if key == 27:  # ESC key to exit
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    old_frame, old_points, initial_x, initial_y = initialize_point()
    print("Selected Point Coordinates: ", initial_x, initial_y)
    track_point(old_frame, old_points, initial_x, initial_y)

Selected Point Coordinates:  471 283


Three Points Tracked

In [7]:
def initialize_points(num_points):
    # Initialize points list and counters
    points = []
    point_counter = 0

    def onMouse(event, x, y, flags, params):
        nonlocal point_counter
        if event == cv2.EVENT_LBUTTONDOWN:
            points.append((x, y))
            point_counter += 1
            if point_counter == num_points:
                cv2.destroyWindow("Camera")  # Close the camera window after selecting all points

    cv2.namedWindow("Camera")
    cv2.setMouseCallback("Camera", onMouse)

    cap = cv2.VideoCapture(0)
    old_frame = None  # Initialize old_frame

    while point_counter < num_points:
        _, frame = cap.read()

        # Display instructions for selecting points
        font_scale = 0.7
        font_color = (0, 225, 0)
        font_thickness = 2
        cv2.putText(frame, f"Select {num_points} points to track (left-click)", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, font_color, font_thickness)

        # Draw circles on selected points
        for point in points:
            cv2.circle(frame, point, 5, (0, 0, 255), -1)  # Draw a red circle on each selected point

        cv2.imshow("Camera", frame)

        key = cv2.waitKey(1)
        if key == 27:  # ESC key to exit
            break

    old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    old_points = np.array(points, dtype="float32").reshape(-1, 1, 2)
    cap.release()
    cv2.destroyAllWindows()

    return old_frame, old_points, points

def track_points(old_frame, old_points, num_points, points):
    cap = cv2.VideoCapture(0)

    while True:
        _, frame2 = cap.read()

        if frame2 is None:  # Check if frame2 is valid
            break

        new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

        new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                             None, maxLevel=1,
                                                             criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                                                                       15, 0.08))

        # Draw slightly bigger dots at the new points
        for i in range(num_points):
            if status.ravel()[i] == 1:  # Check if the point is successfully tracked
                x, y = new_points[i].ravel()
                mask = np.zeros_like(frame2)
                cv2.circle(mask, (int(x), int(y)), 5, (0, 255, 0), -1)  # Increase the radius to make it bigger
                frame2 = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

                # Display coordinates of the selected point and tracking result
                font_scale = 0.7
                font_color = (0, 225, 0)
                font_thickness = 1
                cv2.putText(frame2, f"Point {i + 1} Coordinates: ({points[i][0]}, {points[i][1]})", (10, 30 + i * 30),
                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_color, font_thickness)

        cv2.imshow("Camera", frame2)

        old_frame = new_frame.copy()
        old_points = new_points.copy()

        key = cv2.waitKey(1)
        if key == 27:  # ESC key to exit
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    num_points = 3
    old_frame, old_points, points = initialize_points(num_points)
    print("Selected Point Coordinates: ", points)
    track_points(old_frame, old_points, num_points, points)

Selected Point Coordinates:  [(478, 161), (426, 148), (400, 169)]


Improving Accuracy of Base Code

In [14]:
def initialize_point():
    # Point is initialized by clicking with the mouse
    ix, iy = -1, -1  # Initialize coordinates
    prompt_shown = False

    def onMouse(event, x, y, flags, params):
        nonlocal ix, iy, prompt_shown
        if event == cv2.EVENT_LBUTTONDOWN:
            ix, iy = x, y
            prompt_shown = True

    cv2.namedWindow("Camera")
    cv2.setMouseCallback("Camera", onMouse)

    cap = cv2.VideoCapture(0)
    old_frame = None  # Initialize old_frame

    while True:
        _, frame = cap.read()

        if not prompt_shown:
            font_scale = 0.7
            font_color = (0, 225, 0)
            font_thickness = 2
            cv2.putText(frame, "Select a point to track (left-click)", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)

        cv2.imshow("Camera", frame)

        # Display coordinates on the camera frame
        if ix != -1 and iy != -1:
            cv2.circle(frame, (ix, iy), 5, (0, 225, 0), -1)

        key = cv2.waitKey(1)
        if key == 27 or prompt_shown:
            break

    old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    old_points = np.array([[ix, iy]], dtype="float32").reshape(-1, 1, 2)
    cap.release()
    cv2.destroyAllWindows()

    return old_frame, old_points, ix, iy

def track_point(old_frame, old_points, initial_x, initial_y):
    cap = cv2.VideoCapture(0)

    # Increase the number of pyramid levels for more accurate tracking
    max_level = 3

    while True:
        _, frame2 = cap.read()

        if frame2 is None:  # Check if frame2 is valid
            break

        new_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

        # Increase the termination criteria values for accuracy
        criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01)

        new_points, status, error = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, old_points,
                                                             None, maxLevel=max_level,
                                                             criteria=criteria)

        # Draw a slightly bigger dot at the new point
        if status.ravel()[0] == 1:  # Check if the point is successfully tracked
            x, y = new_points.ravel()
            mask = np.zeros_like(frame2)
            cv2.circle(mask, (int(x), int(y)), 15, (0, 255, 0), -1)  # Increase the radius to make it bigger
            combined = cv2.addWeighted(frame2, 0.7, mask, 0.3, 0.1)

            # Display coordinates of the selected point and tracking result
            font_scale = 0.7
            font_color = (0, 225, 0)
            font_thickness = 1
            cv2.putText(combined, f"Initial Coordinates: ({initial_x}, {initial_y})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)
            
            # Calculate and display live coordinates of the tracked point
            live_x, live_y = int(x), int(y)
            cv2.putText(combined, f"Live Coordinates: ({live_x}, {live_y})", (10, 60), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, font_color, font_thickness)
            
            cv2.imshow("Camera", combined)

        old_frame = new_frame.copy()
        old_points = new_points.copy()

        key = cv2.waitKey(1)
        if key == 27:  # ESC key to exit
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    old_frame, old_points, initial_x, initial_y = initialize_point()
    print("Selected Point Coordinates: ", initial_x, initial_y)
    track_point(old_frame, old_points, initial_x, initial_y)

Selected Point Coordinates:  284 288


Feature Detector using Shi-Tomasi (Images)

In [94]:
image_path = "G:\THESIS\SampleImages\SMTracking_LongSleeve.jpg"
image = cv2.imread(image_path)
grayscale = cv2.cvtColor(image,  cv2.COLOR_BGR2GRAY)

#Perform Shi-Tomasi Corner Detection
max_crnrs = 1500
qual_lvl = 0.0001
min_dist = 0.1

corners = cv2.goodFeaturesToTrack(grayscale, maxCorners=max_crnrs, qualityLevel=qual_lvl, 
                                  minDistance=min_dist)
corners = np.int0(corners)

for corner in corners:
    x, y = corner.ravel()
    cv2.circle(image, (x, y), 3, (0, 255, 0), -1)

while True:
    cv2.imshow('Shi-Tomasi Corner Detection', image)

    if cv2.waitKey(1) == 27:
        break


cv2.destroyAllWindows()

  corners = np.int0(corners)


Modularize Shi_Tomasi function (not working yet)

In [40]:
def shi_tomasi_corner_detection():
    image_path = r'G:\THESIS\SampleImages\SMTracking_LongSleeve.jpg'
    image = cv2.imread(image_path)
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Parameters for Shi-Tomasi Corner Detection
    max_crnrs = 1000
    qual_lvl = 0.0001
    min_dist = 0.1

    # Detect Shi-Tomasi corners
    corners = cv2.goodFeaturesToTrack(grayscale, maxCorners=max_crnrs, qualityLevel=qual_lvl,
                                      minDistance=min_dist)
    corners = np.int0(corners)

    for corner in corners:
        x, y = corner.ravel()
        cv2.circle(image, (x, y), 3, (0, 255, 0), -1)

    while True:
        cv2.imshow('Shi-Tomasi Corner Detection', image)
        if cv2.waitKey(1) == 27:
            break

    cv2.destroyAllWindows()

if __name__ == "__main":
    shi_tomasi_corner_detection()

Shi-Tomasi with WebCam or Video

In [13]:
def shi_tomasi_corner_detection(video_path):
    cap = cv2.VideoCapture(video_path)

    window_size = (1280, 720) 
    cv2.namedWindow('Shi-Tomasi Corner Detection', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Shi-Tomasi Corner Detection', *window_size)

    frame_width = 640
    frame_height = 360
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

    fps = 60 
    frame_interval = 1.0 / fps

    while True:
        start_time = time.time()

        ret, frame = cap.read()

        if not ret:
            break

        grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        max_crnrs = 3000
        qual_lvl = 0.001
        min_dist = 0.1

        corners = cv2.goodFeaturesToTrack(grayscale, maxCorners=max_crnrs, qualityLevel=qual_lvl,
                                          minDistance=min_dist)

        if corners is not None:
            corners = np.int0(corners)

            for corner in corners:
                x, y = corner.ravel()
                cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)

        cv2.imshow('Shi-Tomasi Corner Detection', frame)

        elapsed_time = time.time() - start_time
        if elapsed_time < frame_interval:
            time.sleep(frame_interval - elapsed_time)

        if cv2.waitKey(1) == 27:
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    video_path = "G:\THESIS\SampleVideos\SMTracking_TankTop.mp4" 
    shi_tomasi_corner_detection(video_path)

  corners = np.int0(corners)


Integrating LK and Shi-Tomasi Together [Draft 1]

In [72]:
import cv2
import numpy as np

def track_features(video_path):
    
    cap = cv2.VideoCapture(video_path)

    # Parameters for Shi-Tomasi Corner Detection
    max_corners = 3000
    quality_level = 0.001
    min_distance = 0.5

    # Parameters for Lucas-Kanade Optical Flow
    lk_params = dict(winSize=(15, 15), maxLevel=2, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 12, 0.01))

    # Read the first frame
    ret, prev_frame = cap.read()
    prev_frame = cv2.resize(prev_frame, (1280, 720))  # Resize the frame
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=max_corners, qualityLevel=quality_level, minDistance=min_distance)

    
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (1280, 720))  # Resize the frame

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        
        mask = np.zeros_like(prev_frame)

        # Calculate optical flow using Lucas-Kanade
        prev_frame = cv2.resize(prev_frame, (1280, 720))  # Resize the previous frame
        new_pts, status, error = cv2.calcOpticalFlowPyrLK(prev_gray, gray, prev_pts, None, **lk_params)

        # Select only valid points
        valid_new_pts = new_pts[status == 1]
        valid_prev_pts = prev_pts[status == 1]

        for i, (new, prev) in enumerate(zip(valid_new_pts, valid_prev_pts)):
            a, b = new.ravel()
            c, d = prev.ravel()
            a, b, c, d = int(a), int(b), int(c), int(d)  
            
            scaling_factor = 5 
            a_new = int(a - (a - c) * scaling_factor)
            b_new = int(b - (b - d) * scaling_factor)
            
            mask = cv2.line(mask, (a, b), (a_new, b_new), (0, 255, 0), 2)
            frame = cv2.circle(frame, (a, b), 3, (0, 255, 0), -1)
            

        result = cv2.add(frame, mask)  

        cv2.imshow('Frame', result)

        k = cv2.waitKey(30) & 0xff
        if k == 27:
            break

        
        prev_gray = gray.copy()
        prev_pts = valid_new_pts.reshape(-1, 1, 2)

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    video_path = "G:\THESIS\SampleVideos\SMTracking_LongSleeve.mp4"  
    track_features(video_path)


[Draft 2]

In [2]:

def track_features(video_path):
    
    cap = cv2.VideoCapture(video_path)

    max_corners = 3000
    quality_level = 0.001
    min_distance = 0.1 

    # Parameters for Lucas-Kanade Optical Flow
    lk_params = dict(winSize=(25, 25), maxLevel=3, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01))

    # Read the first frame
    ret, prev_frame = cap.read()
    prev_frame = cv2.resize(prev_frame, (1280, 720))  # Resize the frame
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=max_corners, qualityLevel=quality_level, minDistance=min_distance)

    frame_width = 1280
    frame_height = 720

    # ROI
    square_size = (670, 720) #ROI Size
    square_x = int(frame_width - square_size[0]) // 2
    square_y = int(frame_height - square_size[1]) // 2
    square_position = (square_x, square_y) 

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (1280, 720)) 

        gray = cv2.cvtColor (frame, cv2.COLOR_BGR2GRAY)

        # Create a mask that covers the square ROI
        mask = np.zeros(frame.shape[:2], dtype=np.uint8)
        x, y, w, h = square_position[0], square_position[1], square_size[0], square_size[1]
        mask[y:y + h, x:x + w] = 230  

        
        masked_frame = cv2.bitwise_and(frame, frame, mask=mask)

        # Calculate optical flow using Lucas-Kanade on the masked_frame
        prev_frame = cv2.resize(prev_frame, (1280, 720))  # Resize the previous frame
        new_pts, status, error = cv2.calcOpticalFlowPyrLK(prev_gray, gray, prev_pts, None, **lk_params)

        valid_new_pts = new_pts[status == 1]
        valid_prev_pts = prev_pts[status == 1]

        for i, (new, prev) in enumerate(zip(valid_new_pts, valid_prev_pts)):
            a, b = new.ravel()
            c, d = prev.ravel()
            a, b, c, d = int(a), int(b), int(c), int(d)

            if square_position[0] <= a <= square_position[0] + square_size[0] and square_position[1] <= b <= square_position[1] + square_size[1]:
                scaling_factor = 5
                a_new = int(a - (a - c) * scaling_factor)
                b_new = int(b - (b - d) * scaling_factor)

                #limit drawing to the ROI
                mask = cv2.line(mask, (a, b), (a_new, b_new), (0, 255, 0), 2)
                masked_frame = cv2.circle(masked_frame, (a, b), 3, (0, 255, 0), -1)

        result = cv2.add(frame, masked_frame)

        cv2.imshow('Frame', result)

        k = cv2.waitKey(30) & 0xff
        if k == 27:
            break

        prev_gray = gray.copy()
        prev_pts = valid_new_pts.reshape(-1, 1, 2)

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    video_path = "G:\THESIS\SampleVideos\SMTracking_LongSleeve.mp4"
    track_features(video_path)


Class Version 

In [8]:
class FeatureTracking:
    def __init__(self, video_path):
        self.cap = cv2.VideoCapture(video_path)
        self.max_corners =  3000
        self.quality_level = 0.001
        self.min_distance = 0.1

        self.lk_params = dict(winSize=(25, 25), maxLevel= 3, criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01))

        self.frame_width =  1280
        self.frame_height = 720

        #Region of Interest
        self.square_size = (670, 720)
        self.square_x = int(self.frame_width - self.square_size[0]) // 2 #since the square will be placed relative to the top left corner of the video
        self.square_y = int(self.frame_height - self.square_size[1]) // 2 #we want to make sure that the ROI is placed in the middle
        self.square_position = (self.square_x, self.square_y)

    def process_video(self):
        ret, prev_frame = self.cap.read()
        prev_frame = cv2.resize(prev_frame, (self.frame_width, self.frame_height))  # Resize the frame
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=self.max_corners, qualityLevel=self.quality_level, minDistance=self.min_distance)
        
        while True:
            ret, frame = self.cap.read()

            #convert to grayscale
            frame  = cv2.resize(frame, (self.frame_width, self.frame_height))
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            #create ROI
            mask = np.zeros(frame.shape[:2], dtype=np.uint8)
            x, y, w, h = self.square_position[0], self.square_position[1], self.square_size[0], self.square_size[1]
            mask[y:y + h, x:x + w] = 230
            masked_frame = cv2.bitwise_and(frame, frame, mask=mask)
            
            # Calculate optical flow using Lucas-Kanade on the masked_frame
            prev_frame = cv2.resize(prev_frame, (self.frame_width, self.frame_height))  # Resize the previous frame
            new_pts, status, error = cv2.calcOpticalFlowPyrLK(prev_gray, gray, prev_pts, None, **self.lk_params)

            valid_new_pts = new_pts[status == 1]
            valid_prev_pts = prev_pts[status == 1]

            for i, (new, prev) in enumerate(zip(valid_new_pts, valid_prev_pts)):
                a, b = new.ravel()
                c, d = prev.ravel()
                a, b, c, d = int(a), int(b), int(c), int(d)

                if self.square_position[0] <= a <= self.square_position[0] + self.square_size[0] and self.square_position[1] <= b <= self.square_position[1] + self.square_size[1]:
                    scaling_factor = 5
                    a_new = int(a - (a - c) * scaling_factor)
                    b_new = int(b - (b - d) * scaling_factor)

                    #limit drawing to the ROI
                    mask = cv2.line(mask, (a, b), (a_new, b_new), (0, 255, 0), 2)
                    masked_frame = cv2.circle(masked_frame, (a, b), 3, (0, 255, 0), -1)

            result = cv2.add(frame, masked_frame)

            cv2.imshow('Frame', result)

            k = cv2.waitKey(30) & 0xff
            if k == 27:
                break

            prev_gray = gray.copy()
            prev_pts = valid_new_pts.reshape(-1, 1, 2)

        self.cap.release()
        cv2.destroyAllWindows()
            
if __name__ == "__main__":
    video_path = "G:\THESIS\SampleVideos\SMTracking_LongSleeve.mp4"
    feature_tracker = FeatureTracking(video_path)
    feature_tracker.process_video()
        