# Template Search

Given a template image (e.g. `./anchor.png`), can we detect where it appears in frames of the video footage?

![Example template](./anchor.png)

In [98]:
import cv2
import os
import numpy as np 
import os
import pandas as pd
from tqdm import tqdm

pd.options.mode.chained_assignment = None  # default='warn'

## Part 1: Getting all trial directories

In [38]:
def get_deepest_subdirs(root_dir):
    """Return every leaf directory found beneath `root_dir`.

    A leaf directory is one that contains no subdirectories. `root_dir`
    itself is returned when it has none. Paths are listed in the order
    `os.walk` visits them.
    """
    return [
        current_dir
        for current_dir, child_dirs, _ in os.walk(root_dir)
        if len(child_dirs) == 0  # nothing below this directory → leaf
    ]

def get_trial_dirs(root_dir:str, verbose:bool=False):
    """List the trial directories under `root_dir`.

    Trial directories are taken to be the leaf directories reported by
    `get_deepest_subdirs`. When `verbose` is true the list is also shown
    via `display`.
    """
    leaf_dirs = get_deepest_subdirs(root_dir)
    if verbose:
        display(leaf_dirs)
    return leaf_dirs

In [39]:
# Collect (and display) every leaf directory under ./captures as a trial directory
trial_dirs = get_trial_dirs(root_dir=os.path.join('.', 'captures'), verbose=True)

['.\\captures\\mq2\\58-no-android',
 '.\\captures\\mq2\\58-no-iphone',
 '.\\captures\\mq2\\58-no-mac',
 '.\\captures\\mq2\\58-no-pc_hor',
 '.\\captures\\mq2\\58-no-pc_hor-2',
 '.\\captures\\mq2\\58-no-pc_ver',
 '.\\captures\\mq2\\58-yes-android',
 '.\\captures\\mq2\\58-yes-iphone',
 '.\\captures\\mq2\\58-yes-mac',
 '.\\captures\\mq2\\58-yes-pc_hor',
 '.\\captures\\mq2\\58-yes-pc_ver',
 '.\\captures\\mq2\\68-no-android',
 '.\\captures\\mq2\\68-no-iphone',
 '.\\captures\\mq2\\68-no-mac',
 '.\\captures\\mq2\\68-no-pc_hor',
 '.\\captures\\mq2\\68-no-pc_ver',
 '.\\captures\\mq2\\68-yes-android',
 '.\\captures\\mq2\\68-yes-iphone',
 '.\\captures\\mq2\\68-yes-mac',
 '.\\captures\\mq2\\68-yes-pc_hor',
 '.\\captures\\mq2\\68-yes-pc_ver']

## Part 2: Extracting Frames from Videos

Each trial directory is expected to have the following files:

- `gaze_targets.csv`: The position of each gaze target, in both world and screen coordinates, for each of the left, center, and right eyes
- `events.csv`: Timestamps of the appearances of each gaze target during calibration
- `video.mp4` or `video.mov`: video files.

When we extract frames, we need to do the following:

1. Join `gaze_targets.csv` and `events.csv` to represent each gaze target's position.
2. Using the timestamps, extract the frame for each calibration target. We also extract the last frame.

In [107]:
def find_files_with_extensions(dir:str, extensions):
    """Recursively collect files under `dir` whose extension is in `extensions`.

    Args:
        dir: Directory to walk (subdirectories are included).
        extensions: A single extension string or any iterable of extension
            strings, each including the leading dot (e.g. `'.mp4'`).
            Matching is case-insensitive.

    Returns:
        A list of file paths (joined onto the directory they were found in),
        in the order `os.walk` yields them.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(extensions, str):
        extensions = [extensions]

    # Normalize once, up front: cheaper than rebuilding the list per file, and
    # safe for one-shot iterables (generators) that can only be consumed once
    wanted = {ext.lower() for ext in extensions}

    return [
        os.path.join(root, name)
        for root, _, files in os.walk(dir)
        for name in files
        if os.path.splitext(name)[1].lower() in wanted
    ]

class Frame:
    """A single extracted video frame plus any template detections found in it.

    Attributes:
        target_number: Label of the frame (a target number, or e.g. `'all'`).
        filepath: Path the frame image is written to.
        frame: The image array itself.
        template_bboxes: `None` until detections are assigned; afterwards an
            iterable of `(x1, y1, x2, y2, cx, cy)` tuples.
    """

    def __init__(self, target_number, filepath, frame):
        self.target_number = target_number
        self.filepath = filepath
        self.frame = frame
        self.template_bboxes = None

    def _canvas(self, frame):
        """Return a fresh copy of `frame`, or of this object's own image if `frame` is None."""
        return self.frame.copy() if frame is None else frame.copy()

    def _centroids(self):
        """Return the `(cx, cy)` pairs of all bounding boxes as a list of 2-element lists."""
        return [[cx, cy] for (x1, y1, x2, y2, cx, cy) in self.template_bboxes]

    def drawBBoxes(self, frame=None, bbox_color=[0,255,255], bbox_thickness=1, draw_centroids:bool=True, centroids_color=[0,255,255]):
        """Draw every bounding box (and optionally its centroid) onto a copy of the image.

        Args:
            frame: Image to draw on; defaults to this object's own image.
            bbox_color: Rectangle color.
            bbox_thickness: Rectangle line thickness.
            draw_centroids: Whether to mark each box's centroid with a cross.
            centroids_color: Color of the centroid markers.

        Returns:
            `(annotated_copy, self.template_bboxes)`.

        Raises:
            AssertionError: If `template_bboxes` has not been set.
        """
        assert self.template_bboxes is not None, "Cannot draw bboxes that don't exist"
        outframe = self._canvas(frame)
        for (x1, y1, x2, y2, cx, cy) in self.template_bboxes:
            # Cast to plain ints: the corners may arrive as numpy integers
            outframe = cv2.rectangle(outframe, (int(x1), int(y1)), (int(x2), int(y2)), bbox_color, bbox_thickness)
            if draw_centroids:
                outframe = cv2.drawMarker(outframe, (int(cx), int(cy)), centroids_color, cv2.MARKER_CROSS, 20, 2)
        return outframe, self.template_bboxes

    def drawMeanCentroid(self, frame=None, centroid_color=[255,255,0]):
        """Mark the mean of all bounding-box centroids on a copy of the image.

        Returns:
            `(annotated_copy, center)` where `center` is a 2-element array
            `[x, y]`. If there are no bounding boxes, nothing is drawn and
            `center` is `[nan, nan]`.

        Raises:
            AssertionError: If `template_bboxes` has not been set.
        """
        assert self.template_bboxes is not None, "Cannot draw mean centroid from bboxes that don't exist"
        outframe = self._canvas(frame)
        centroids = self._centroids()
        if len(centroids) == 0:
            # No detections: there is no centroid to compute or draw
            return outframe, np.full(2, np.nan)
        center = np.mean(centroids, axis=0)
        outframe = cv2.drawMarker(outframe, (int(center[0]), int(center[1])), centroid_color, cv2.MARKER_CROSS,20,2)
        return outframe, center

    def drawMedianCentroid(self, frame=None, centroid_color=[0,0,0]):
        """Mark the median of all bounding-box centroids on a copy of the image.

        Returns:
            `(annotated_copy, center)` where `center` is a 2-element array
            `[x, y]`. If there are no bounding boxes, nothing is drawn and
            `center` is `[nan, nan]`.

        Raises:
            AssertionError: If `template_bboxes` has not been set.
        """
        assert self.template_bboxes is not None, "Cannot draw median centroid from bboxes that don't exist"
        outframe = self._canvas(frame)
        centroids = self._centroids()
        if len(centroids) == 0:
            # No detections: there is no centroid to compute or draw
            return outframe, np.full(2, np.nan)
        center = np.median(centroids, axis=0)
        outframe = cv2.drawMarker(outframe, (int(center[0]), int(center[1])), centroid_color, cv2.MARKER_TILTED_CROSS,20,2)
        return outframe, center

In [108]:
# Initialize
trials = []
timestamp_offset = 2.0  # added to every event timestamp before converting it to a frame index

# Iterate through each trial. Use TQDM for visual feedback
pbar = tqdm(trial_dirs)
for root_dir in pbar:

    # Update progress bar description
    pbar.set_description(f"Extracting frames from \"{root_dir}\"")
    
    # Define filepaths
    events_filepath = os.path.join(root_dir, 'events.csv')
    targets_filepath = os.path.join(root_dir, 'gaze_targets.csv')
    frames_dir = os.path.join(root_dir, 'frames')

    # Extract csv files. Skip this trial if either one doesn't exist
    missing_files = [p for p in (events_filepath, targets_filepath) if not os.path.isfile(p)]
    if missing_files:
        print(f"Missing required file(s) {missing_files} in subdirectory \"{root_dir}\"")
        continue
    events_df = pd.read_csv(events_filepath)
    targets_df = pd.read_csv(targets_filepath)

    # Filter out `Start` and `End` rows in `events_df`.
    # `.copy()` makes `edf` an independent frame so the column assignment
    # below does not write into a slice of `events_df`
    edf = events_df[~events_df['event'].isin(['Start','End'])].copy()
    edf["target_number"] = edf["target_number"].astype(int)

    # Modify `targets_df` to remove `unix_ms` and typecast `target_number` as integer
    tdf = targets_df.drop(columns=['unix_ms'])
    tdf["target_number"] = tdf["target_number"].astype(int)
    
    # Inner Join on `edf` with `tdf`
    df = pd.merge(left=edf, right=tdf, on='target_number')

    # Pre-emptively create `frames` directory
    os.makedirs(frames_dir, exist_ok=True)

    # Find `video` file in the root directory
    videos = find_files_with_extensions(root_dir, ['.mov','.mp4'])
    if len(videos) == 0:
        print(f"There are no video files in subdirectory \"{root_dir}\"")
        continue
    video_filename = videos[0]
    
    # Initialize 
    cap = cv2.VideoCapture(video_filename)
    frames = []
    try:
        if not cap.isOpened():
            print(f"Could not open video \"{video_filename}\"")
            continue
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Without a positive frame rate, timestamps cannot be mapped to frames
        # (`not fps > 0` also rejects NaN)
        if not fps > 0:
            print(f"Could not determine the frame rate of video \"{video_filename}\"")
            continue
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Iterate through all calibration targets
        for _, row in df.iterrows():
            target_number = int(row['target_number'])
            target_time = row['timestamp'] + timestamp_offset
            frame_idx = int(target_time * fps)

            # Seek to the desired frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)

            # Read frame, move to next row if unable to read frame
            ok, frame = cap.read()
            if not ok:  
                print(f"Warning: Unable to read frame at {target_time}s (frame {frame_idx}).")
                continue
            frames.append(Frame(
                target_number, 
                os.path.join(frames_dir, f"{target_number}.jpg"), 
                frame))

        # Extract final frame. The index is a few frames before the reported
        # end (presumably because the very last frames may not be readable —
        # TODO confirm); clamp at 0 so very short videos don't seek negative
        cap.set(cv2.CAP_PROP_POS_FRAMES, max(total_frames - 3, 0))
        ok, last_frame = cap.read()
        if not ok:  print("Warning: Unable to read the last frame.")
        else:       frames.append(Frame('all', os.path.join(frames_dir, "all.jpg"), last_frame))
    finally:
        # Release capture on every exit path, including `continue` and errors
        cap.release()

    # Write each frame to disk, if any exist
    if len(frames) == 0:
        print("No frames detected!")
        continue
    for f in frames:
        if not cv2.imwrite(f.filepath, f.frame):
            print(f"Warning: Unable to write frame to \"{f.filepath}\"")

    # Add to trial
    trials.append({
        'root_dir': root_dir,
        'frames': frames
    })


Extracting frames from ".\captures\mq2\68-yes-pc_ver": 100%|██████████| 21/21 [00:30<00:00,  1.46s/it] 


## Part 3: Template Matching

In [109]:
def estimate_template_from_image(
        src_img, 
        template_img, 
        min_size=10, 
        max_size=50, 
        delta_size=5, 
        thresh=0.9, 
        verbose=True):
    """Locate a transparent template in an image at several template sizes.

    The template is resized to a square of `p x p` pixels for each `p` in
    `np.arange(min_size, max_size, delta_size)` (so `max_size` itself is
    excluded). Each size is matched against `src_img` with
    `cv2.TM_CCORR_NORMED`, using the template's alpha channel as the mask.

    Args:
        src_img: Image to search in.
        template_img: Template with an alpha channel as its 4th channel
            (e.g. a PNG loaded with `cv2.IMREAD_UNCHANGED`).
        min_size: Smallest template side length to try.
        max_size: Exclusive upper bound on the template side length.
        delta_size: Step between successive side lengths.
        thresh: Minimum match score for a location to count as a detection.
        verbose: Whether to print the number of detections.

    Returns:
        A list of `(x1, y1, x2, y2, cx, cy)` tuples: top-left corner,
        bottom-right corner, and center of every detection. Empty if nothing
        matched.

    Raises:
        ValueError: If `src_img` is None, or `template_img` is None or has no
            alpha channel.
    """
    # Fail early with a clear message; `cv2.imread` returns None on a bad path
    if src_img is None:
        raise ValueError("src_img is None (was the image read successfully?)")
    if template_img is None:
        raise ValueError("template_img is None (was the template read successfully?)")
    if getattr(template_img, 'ndim', 0) != 3 or template_img.shape[2] < 4:
        raise ValueError("template_img must have an alpha channel (load it with cv2.IMREAD_UNCHANGED)")

    src_h, src_w = src_img.shape[:2]

    # Initialize bounding boxes
    bboxes = []

    # Iterate through the candidate template sizes
    for p in np.arange(min_size, max_size, delta_size):
        size = int(p)

        # A template larger than the source image cannot be matched
        if size < 1 or size > src_w or size > src_h:
            continue

        # Resize the template
        template_resize = cv2.resize(template_img, (size, size))

        # We assume transparency, so we have to separate alpha from bgr.
        # The mask is replicated to three channels to match the template
        template = template_resize[:, :, 0:3]
        alpha = template_resize[:, :, 3]
        alpha = cv2.merge([alpha, alpha, alpha])

        # Get the width and height of the template
        h, w = template.shape[:2]

        # Find the match score at every location
        res = cv2.matchTemplate(src_img, template, cv2.TM_CCORR_NORMED, mask=alpha)

        # Threshold. Non-finite scores are discarded: an infinite score would
        # otherwise pass the threshold (a normalized score is a ratio, so it
        # can be non-finite if the denominator is zero)
        ys, xs = np.where(np.isfinite(res) & (res >= thresh))
        for x, y in zip(xs.tolist(), ys.tolist()):
            bboxes.append((x, y, x + w, y + h, x + (w / 2), y + (h / 2)))

    # Print and return
    if verbose: print(f"# Detected Bounding Boxes: {len(bboxes)}")
    return bboxes

In [111]:
# Use OpenCV to read anchor image (IMREAD_UNCHANGED keeps the alpha channel)
anchor_filepath = './anchor.png'
anchor_img = cv2.imread(anchor_filepath, cv2.IMREAD_UNCHANGED)
# `cv2.imread` returns None instead of raising when the file can't be read
if anchor_img is None:
    raise FileNotFoundError(f"Could not read template image \"{anchor_filepath}\"")

# Initialize
estimations = []

# Iterate through each trial. Use TQDM for visual feedback
pbar = tqdm(trials)
for trial in pbar:

    # Update progress bar description
    pbar.set_description(f"Estimating anchors from frames in \"{trial['root_dir']}\"")

    # Generate output directory for drawn images
    outdir = os.path.join(trial['root_dir'], 'anchor_frames')
    os.makedirs(outdir, exist_ok=True)

    # NOTE: the extracted frames were already written to disk by the
    # extraction cell, so they are not re-written here. (The previous loop
    # over the leftover global `frames` only re-saved the last trial's frames.)

    # For each frame, attempt to estimate the anchor
    for frame in trial['frames']:

        # Estimate
        frame.template_bboxes = estimate_template_from_image(frame.frame, anchor_img, verbose=False)

        # Without any detection there is no centroid to draw
        if len(frame.template_bboxes) == 0:
            print(f"Warning: No anchor detected in \"{frame.filepath}\"")
            continue
        
        # Draw the mean centroid, then the median centroid on top of it
        mean_centroid_frame, mean_centroid = frame.drawMeanCentroid()
        median_centroid_frame, median_centroid = frame.drawMedianCentroid(frame=mean_centroid_frame)

        # Render
        cv2.imwrite(os.path.join(outdir, f"{frame.target_number}.jpg"), median_centroid_frame)

Estimating anchors from frames in ".\captures\mq2\68-yes-pc_ver": 100%|██████████| 21/21 [14:33<00:00, 41.62s/it] 


In [None]:
def find_template_match(
        frame, 
        frame_filename,
        template_filename, 
        min_size=10, 
        max_size=50, 
        delta_size=5, 
        thresh=0.9,
        draw_bbox=False,
        draw_centers=True,
        bbox_color=[0,255,255],
        bbox_thickness=1,
        verbose=False):
    """Find a template in `frame` and estimate its position from all matches.

    The template is loaded from `template_filename` and matched at several
    sizes via `estimate_template_from_image`. The mean and median of the
    detections' centers are computed and, optionally, drawn onto a copy of
    `frame` that is saved next to `frame_filename` with a `_matched.jpg`
    suffix.

    Args:
        frame: Image to search in.
        frame_filename: Path of the frame; only used to derive the output path.
        template_filename: Path of the template image (read with its alpha channel).
        min_size, max_size, delta_size, thresh: Forwarded to
            `estimate_template_from_image`.
        draw_bbox: Whether to draw every detection's bounding box.
        draw_centers: Whether to mark the mean and median centers.
        bbox_color: Bounding box color.
        bbox_thickness: Bounding box line thickness.
        verbose: Whether to print the estimated positions.

    Returns:
        `(mean_center, median_center, result)`. The centers are 2-element
        arrays `[x, y]`, or `[nan, nan]` when nothing matched. `result` is the
        annotated image, or None when neither drawing option is enabled.

    Raises:
        FileNotFoundError: If the template image cannot be read.
    """
    # Load template using opencv; `cv2.imread` returns None on failure
    template_all = cv2.imread(template_filename, cv2.IMREAD_UNCHANGED)
    if template_all is None:
        raise FileNotFoundError(f"Could not read template image \"{template_filename}\"")

    # Multi-size template matching is shared with `estimate_template_from_image`
    boxes = estimate_template_from_image(
        frame,
        template_all,
        min_size=min_size,
        max_size=max_size,
        delta_size=delta_size,
        thresh=thresh,
        verbose=False)

    # Estimate the position from the centers of all detections
    has_matches = len(boxes) > 0
    if has_matches:
        centers = [[cx, cy] for (x1, y1, x2, y2, cx, cy) in boxes]
        mean_center = np.mean(centers, axis=0)
        median_center = np.median(centers, axis=0)
    else:
        # No detections: report NaN positions rather than failing
        mean_center = np.full(2, np.nan)
        median_center = np.full(2, np.nan)
    result = None
    
    if verbose:
        print(f"ESTIMATED MEAN POSITION: {mean_center}")
        print(f"ESTIMATED MEDIAN POSITION: {median_center}")
    
    if draw_bbox or draw_centers:
        result = frame.copy()
        if draw_bbox:
            for (x1, y1, x2, y2, cx, cy) in boxes:
                result = cv2.rectangle(result, (int(x1), int(y1)), (int(x2), int(y2)), bbox_color, bbox_thickness)
        # NaN centers cannot be converted to pixel coordinates, so skip markers
        if draw_centers and has_matches:
            result = cv2.drawMarker(result, (int(mean_center[0]), int(mean_center[1])), (0,255,255),cv2.MARKER_CROSS,20,2)
            result = cv2.drawMarker(result, (int(median_center[0]), int(median_center[1])), (255,255,0),cv2.MARKER_TILTED_CROSS,20,2)

        # Save resulting image
        frame_file_seperated, _ = os.path.splitext(frame_filename)
        result_path =  frame_file_seperated + "_matched" + ".jpg"
        cv2.imwrite(result_path, result)
    
    return mean_center, median_center, result

In [91]:
_TEMPLATE = './anchor.png'
# Each entry of `t['frames']` is a `Frame` object, so its image and path are
# attributes (`f.frame`, `f.filepath`), not dictionary keys
last_centers = [
    find_template_match(f.frame, f.filepath, _TEMPLATE, verbose=True)
    for t in trials
    for f in t['frames']
]

ESTIMATED MEAN POSITION: [1321.76750616  736.8644771 ]
ESTIMATED MEDIAN POSITION: [1322.  737.]
ESTIMATED MEAN POSITION: [1518.1938669   539.80674534]
ESTIMATED MEDIAN POSITION: [1518.5  539.5]
ESTIMATED MEAN POSITION: [1124.56738807  539.74867236]
ESTIMATED MEDIAN POSITION: [1124.5  539.5]
ESTIMATED MEAN POSITION: [1124.51453011  933.72879524]
ESTIMATED MEDIAN POSITION: [1124.5  934. ]
ESTIMATED MEAN POSITION: [1519.66681068  932.15324166]
ESTIMATED MEDIAN POSITION: [1519.5  932. ]
ESTIMATED MEAN POSITION: [1896.29746234  736.56722075]
ESTIMATED MEDIAN POSITION: [1896.5  736.5]
ESTIMATED MEAN POSITION: [1321.8775154   162.05283403]
ESTIMATED MEDIAN POSITION: [1322.  162.]
ESTIMATED MEAN POSITION: [746.99840217 736.55377871]
ESTIMATED MEDIAN POSITION: [747.  736.5]
ESTIMATED MEAN POSITION: [1321.86441881 1311.46223386]
ESTIMATED MEDIAN POSITION: [1322.  1311.5]
ESTIMATED MEAN POSITION: [1321.54794393  736.77133692]
ESTIMATED MEDIAN POSITION: [1322.   736.5]
ESTIMATED MEAN POSITION: [64