In [16]:
import numpy as np

def average_boxes(boxes, iou_thresh=0.5):
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes)
    keep = []

    while len(boxes) > 0:
        ref_box = boxes[0]
        rest = boxes[1:]

        x1, y1, x2, y2, score = ref_box
        ious = []

        for box in rest:
            xx1 = max(x1, box[0])
            yy1 = max(y1, box[1])
            xx2 = min(x2, box[2])
            yy2 = min(y2, box[3])
            inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
            union = (x2 - x1) * (y2 - y1) + (box[2] - box[0]) * (box[3] - box[1]) - inter
            iou = inter / union if union > 0 else 0
            ious.append(iou)

        ious = np.array([1.0] + ious)
        cluster = boxes[np.where(ious >= iou_thresh)[0]]

        avg_box = np.average(cluster, axis=0, weights=cluster[:, 4])  # Weighted by score
        keep.append(avg_box)

        boxes = np.delete(boxes, np.where(ious >= iou_thresh)[0], axis=0)

    return keep


## Using **FFT Filters** for patern matching

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time

# --- Step 1: High-pass filter and FFT helpers ---

def high_pass_filter_fft(image):
    """
    Applies a high-pass filter using FFT to remove low-frequency components.

    Parameters:
        image (2D np.ndarray): Grayscale image.
        radius (int): Radius of the low-frequency region to suppress.

    Returns:
        img_back (2D np.ndarray): High-pass filtered image (edges and fine details).
    """

    # Step 1: Forward FFT
    f = np.fft.fft2(image)
    fshift = np.fft.fftshift(f)

    # Step 2: Create High-Pass Mask
    rows, cols = image.shape
    crow, ccol = rows // 2, cols // 2

    radius = min(rows, cols) // 10  # Adjust radius based on image size
    radius = 8
    mask = np.ones((rows, cols), np.uint8)
    mask[crow - radius:crow + radius, ccol - radius:ccol + radius] = 0  # Suppress low frequencies

    # Step 3: Apply mask and inverse FFT
    fshift_filtered = fshift * mask
    f_ishift = np.fft.ifftshift(fshift_filtered)
    img_back = np.fft.ifft2(f_ishift)
    img_back = np.abs(img_back)

    # Step 4: Normalize for consistent scaling (0 to 1)
    img_back = (img_back - np.min(img_back)) / (np.max(img_back) - np.min(img_back))

    return img_back

# --- Step 2: Rotate template with padding ---
def rotate_image_with_padding(image, angle):
    h, w = image.shape
    center = (w // 2, h // 2)
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    cos = np.abs(rot_mat[0, 0])
    sin = np.abs(rot_mat[0, 1])
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    rot_mat[0, 2] += (nW / 2) - center[0]
    rot_mat[1, 2] += (nH / 2) - center[1]

    return cv2.warpAffine(image, rot_mat, (nW, nH), borderValue=255)

# --- Step 3: Convolution with dot product ---
def convolve_and_get_bboxes(image, template, threshold):
    h, w = template.shape
    ih, iw = image.shape
    bboxes = []

    stride_y = max(1, h // 20)
    stride_x = max(1, w // 20)

    for y in range(0, ih - h + 1, stride_y):
        for x in range(0, iw - w + 1, stride_x):
            patch = image[y:y+h, x:x+w]
            dot = np.dot(patch.flatten(), template.flatten())
            norm = np.linalg.norm(patch) * np.linalg.norm(template)
            score = dot / (norm + 1e-6)
            print(f"Score at ({x}, {y}): {score:.4f} dot {dot} and norm {norm}")  # Debugging output
            if score >= threshold:
                bboxes.append((x, y, x + w, y + h, score))
    return bboxes

# --- Step 4: Non-Maximum Suppression ---
def non_max_suppression_fast(boxes, iou_thresh=0.3):
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes)
    x1 = boxes[:,0]; y1 = boxes[:,1]; x2 = boxes[:,2]; y2 = boxes[:,3]; scores = boxes[:,4]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(tuple(boxes[i]))

        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)

        order = order[1:][iou <= iou_thresh]

    return keep

# --- Step 5: Main Pipeline ---
def main():
    
    # Load and resize full image
    img = cv2.imread('Dataset\\test\\images\\image_20250321_200236_lbent_3.png', cv2.IMREAD_GRAYSCALE)
    img_resized = cv2.resize(img, (240, 240))
    img_filtered = high_pass_filter_fft(img_resized)
    #cv2.imshow("img",img_filtered)

    # Load and resize template
    template = cv2.imread("image_20250321_201450_lbent_1.png", cv2.IMREAD_GRAYSCALE)
    #template_filteredr= high_pass_filter_fft(template)
    template_resized = cv2.resize(template, (64, 64))
    template_filtered = high_pass_filter_fft(template_resized)
    #cv2.imshow("temp",template_filtered)
    # Generate rotated templates
    angles = [0,45,90,135, 180,225, 270,315]
    rotated_templates = [rotate_image_with_padding(template_filtered, a) for a in angles]
    start = time.time()
    # Collect bboxes from all rotations
    all_bboxes = []
    for temp in rotated_templates:
        bboxes = convolve_and_get_bboxes(img_filtered, temp, threshold=0.67)
        all_bboxes.extend(bboxes)

    # Apply NMS
    final_bboxes = non_max_suppression_fast(all_bboxes, iou_thresh=0.3)
    final_bboxes = average_boxes(final_bboxes, iou_thresh=0.1)
    
    end = time.time()
    # Draw and show
    out = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2BGR)
    for (x1, y1, x2, y2, score) in final_bboxes:
        cv2.rectangle(out, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(out, f"Score: {score: .2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (122, 0, 0), 2)

    

    print(f"time = {end - start:.2f} seconds")
    print(f"Number of detections: {len(final_bboxes)}")  

    cv2.imshow("Detections", out)
    cv2.imwrite("output.png", out)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
    


Score at (0, 0): 0.5283 dot 39.509742084795185 and norm 74.78255159675456
Score at (3, 0): 0.5302 dot 41.51511629625109 and norm 78.29598048130661
Score at (6, 0): 0.5070 dot 41.14425719837676 and norm 81.14881807361519
Score at (9, 0): 0.5180 dot 43.51124485591954 and norm 84.00621376670578
Score at (12, 0): 0.5037 dot 43.72566408495149 and norm 86.81163180803321
Score at (15, 0): 0.4969 dot 44.830560214925626 and norm 90.2219436805121
Score at (18, 0): 0.5168 dot 47.89262864459832 and norm 92.67607808952131
Score at (21, 0): 0.5463 dot 51.69551183628571 and norm 94.63322727004159
Score at (24, 0): 0.5547 dot 53.87413714089938 and norm 97.11642868808504
Score at (27, 0): 0.5603 dot 55.937395733381535 and norm 99.83163764184975
Score at (30, 0): 0.5895 dot 60.06371596112995 and norm 101.89408695812203
Score at (33, 0): 0.6497 dot 67.53042177487222 and norm 103.94519924988249
Score at (36, 0): 0.6790 dot 73.01181680761957 and norm 107.52153027168374
Score at (39, 0): 0.6512 dot 72.43617

## Using **Opencv TemplateMatching** method

In [19]:
import cv2
import numpy as np
import time



# Rotate image with padding
def rotate_image_with_padding(image, angle):
    h, w = image.shape
    center = (w // 2, h // 2)
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    cos, sin = np.abs(rot_mat[0, 0]), np.abs(rot_mat[0, 1])
    nW, nH = int(h * sin + w * cos), int(h * cos + w * sin)
    rot_mat[0, 2] += (nW / 2) - center[0]
    rot_mat[1, 2] += (nH / 2) - center[1]
    return cv2.warpAffine(image, rot_mat, (nW, nH), borderValue=255)

# Template matching with score threshold
def match_template(image, template, threshold=0.5):
    result = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    yx = np.where(result >= threshold)
    h, w = template.shape
    return [(x, y, x + w, y + h, result[y, x]) for y, x in zip(*yx)]

# Non-Maximum Suppression
def nms(boxes, iou_thresh=0.3):
    if not boxes:
        return []
    boxes = np.array(boxes)
    x1, y1, x2, y2, scores = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]
    keep = []

    while order.size:
        i = order[0]
        keep.append(tuple(boxes[i]))
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        iou = (w * h) / (areas[i] + areas[order[1:]] - (w * h) + 1e-6)
        order = order[1:][iou <= iou_thresh]

    return keep

# Main function
def main():
    start = time.time()

    img = cv2.imread('Dataset/test/images/image_20250321_200736_lbent_3.png', cv2.IMREAD_GRAYSCALE)
    template = cv2.imread("image_20250321_201450_lbent_1.png", cv2.IMREAD_GRAYSCALE)

    img = cv2.resize(img, (260, 260))
    template = cv2.resize(template, (64,64))

    angles = [0, 45, 90, 135, 180, 225, 270, 315]
    all_boxes = []

    for angle in angles:
        rotated = rotate_image_with_padding(template, angle)
        bboxes = match_template(img, rotated, threshold=0.48)
        all_boxes.extend(bboxes)

    final_boxes = nms(all_boxes, iou_thresh=0.6)
    final_boxes = average_boxes(all_boxes,iou_thresh=0.22)

    output = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    for x1, y1, x2, y2, score in final_boxes:
        cv2.rectangle(output, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(output, f"{score:.2f}", (int(x1), int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (222, 0, 0), 1)

    print(f"Time taken: {time.time() - start:.2f} sec")
    print(f"Detections: {len(final_boxes)}")

    cv2.imshow("Detections", output)
    cv2.imwrite("detections_output.png", output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


Time taken: 0.13 sec
Detections: 5


## Pytorch version with single **Convolution Line**

In [94]:
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.ops import nms
import time
import math

# Convert red-channel grayscale image to normalized PyTorch tensor
def to_tensor_from_red_channel(img_bgr):
    red_channel = img_bgr[:, :, 2].astype(np.float32) / 255.0  # Normalize to [0, 1]
    return torch.from_numpy(red_channel).unsqueeze(0).unsqueeze(0).contiguous()  # [1, 1, H, W]

# Rotate a single image tensor
def rotate_tensor_image(tensor_img, angle_deg):
    angle_rad = math.radians(angle_deg)
    theta = torch.tensor([
        [math.cos(angle_rad), -math.sin(angle_rad), 0],
        [math.sin(angle_rad),  math.cos(angle_rad), 0]
    ], dtype=torch.float32, device=tensor_img.device)
    grid = F.affine_grid(theta.unsqueeze(0), tensor_img.size(), align_corners=False)
    rotated = F.grid_sample(tensor_img, grid, padding_mode='zeros', align_corners=False)
    return rotated

# Rotate template batch
def get_rotated_templates(template_tensor, angles):
    rotated_templates = []
    for angle in angles:
        rotated = rotate_tensor_image(template_tensor, angle)
        rotated -= rotated.mean()  # Normalize each rotated template
        rotated_templates.append(rotated)
    return torch.cat(rotated_templates, dim=0)  # [N, 1, h, w]

# Batched template matching using conv2d
def match_template_batched(image_tensor, templates_batch, threshold=0.48):
    N, _, h, w = templates_batch.shape
    response = F.conv2d(image_tensor, templates_batch,stride=1)  # [1, N, H−h+1, W−w+1]
    response_np = response.squeeze(0).detach().cpu().numpy()

    all_boxes = []
    for i in range(N):
        r = response_np[i]
        yx = np.where(r >= threshold)
        for y, x in zip(*yx):
            score = r[y, x]
            all_boxes.append((x, y, x + w, y + h, score))
            #print(f"Template {i}, Score at ({x}, {y}): {score:.4f}")  # Debug output
    return all_boxes

# Torchvision NMS
def nms_torch(boxes, iou_threshold=0.6):
    if not boxes:
        return []
    boxes_np = np.array(boxes)
    boxes_tensor = torch.tensor(boxes_np[:, :4], dtype=torch.float32)
    scores_tensor = torch.tensor(boxes_np[:, 4], dtype=torch.float32)
    keep_indices = nms(boxes_tensor, scores_tensor, iou_threshold)
    return [(*boxes_np[i][:4], boxes_np[i][4]) for i in keep_indices]

# MAIN
def main():
    start = time.time()

    # Load color images and convert using red channel only
    img_bgr = cv2.imread('Dataset/test/images/image_20250321_200736_lbent_3.png')
    template_bgr = cv2.imread('image_20250321_201450_lbent_1.png')
    img_bgr = cv2.resize(img_bgr, (228, 228))
    template_bgr = cv2.resize(template_bgr, (64, 64))

    image_tensor = to_tensor_from_red_channel(img_bgr).cuda()
    template_tensor = to_tensor_from_red_channel(template_bgr).cuda()

    angles = [0, 45, 90, 135, 180, 225, 270, 315]
    rotated_templates = get_rotated_templates(template_tensor, angles)  # [N, 1, h, w]

    all_boxes = match_template_batched(image_tensor, rotated_templates, threshold=80)
    final_boxes = nms_torch(all_boxes, iou_threshold=0.0)

    print(f"Time taken: {time.time() - start:.2f} sec")

    output = cv2.cvtColor((image_tensor.squeeze().cpu().numpy() * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
    for x1, y1, x2, y2, score in final_boxes:
        cv2.rectangle(output, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(output, f"{score:.2f}", (int(x1), int(y1) - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 0, 100), 1)

    
    print(f"Detections: {len(final_boxes)}")

    cv2.imshow("Detections", output)
    cv2.imwrite("detections_output_batched.png", output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


Time taken: 0.12 sec
Detections: 3


## real time inference


In [14]:
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.ops import nms
import time
import math

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")  # Force CPU for compatibility
print(f"Using device: {device}")

def to_tensor_from_red_channel(img_bgr):
    red_channel = img_bgr[:, :, 2].astype(np.float32) / 255.0
    return torch.from_numpy(red_channel).unsqueeze(0).unsqueeze(0).contiguous()

def rotate_tensor_image(tensor_img, angle_deg):
    angle_rad = math.radians(angle_deg)
    theta = torch.tensor([
        [math.cos(angle_rad), -math.sin(angle_rad), 0],
        [math.sin(angle_rad),  math.cos(angle_rad), 0]
    ], dtype=torch.float32, device=tensor_img.device)
    grid = F.affine_grid(theta.unsqueeze(0), tensor_img.size(), align_corners=False)
    rotated = F.grid_sample(tensor_img, grid, padding_mode='zeros', align_corners=False)
    return rotated

def get_rotated_templates(template_tensor, angles):
    rotated_templates = []
    for angle in angles:
        rotated = rotate_tensor_image(template_tensor, angle)
        rotated -= rotated.mean()
        rotated_templates.append(rotated)
    return torch.cat(rotated_templates, dim=0)

def match_template_batched(image_tensor, templates_batch, threshold=0.48):
    N, _, h, w = templates_batch.shape
    response = F.conv2d(image_tensor, templates_batch, stride=1)
    response_np = response.squeeze(0).detach().cpu().numpy()

    all_boxes = []
    for i in range(N):
        r = response_np[i]
        yx = np.where(r >= threshold)
        for y, x in zip(*yx):
            score = r[y, x]
            all_boxes.append((x, y, x + w, y + h, score))
    return all_boxes

def nms_torch(boxes, iou_threshold=0.6):
    if not boxes:
        return []
    boxes_np = np.array(boxes)
    boxes_tensor = torch.tensor(boxes_np[:, :4], dtype=torch.float32)
    scores_tensor = torch.tensor(boxes_np[:, 4], dtype=torch.float32)
    keep_indices = nms(boxes_tensor, scores_tensor, iou_threshold)
    return [(*boxes_np[i][:4], boxes_np[i][4]) for i in keep_indices]

def main():
    template_bgr = cv2.imread('image_20250321_201450_lbent_1.png')
    template_bgr = cv2.resize(template_bgr, (64, 64))
    template_tensor = to_tensor_from_red_channel(template_bgr).to(device)
    angles = [0, 45, 90, 135, 180, 225, 270, 315]
    rotated_templates = get_rotated_templates(template_tensor, angles).to(device)

    url = 'http://192.168.8.100:8080/video'  # Replace with your IP camera stream
    cap = cv2.VideoCapture(url)
    if not cap.isOpened():
        print(f"Unable to open video stream at {url}")
        return

    print("Press 'q' to quit.")
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        frame = cv2.resize(frame, (228, 228))
        image_tensor = to_tensor_from_red_channel(frame).to(device)

        start = time.time()
        all_boxes = match_template_batched(image_tensor, rotated_templates, threshold=80)
        final_boxes = nms_torch(all_boxes, iou_threshold=0.0)
        

        output = cv2.cvtColor((image_tensor.squeeze().cpu().numpy() * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
        for x1, y1, x2, y2, score in final_boxes:
            cv2.rectangle(output, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(output, f"{score:.2f}", (int(x1), int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 0, 100), 1)
        elapsed = time.time() - start
        if elapsed > 0:
            
            cv2.putText(output, f"FPS: {1/elapsed:.2f}", (5, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.putText(output,f"time: {elapsed:.2f} sec", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
        cv2.imshow("Real-Time Detection", output)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    with torch.no_grad():
        main()


Using device: cpu
Press 'q' to quit.
Failed to grab frame.
