# ORB object recognition

In [None]:
import cv2, numpy as np, matplotlib.pyplot as plt
from pathlib import Path
from collections import defaultdict
from tqdm.auto import tqdm

# One default figure size for every plot; print the OpenCV version in use.
plt.rcParams.update({'figure.figsize': (7, 4)})
print('OpenCV', cv2.__version__)

# === paths ===
# Inputs: the folder of key images and the video to scan.
keys_dir = Path('dataset', 'keys')
video_path = Path('dataset', 'video.mp4')
# Output folder, created up front if it does not exist yet.
output_dir = Path('results')
output_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# === hyper‑params ===
# Keyword arguments passed unchanged to cv2.ORB_create.
ORB_PARAMS = {
    'nfeatures': 3000,
    'scaleFactor': 1.2,
    'nlevels': 8,
    'edgeThreshold': 15,
    'fastThreshold': 20,
}

# Lowe ratio: keep a match only if best distance < RATIO_TEST * second-best.
RATIO_TEST = 0.8
# Minimum number of inliers a candidate must reach.
MIN_INLIERS = 10
# Minimum value of inliers / good_matches a candidate must reach.
MIN_INLIER_RATIO = 0.2


In [None]:
def preprocess(img_bgr, blur=True, clahe=True):
    """Convert a BGR image to the grayscale form used for feature detection.

    Args:
        img_bgr: Image in BGR channel order.
        blur: When true, smooth the grayscale image with a 3x3 Gaussian kernel.
        clahe: When true, apply CLAHE (first argument 3.0) after any blur.

    Returns:
        The processed grayscale image.
    """
    result = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    if blur:
        result = cv2.GaussianBlur(result, (3, 3), 0)
    if not clahe:
        return result
    equalizer = cv2.createCLAHE(3.)
    return equalizer.apply(result)

In [None]:
def create_detector():
    """Build an ORB detector configured from the module-level ``ORB_PARAMS``."""
    detector = cv2.ORB_create(**ORB_PARAMS)
    return detector

def create_flann_matcher():
    """Build a FLANN-based matcher with fixed index and search parameters.

    Returns:
        A ``cv2.FlannBasedMatcher`` using index algorithm 6 (FLANN's LSH
        index per the OpenCV feature-matching tutorial) and 50 search checks.
    """
    flann_index_lsh = 6  # numeric id OpenCV's tutorial gives for FLANN_INDEX_LSH
    index_params = {
        'algorithm': flann_index_lsh,
        'table_number': 12,
        'key_size': 20,
        'multi_probe_level': 2,
    }
    search_params = {'checks': 50}
    return cv2.FlannBasedMatcher(index_params, search_params)

In [None]:
def load_key_images(detector, folder):
    """Load every key image in *folder* and extract its features.

    Args:
        detector: Object exposing ``detectAndCompute(image, mask)``.
        folder: Directory to scan, as a ``str`` or ``Path``. Only direct
            children with a .jpg/.jpeg/.png/.bmp suffix (case-insensitive)
            are considered.

    Returns:
        A list of dicts in sorted path order, each with the keys ``name``
        (file stem), ``kp``, ``des`` and ``shape`` (shape of the preprocessed
        grayscale image). Files that cannot be decoded, or for which the
        detector returns no descriptors, are left out.
    """
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
    db = []
    for path in sorted(Path(folder).iterdir()):
        if path.suffix.lower() not in image_suffixes:
            continue
        img_bgr = cv2.imread(str(path))
        if img_bgr is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; skip it rather than crash inside preprocess().
            print('Skipping unreadable key image:', path)
            continue
        gray = preprocess(img_bgr)
        kp, des = detector.detectAndCompute(gray, None)
        if des is None:
            continue
        db.append({'name': path.stem, 'kp': kp, 'des': des, 'shape': gray.shape})
    return db

In [None]:
def good_matches(matcher, d_query, d_train, ratio=None):
    """Return the matches that survive Lowe's ratio test.

    Args:
        matcher: Object exposing ``knnMatch(query, train, k=2)``.
        d_query: Query descriptors, or ``None``.
        d_train: Train descriptors, or ``None``.
        ratio: Ratio threshold; ``None`` (the default) uses the module-level
            ``RATIO_TEST``.

    Returns:
        A list holding the best match of every query descriptor whose best
        distance is below ``ratio`` times its second-best distance. Empty
        when either descriptor set is missing or too small to form a pair.
    """
    if d_query is None or d_train is None:
        return []
    # The ratio test needs two neighbours per query, so fewer than two train
    # descriptors (or no query descriptors) can never yield a good match.
    if len(d_query) == 0 or len(d_train) < 2:
        return []
    if ratio is None:
        ratio = RATIO_TEST
    knn = matcher.knnMatch(d_query, d_train, k=2)
    # The matcher may return fewer than two neighbours for some queries;
    # those entries cannot be ratio-tested and are dropped.
    return [
        pair[0]
        for pair in knn
        if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
    ]

In [None]:
def annotate(frame, box, label):
    """Draw a closed outline and a text label onto *frame* in place.

    Args:
        frame: Image to draw on; it is modified directly, nothing is returned.
        box: Polygon vertices; converted to int32 before drawing.
        label: Text written at pixel position (10, 30).
    """
    colour = (0, 255, 0)
    # Outline width scales with frame.shape[1] but never drops below 2.
    line_width = max(2, int(0.004 * frame.shape[1]))
    outline = [np.int32(box)]
    cv2.polylines(frame, outline, True, colour,
                  thickness=line_width, lineType=cv2.LINE_AA)
    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, colour,
                thickness=2, lineType=cv2.LINE_AA)

In [None]:
def _find_best_key(matcher, key_db, kp_f, des_f):
    """Return ``(key, good, H)`` for the key image with most RANSAC inliers, or ``None``."""
    # cv2.findHomography needs at least 4 point pairs, whatever MIN_INLIERS is.
    min_good = max(MIN_INLIERS, 4)
    best = None
    best_inliers = 0
    for key in key_db:
        good = good_matches(matcher, key['des'], des_f)
        if len(good) < min_good:
            continue
        src_pts = np.float32([key['kp'][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp_f[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if H is None:
            continue
        inliers = int(mask.ravel().sum())
        if inliers < MIN_INLIERS or inliers / len(good) < MIN_INLIER_RATIO:
            continue
        # Rank candidates by inlier count against the best inlier count so
        # far, not against the previous winner's number of good matches.
        if inliers > best_inliers:
            best_inliers = inliers
            best = (key, good, H)
    return best


def process_video():
    """Detect key images in the video and write an annotated copy.

    Reads ``video_path`` frame by frame, picks for each frame the key image
    from ``keys_dir`` with the most RANSAC inliers (subject to ``MIN_INLIERS``
    and ``MIN_INLIER_RATIO``), draws its projected outline and label, and
    writes every frame to ``output_dir / 'orb.mp4'``.

    Returns:
        A ``defaultdict(int)`` mapping key-image name to the number of good
        matches accumulated over all frames in which that key image won.
        Empty if the video cannot be opened; no output is written then.
    """
    det = create_detector()
    matcher = create_flann_matcher()
    key_db = load_key_images(det, keys_dir)
    print('Loaded', len(key_db), 'key images')

    stats = defaultdict(int)
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        # Report the problem instead of silently producing an empty video.
        cap.release()
        print('Cannot open video:', video_path)
        return stats

    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        total = frame_count if frame_count > 0 else None
        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        writer = cv2.VideoWriter(str(output_dir / 'orb.mp4'),
                                 cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        try:
            with tqdm(total=total, desc='video', unit='frame') as pbar:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    gray_f = preprocess(frame, blur=False)
                    kp_f, des_f = det.detectAndCompute(gray_f, None)

                    found = _find_best_key(matcher, key_db, kp_f, des_f)
                    if found is not None:
                        best, best_matches, best_H = found
                        h0, w0 = best['shape']
                        corners = np.float32(
                            [[0, 0], [w0, 0], [w0, h0], [0, h0]]
                        ).reshape(-1, 1, 2)
                        # Project the key image's corners into the frame.
                        box = cv2.perspectiveTransform(corners, best_H)
                        annotate(frame, box, f"{best['name']} ({len(best_matches)})")
                        stats[best['name']] += len(best_matches)

                    writer.write(frame)
                    pbar.update(1)
        finally:
            # Release the writer even if a frame fails mid-way.
            writer.release()
    finally:
        cap.release()
    return stats

In [None]:
# Run the whole pipeline; `stats` maps each key-image name to the number of
# good matches accumulated over the frames in which it was selected.
stats = process_video()
print('Done')

In [None]:
# Bar chart of the accumulated match count per key image; skipped when
# `stats` is empty.
if stats:
    names = list(stats)
    counts = list(stats.values())
    plt.bar(names, counts)
    plt.title('Good matches per key image')
    plt.ylabel('Cumulative matches')
    plt.xticks(rotation=45, ha='right')
    plt.show()

In [None]:
# Display the first frame of the written video as a quick visual check.
cap = cv2.VideoCapture(str(output_dir / 'orb.mp4'))
ret, fr = cap.read()
cap.release()
if ret:
    # Convert from BGR to RGB before handing the frame to matplotlib.
    fr_rgb = cv2.cvtColor(fr, cv2.COLOR_BGR2RGB)
    plt.imshow(fr_rgb)
    plt.axis('off')
    plt.title('Annotated frame sample')
    plt.show()