In [4]:
# === Setup ===
import os
from pathlib import Path
import cv2
import numpy as np

# Directory that receives every artifact produced by this notebook
res_path = Path("results")
res_path.mkdir(parents=True, exist_ok=True)

# Read both inputs a single time; cv2.imread yields a BGR array, or None on failure
ref_img_color = cv2.imread("reference_img.png", cv2.IMREAD_COLOR)
tgt_img_color = cv2.imread("align_this.jpg", cv2.IMREAD_COLOR)

# Stop right away if either image could not be read
if any(loaded is None for loaded in (ref_img_color, tgt_img_color)):
    raise FileNotFoundError("Please place 'reference_img.png' and 'align_this.jpg' next to this notebook.")

In [5]:
def harris_corner_detection(reference_image_bgr):
    """
    Harris Corner Detection (visualization).

    Marks strong Harris responses in red on a copy of the input and writes
    the overlay to results/harris.png.

    Parameters:
      - reference_image_bgr: BGR color image (as returned by cv2.imread).

    Returns:
      - Path of the written file (res_path / "harris.png").

    Raises:
      - OSError: if cv2.imwrite reports that the file could not be written.
    """
    gray = cv2.cvtColor(reference_image_bgr, cv2.COLOR_BGR2GRAY)
    gray_f = np.float32(gray)  # cornerHarris is fed a float32 single-channel image

    # Harris parameters (standard choices)
    response = cv2.cornerHarris(gray_f, blockSize=2, ksize=3, k=0.04)
    response = cv2.dilate(response, None)  # visualization only: thickens the marks

    # Paint every pixel whose response exceeds 1% of the maximum in red (BGR)
    vis = reference_image_bgr.copy()
    vis[response > 0.01 * response.max()] = [0, 0, 255]

    out_path = res_path / "harris.png"
    # imwrite signals failure through its return value, so check it explicitly
    if not cv2.imwrite(str(out_path), vis):
        raise OSError(f"Could not write {out_path}")
    return out_path

# Page 1: run the detector on the reference image (the overlay is written by the call)
harris_out = harris_corner_detection(reference_image_bgr=ref_img_color)
print(f"Saved: {res_path}/harris.png")

Saved: results/harris.png


In [6]:
def _write_matches_png(ref_gray, kp_ref, tgt_gray, kp_tgt, matches, inlier_mask=None):
    """Draw `matches` between the two gray images into results/matches.png; returns imwrite's success flag."""
    vis = cv2.drawMatches(ref_gray, kp_ref, tgt_gray, kp_tgt, matches, None,
                          matchesMask=inlier_mask,
                          flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    return cv2.imwrite(str(res_path / "matches.png"), vis)


def align_images_sift(image_to_align_bgr, reference_image_bgr,
                      max_features=10, good_match_precent=0.7):
    """
    Align `image_to_align_bgr` onto `reference_image_bgr` using SIFT + FLANN + Homography.

    Parameters (assignment-consistent & tutorial-faithful):
      - image_to_align_bgr: BGR image that gets warped (target).
      - reference_image_bgr: BGR image defining the output frame (reference).
      - good_match_precent (0.7): Lowe-ratio threshold.
      - max_features (10): used as MIN_MATCH_COUNT = 10 (minimum #good matches before homography),
        NOT as a hard limit on SIFT keypoints (limiting SIFT to 10 typically breaks homography).
        Falsy or non-positive values fall back to 10.

    Outputs (saved to 'results/'):
      - aligned.png : target warped into the reference frame (page 2)
      - matches.png : inlier matches visualization (page 3); on a failure path a
        debug visualization is written instead, before the exception is raised.

    Returns:
      - H_tgt2ref (3x3): homography mapping target -> reference

    Raises:
      - RuntimeError: too few SIFT features, too few good matches, or no homography found.
      - OSError: aligned.png or the final matches.png could not be written.
    """
    # Convert to grayscale for feature extraction
    ref_gray = cv2.cvtColor(reference_image_bgr, cv2.COLOR_BGR2GRAY)  # img1 (reference)
    tgt_gray = cv2.cvtColor(image_to_align_bgr,  cv2.COLOR_BGR2GRAY)  # img2 (target)

    # SIFT (do NOT clamp to 10)
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(ref_gray, None)
    kp2, des2 = sift.detectAndCompute(tgt_gray, None)
    # k=2 matching below needs at least two descriptors on each side
    if des1 is None or des2 is None or len(kp1) < 2 or len(kp2) < 2:
        vis = cv2.drawKeypoints(ref_gray, kp1 or [], None, color=(0, 255, 0))
        cv2.imwrite(str(res_path / "matches.png"), vis)  # best-effort debug output
        raise RuntimeError("Not enough SIFT features detected.")

    # FLANN (KD-Tree) + kNN (tutorial)
    FLANN_INDEX_KDTREE = 1
    flann = cv2.FlannBasedMatcher(dict(algorithm=FLANN_INDEX_KDTREE, trees=5),
                                  dict(checks=50))
    knn = flann.knnMatch(des1, des2, k=2)  # Ref -> Target

    # Lowe-ratio filter. Entries with fewer than two neighbours cannot be
    # ratio-tested, so they are skipped instead of breaking the unpacking.
    ratio = float(good_match_precent)
    good = [pair[0] for pair in knn
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance]

    # Minimum good matches (interpret 'max_features=10' as MIN_MATCH_COUNT=10)
    MIN_MATCH_COUNT = int(max_features) if max_features and max_features > 0 else 10
    if len(good) <= MIN_MATCH_COUNT:
        _write_matches_png(ref_gray, kp1, tgt_gray, kp2, good)  # best-effort debug output
        raise RuntimeError(f"Not enough good matches: {len(good)} <= {MIN_MATCH_COUNT}")

    # Homography: Ref -> Target (tutorial order)
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)  # ref
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)  # target
    H_ref2tgt, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H_ref2tgt is None:
        _write_matches_png(ref_gray, kp1, tgt_gray, kp2, good)  # best-effort debug output
        raise RuntimeError("Homography estimation failed (H=None).")

    # Warp: Target -> Reference (use inverse); output canvas has the reference size
    H_tgt2ref = np.linalg.inv(H_ref2tgt)
    h, w = ref_gray.shape
    aligned = cv2.warpPerspective(image_to_align_bgr, H_tgt2ref, (w, h))
    # imwrite signals failure through its return value, so check it explicitly
    if not cv2.imwrite(str(res_path / "aligned.png"), aligned):
        raise OSError(f"Could not write {res_path / 'aligned.png'}")

    # Matches (inliers only): the RANSAC mask selects which good matches are drawn
    matchesMask = (mask.ravel().tolist() if mask is not None else None)
    if not _write_matches_png(ref_gray, kp1, tgt_gray, kp2, good, matchesMask):
        raise OSError(f"Could not write {res_path / 'matches.png'}")

    return H_tgt2ref

# Pages 2 & 3: warp the target onto the reference; both PNGs are written by the call
H = align_images_sift(
    tgt_img_color,             # image to align (target)
    ref_img_color,             # reference frame
    max_features=10,           # interpreted as MIN_MATCH_COUNT (tutorial logic)
    good_match_precent=0.7,    # Lowe-ratio
)
print(f"Saved: {res_path}/aligned.png")
print(f"Saved: {res_path}/matches.png")

Saved: results/aligned.png
Saved: results/matches.png


In [7]:
from PIL import Image, ImageDraw, ImageFont

def _add_caption(img_path, caption_text):
    """
    Add a prominent caption band on top of an image and return a PIL Image.

    The caption is word-wrapped to the image width and the band grows to fit
    all lines, so long captions are not cut off at the right edge.

    Tries to use a TrueType font (DejaVuSans) with a relatively large size.
    Falls back to default font if the TTF is unavailable.

    Parameters:
      - img_path: path of the image to decorate.
      - caption_text: caption string; explicit newlines start a new line.

    Returns:
      - A new RGB PIL Image: caption band stacked above the original picture.
    """
    # convert() returns an in-memory copy, so the file can be closed right away
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    W, H = img.size

    # Try a large TTF font; fallback to default if missing
    font = None
    for candidate in ["DejaVuSans.ttf", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"]:
        try:
            font = ImageFont.truetype(candidate, size=max(18, H // 25))
            break
        except Exception:
            # Deliberate best-effort: any load failure just moves on to the next candidate
            continue
    if font is None:
        font = ImageFont.load_default()

    margin = 16
    max_text_w = max(1, W - 2 * margin)

    # Greedy word wrap, measured with the actual font on a throwaway canvas
    measure = ImageDraw.Draw(Image.new("RGB", (1, 1)))
    lines = []
    for paragraph in caption_text.splitlines() or [""]:
        if measure.textlength(paragraph, font=font) <= max_text_w:
            lines.append(paragraph)  # fits as-is: keep the original spacing
            continue
        current = ""
        for word in paragraph.split():
            trial = f"{current} {word}" if current else word
            if current and measure.textlength(trial, font=font) > max_text_w:
                lines.append(current)
                current = word
            else:
                current = trial
        if current:
            lines.append(current)

    # Line height derived from the font size; fonts without a `size`
    # attribute get a small fixed fallback.
    line_h = int(getattr(font, "size", 11) * 1.25) + 2

    # Band keeps the original minimum height but grows to hold every line
    band_h = max(60, H // 12, len(lines) * line_h + margin)
    band = Image.new("RGB", (W, band_h), color=(240, 240, 240))
    draw = ImageDraw.Draw(band)

    # Center the text block vertically inside the band
    y = (band_h - len(lines) * line_h) // 2
    for line in lines:
        draw.text((margin, y), line, fill=(0, 0, 0), font=font)
        y += line_h

    stacked = Image.new("RGB", (W, H + band_h), color=(255, 255, 255))
    stacked.paste(band, (0, 0))
    stacked.paste(img, (0, band_h))
    return stacked

def export_pdf(harris_path, aligned_path, matches_path, out_pdf_path, caption=None):
    """
    Assemble the three result images into one multi-page PDF.

    Page 1 is the Harris image as-is; pages 2 and 3 are the aligned and
    matches images, each with a caption band added by `_add_caption`.

    Parameters:
      - harris_path, aligned_path, matches_path: paths of the input images.
      - out_pdf_path: destination of the PDF.
      - caption: text for the caption band on pages 2 and 3; defaults to the
        method summary used for this assignment.
    """
    if caption is None:
        caption = "METHOD: SIFT + FLANN  |  Lowe ratio = 0.7  |  NOTE: features were NOT limited to 10; MIN_MATCH_COUNT = 10"
    # convert() returns an in-memory copy, so the file can be closed right away
    with Image.open(harris_path) as src:
        p1 = src.convert("RGB")
    p2 = _add_caption(aligned_path, caption)
    p3 = _add_caption(matches_path, caption)
    # save_all + append_images writes p1, p2, p3 as consecutive pages
    p1.save(out_pdf_path, save_all=True, append_images=[p2, p3])
    # Report the path that was actually written, not a hard-coded one
    print(f"Saved PDF: {out_pdf_path}")

# Bundle the three saved figures into the report PDF inside the results folder
export_pdf(*(res_path / name for name in (
    "harris.png",
    "aligned.png",
    "matches.png",
    "assignment_4_pages.pdf",
)))

Saved PDF: results/assignment_4_pages.pdf
