In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import combinations
import time


In [None]:
# Dataset location and the file extension used to select images inside it.
DATA_DIR = Path("data") / "lizard_pond"   # folder with images
IMAGE_EXT = ".png"                        # or ".jpg"


In [None]:
# Build the SIFT detector once for the whole notebook. `sift` stays None when
# construction fails, and the failure reason is printed.
sift = None
try:
    sift = cv2.SIFT_create()
except Exception as err:
    print("❌ SIFT not available:", err)
else:
    print("✅ SIFT initialized successfully")


In [None]:
def root_sift(descriptors):
    """Apply the RootSIFT transform to a descriptor matrix.

    Each row is divided by its sum (plus a tiny epsilon to avoid division by
    zero) and the element-wise square root of the result is returned.

    Parameters
    ----------
    descriptors : array-like or None
        One descriptor per row. ``None`` is passed through unchanged.

    Returns
    -------
    numpy.ndarray or None
        float32 array with the same shape as the input, or ``None``.
    """
    if descriptors is None:
        return None

    as_float = descriptors.astype(np.float32)
    row_totals = as_float.sum(axis=1, keepdims=True) + 1e-12
    return np.sqrt(as_float / row_totals)


In [None]:
def extract_features(image):
    """Detect SIFT keypoints in an image and compute RootSIFT descriptors.

    Parameters
    ----------
    image : numpy.ndarray
        Image array. A 2-D array is used as-is as the grayscale input; any
        other array is converted with ``cv2.COLOR_BGR2GRAY``.

    Returns
    -------
    tuple
        ``(keypoints, descriptors)`` where ``descriptors`` is the output of
        ``root_sift`` (``None`` when the detector returned no descriptors).

    Raises
    ------
    RuntimeError
        If the module-level ``sift`` detector is ``None`` (its creation failed).
    """
    # The initialisation cell falls back to `sift = None` on failure; fail with
    # an explicit message instead of an AttributeError on NoneType.
    if sift is None:
        raise RuntimeError(
            "SIFT detector is not available; cv2.SIFT_create() failed during setup"
        )

    # A 2-D array has no channel axis, so there is nothing to convert.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    keypoints, descriptors = sift.detectAndCompute(gray, None)
    return keypoints, root_sift(descriptors)


In [None]:
def match_descriptors(desc1, desc2, ratio=0.75):
    """Match two descriptor sets with FLANN and filter them with a ratio test.

    Parameters
    ----------
    desc1 : array-like or None
        Query descriptors, one per row.
    desc2 : array-like or None
        Train descriptors, one per row.
    ratio : float, default 0.75
        A match is kept only when the best neighbour's distance is below
        ``ratio`` times the second-best neighbour's distance.

    Returns
    -------
    list
        The best-neighbour match objects that passed the ratio test. Empty
        when either input is ``None``, when there are no query descriptors,
        or when there are fewer than two train descriptors.
    """
    if desc1 is None or desc2 is None:
        return []

    # The ratio test compares the two nearest neighbours, so fewer than two
    # train descriptors (or no query descriptors at all) can never produce an
    # accepted match. Return early rather than handing degenerate input to the
    # matcher.
    if len(desc1) == 0 or len(desc2) < 2:
        return []

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(desc1, desc2, k=2)

    good = []
    for m_n in matches:
        # Skip queries for which the matcher returned fewer than two neighbours.
        if len(m_n) < 2:
            continue
        m, n = m_n
        if m.distance < ratio * n.distance:
            good.append(m)
    return good


In [None]:
def geometric_verification(kp1, kp2, matches, ransac_thresh=5.0):
    """Keep only the matches consistent with a RANSAC-estimated homography.

    Parameters
    ----------
    kp1, kp2 : sequence
        Keypoints of the query and train image; each must expose ``.pt``.
    matches : sequence
        Match objects exposing ``queryIdx`` and ``trainIdx``.
    ransac_thresh : float, default 5.0
        Threshold passed to ``cv2.findHomography``.

    Returns
    -------
    tuple
        ``(inliers, H)``. With fewer than four matches the result is
        ``([], None)``; when no mask is returned the result is ``([], H)``.
    """
    if len(matches) < 4:
        return [], None

    src_pts = np.float32([kp1[match.queryIdx].pt for match in matches])
    dst_pts = np.float32([kp2[match.trainIdx].pt for match in matches])

    homography, inlier_mask = cv2.findHomography(
        src_pts, dst_pts, cv2.RANSAC, ransac_thresh
    )
    if inlier_mask is None:
        return [], homography

    # Flatten the mask to one boolean flag per match.
    keep_flags = inlier_mask.ravel().astype(bool)
    surviving = [match for match, keep in zip(matches, keep_flags) if keep]
    return surviving, homography


In [None]:
# Load every image in DATA_DIR with the configured extension, in sorted order.
# `images` and `image_names` are kept index-aligned. Files for which
# cv2.imread returns None are skipped and reported instead of being dropped
# silently.
images = []
image_names = []
skipped_files = []

for img_path in sorted(DATA_DIR.glob(f"*{IMAGE_EXT}")):
    img = cv2.imread(str(img_path))
    if img is None:
        skipped_files.append(img_path.name)
        continue
    images.append(img)
    image_names.append(img_path.name)

print(f"Loaded {len(images)} images")
if skipped_files:
    print(f"⚠️ Skipped {len(skipped_files)} unreadable file(s): {skipped_files}")


In [None]:
# Run feature extraction once per image, then split the (keypoints, descriptors)
# pairs into two lists that stay index-aligned with `images`.
extracted = [extract_features(picture) for picture in images]
keypoints_list = [keypoints for keypoints, _descriptors in extracted]
descriptors_list = [descriptors for _keypoints, descriptors in extracted]

print("✅ Feature extraction complete")


In [None]:
# Score every unordered image pair by the number of matches that survive
# geometric verification. Keys are (i, j) index tuples with i < j.
pair_scores = {}

for pair in combinations(range(len(images)), 2):
    first, second = pair
    candidates = match_descriptors(
        descriptors_list[first], descriptors_list[second]
    )
    verified, _ = geometric_verification(
        keypoints_list[first], keypoints_list[second], candidates
    )
    pair_scores[pair] = len(verified)

print("✅ Pairwise matching completed")


In [None]:
# Visualise the verified matches for the first scored image pair.
# With fewer than two loaded images `pair_scores` is empty, so there is
# nothing to draw; report that instead of failing on an empty key list.
if not pair_scores:
    print("⚠️ No image pairs available to visualize")
else:
    i, j = next(iter(pair_scores))

    # pair_scores stores only inlier counts, so the matches are recomputed here.
    matches = match_descriptors(descriptors_list[i], descriptors_list[j])
    inliers, _ = geometric_verification(
        keypoints_list[i], keypoints_list[j], matches
    )

    vis = cv2.drawMatches(
        images[i], keypoints_list[i],
        images[j], keypoints_list[j],
        inliers, None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    # Convert from BGR to RGB before handing the image to matplotlib.
    ax.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
    ax.axis("off")
    ax.set_title(f"Inlier Matches: {len(inliers)}")
    plt.show()


In [None]:
# Build a symmetric n x n similarity matrix from the pairwise inlier counts.
# Entries with no recorded score (including the diagonal) remain zero.
n = len(images)
sim = np.zeros((n, n))

for (row, col), inlier_count in pair_scores.items():
    sim[row, col] = sim[col, row] = inlier_count


In [None]:
# Rank, for every image, the five most similar *other* images and write the
# result to submission.csv.
results = []

for i in range(n):
    scores = sim[i]
    # The diagonal of `sim` is zero, so the query image is NOT guaranteed to
    # be first in a descending sort; slicing off the first element would drop
    # the best match instead. Exclude the query explicitly by index.
    # A stable sort keeps tie ordering deterministic (lower index first).
    ranked = np.argsort(-scores, kind="stable")
    # Plain ints keep the CSV cell free of NumPy scalar reprs.
    top5 = [int(idx) for idx in ranked if idx != i][:5]
    results.append({
        "lizard_image": image_names[i],
        "top5_ponds": top5
    })

df_results = pd.DataFrame(results)
df_results.to_csv("submission.csv", index=False)

df_results.head()


In [None]:
def apk(actual, predicted, k=5):
    """Score a ranked prediction list against a single correct answer.

    Only the first ``k`` predictions are considered. The score is the
    reciprocal of the 1-based position of the first prediction equal to
    ``actual``, or ``0.0`` when none of them match.

    Parameters
    ----------
    actual : object
        The single correct label.
    predicted : sequence
        Ranked predictions, best first.
    k : int, default 5
        Number of leading predictions to consider.

    Returns
    -------
    float
    """
    for rank, candidate in enumerate(predicted[:k], start=1):
        if candidate == actual:
            return 1.0 / rank
    return 0.0

def map5(pred_df, gt_df):
    """Average the per-row ``apk`` score over the ground-truth table.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Must have ``lizard_image`` and ``top5_ponds`` columns.
    gt_df : pandas.DataFrame
        Must have ``lizard_id`` and ``pond_id`` columns.

    Returns
    -------
    float
        Mean of the scores. Ground-truth rows with no matching prediction row
        are skipped and do not contribute to the mean. When several prediction
        rows match, the first one is used.
    """
    per_row_scores = []
    for liz, true_pond in zip(gt_df["lizard_id"], gt_df["pond_id"]):
        candidates = pred_df.loc[pred_df.lizard_image == liz, "top5_ponds"]
        if candidates.empty:
            continue
        per_row_scores.append(apk(true_pond, candidates.iloc[0]))
    return np.mean(per_row_scores)

# Score the predictions against the ground truth.
# NOTE(review): `gt` is not defined in any visible cell. It presumably is a
# ground-truth DataFrame with `lizard_id` and `pond_id` columns (the columns
# map5 reads) — confirm it is loaded earlier, otherwise this cell raises
# NameError on a fresh Restart & Run All.
map5_score = map5(df_results, gt)
map5_score
