In [None]:
import os
import pandas as pd
import cv2
from tqdm import tqdm
import numpy as np

In [None]:
# Root directory of the face-matching dataset; edit this to point at your
# local copy. Images are read from the "images" sub-folder of this directory.
root_path = "./face-matching"

# Dataset

In [None]:
# Reference image ids. The column is read as `str` so ids are not parsed as
# integers (which would drop leading zeros such as in "051").
# NOTE(review): ref_img.csv is read from the working directory, not from
# root_path -- confirm that is intended.
ref_df = pd.read_csv("ref_img.csv", dtype={'ref_img': str})
ref_ids = ref_df["ref_img"].tolist()

# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and therefore tie-breaking between equally distant images later on)
# is reproducible across runs and machines.
img_dir = f"{root_path}/images"
all_images = sorted(os.listdir(img_dir))

In [None]:
# Per-image feature: the mean colour over all pixels, keyed by image id
# (the file name without its extension).
features = {}
unreadable = []  # directory entries OpenCV could not decode as an image
for img_name in tqdm(all_images):
    # splitext strips whatever extension the file has, not only ".jpg"
    img_id = os.path.splitext(img_name)[0]

    # cv2.imread reports failure by returning None instead of raising, so
    # stray non-image entries or corrupt files must be checked for explicitly.
    img = cv2.imread(f"{img_dir}/{img_name}")
    if img is None:
        unreadable.append(img_name)
        continue

    # Flatten to (num_pixels, 3) and average each channel; OpenCV's default
    # load yields 3 channels in B, G, R order.
    features[img_id] = img.reshape(-1, 3).mean(axis=0)

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s): {unreadable}")

100%|██████████| 109/109 [00:00<00:00, 177.97it/s]


# Submission

In [None]:
results = []

for ref_id in ref_ids:
    anchor = features[ref_id]

    # Euclidean distance from the reference feature to every stored feature
    # (the reference itself is included here, at distance 0).
    dist_by_id = {
        other_id: np.linalg.norm(anchor - vec)
        for other_id, vec in features.items()
    }

    # Rank ids from nearest to farthest (stable sort), drop the reference
    # image itself, and keep the five closest remaining ids.
    ranked = sorted(dist_by_id, key=dist_by_id.get)
    neighbours = [other_id for other_id in ranked if other_id != ref_id]
    top_5 = neighbours[:5]

    # One submission row per reference: matched ids joined with "|".
    results.append({"ref_img": ref_id, "photos": "|".join(top_5)})

In [None]:
# Assemble the submission table. Columns are named explicitly so the frame
# has the expected schema and column order even if `results` is empty.
submission = pd.DataFrame(results, columns=["ref_img", "photos"])
submission

Unnamed: 0,ref_img,photos
0,48,051|016|084|080|001
1,25,008|078|077|093|087
2,95,108|029|098|014|093
3,43,050|019|104|006|076
4,105,002|056|058|075|086
5,71,015|080|087|016|001
6,46,005|020|040|060|035
7,96,025|015|008|071|077
8,20,046|005|040|035|053
9,85,092|066|070|104|081


In [None]:
# Write the submission table to "submission.csv" in the working directory;
# index=False leaves the DataFrame's row index out of the file.
submission.to_csv("submission.csv", index=False)