In [27]:
import os
import pandas as pd
import cv2
from tqdm.notebook import tqdm
import numpy as np
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

In [44]:
# Dataset layout: image files live under <ROOT_PATH>/images and the list of
# reference image ids is stored in <ROOT_PATH>/ref_img.csv.
ROOT_PATH = "./face-matching"
root_path = ROOT_PATH  # lowercase alias holding the same path string
IMG_DIR = os.path.join(ROOT_PATH, "images")
REF_CSV = os.path.join(ROOT_PATH, "ref_img.csv")

# Checkpoint id handed to CLIPModel / CLIPProcessor.from_pretrained.
MODEL_ID = "openai/clip-vit-large-patch14"
# Similarity score a candidate must exceed to count as a match.
THRESHOLD = 0.80

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


# Dataset

In [46]:
# Reference ids are read as strings so zero-padded ids (e.g. "048") keep
# their leading zeros and match the image file stems.
ref_df = pd.read_csv(REF_CSV, dtype={'ref_img': str})
ref_ids = ref_df["ref_img"].tolist()

# os.listdir returns entries in arbitrary order. Sorting makes the iteration
# order of every later cell (and therefore tie-breaking between equal scores
# and the order of ids in the output) reproducible across runs and machines.
all_images = sorted(f for f in os.listdir(IMG_DIR) if f.endswith('.jpg'))

# Model

In [47]:
# Load the preprocessor and the CLIP weights for MODEL_ID, then move the
# model onto the selected device.
processor = CLIPProcessor.from_pretrained(MODEL_ID)
model = CLIPModel.from_pretrained(MODEL_ID)
model = model.to(device)

In [None]:
features = {}
print("Extracting features with TTA...")


def _unit_norm(t):
    """Scale `t` to unit L2 norm along its last dimension."""
    return t / t.norm(p=2, dim=-1, keepdim=True)


for img_filename in tqdm(all_images):
    img_id = img_filename[:-4]  # strip the 4-character ".jpg" suffix
    img_path = os.path.join(IMG_DIR, img_filename)

    try:
        image = Image.open(img_path).convert("RGB")

        # Test-time augmentation: embed the image together with its
        # horizontal mirror in a single batch of two.
        mirrored = image.transpose(Image.FLIP_LEFT_RIGHT)
        inputs = processor(images=[image, mirrored], return_tensors="pt", padding=True).to(device)

        with torch.no_grad():
            view_embeddings = _unit_norm(model.get_image_features(**inputs))
            # Mean of the two unit vectors, rescaled back to unit length.
            pooled = _unit_norm(view_embeddings.mean(dim=0))

        features[img_id] = pooled.cpu().numpy()

    except Exception as e:
        # Best effort: report the failing image and carry on with the rest.
        print(f"Error {img_id}: {e}")

Extracting features with TTA...


  0%|          | 0/109 [00:00<?, ?it/s]

In [None]:
results = []

print("Matching...")
for ref_id in ref_ids:
    if ref_id in features:
        ref_feature = features[ref_id]

        # Similarity of this reference against every other embedded image.
        scores = {
            img_id: np.dot(ref_feature, feature.T).item()
            for img_id, feature in features.items()
            if img_id != ref_id
        }

        # Ids ranked best-first; the sort is stable, so equal scores keep
        # the insertion order of `features`.
        ranked = sorted(scores, key=scores.get, reverse=True)
        matches = [img_id for img_id in ranked if scores[img_id] > THRESHOLD]

        # Fewer than four above the threshold: fall back to the four
        # best-ranked images so the column is never empty.
        if len(matches) < 4:
            matches = ranked[:4]

        photos = "|".join(matches)
    else:
        # No features were extracted for this reference: emit an empty row.
        photos = ""

    results.append({"ref_img": ref_id, "photos": photos})

Matching...


# Z-score

In [None]:
img_features = {}
ref_features = {}

print("Extracting features with TTA...")
for img_filename in tqdm(all_images):
    img_id = img_filename[:-4]
    img_path = os.path.join(IMG_DIR, img_filename)

    try:
        original = Image.open(img_path).convert("RGB")
        flipped = original.transpose(Image.FLIP_LEFT_RIGHT)
        inputs = processor(images=[original, flipped], return_tensors="pt", padding=True).to(device)

        with torch.no_grad():
            # One unit-length embedding per view (original, flipped).
            per_view = model.get_image_features(**inputs)
            per_view = per_view / per_view.norm(p=2, dim=-1, keepdim=True)

            # Collapse the two views into a single unit-length vector.
            merged = per_view.mean(dim=0)
            merged = merged / merged.norm(p=2, dim=-1, keepdim=True)

        feat = merged.cpu().numpy()

        # Every image is stored; reference images are additionally indexed
        # in ref_features (same array object).
        img_features[img_id] = feat
        if img_id in ref_ids:
            ref_features[img_id] = feat

    except Exception as e:
        print(f"Error {img_id}: {e}")

# Only references whose embedding was actually extracted can be scored.
# A reference without features keeps an empty assignment list (the same
# outcome the threshold-matching cell gives it) instead of aborting this
# cell with an opaque KeyError.
scored_ref_ids = [r for r in ref_ids if r in ref_features]
skipped_ref_ids = [r for r in ref_ids if r not in ref_features]
if skipped_ref_ids:
    print(f"WARNING: no features for refs {skipped_ref_ids}; they receive no matches")

ref_id_set = set(ref_ids)  # constant-time membership test
non_ref_ids = [i for i in img_features if i not in ref_id_set]

# One row per non-reference image / per scored reference.
img_matrix = np.vstack([img_features[i] for i in non_ref_ids])
ref_matrix = np.vstack([ref_features[r] for r in scored_ref_ids])

# Shape: (N_images, N_scored_refs); dot products of the stored embeddings.
raw_scores = np.dot(img_matrix, ref_matrix.T)

# Standardise each reference's column so that references with generally
# high or low raw similarity are compared on the same scale.
means = raw_scores.mean(axis=0)
stds = raw_scores.std(axis=0)
z_scores = (raw_scores - means) / (stds + 1e-6)  # Avoid div by zero

final_assignments = {r: [] for r in ref_ids}
assigned_img_indices = set()

# Every (z-score, image row, reference column) triple, best score first.
rows, cols = z_scores.shape
candidates = [(z_scores[r, c], r, c) for r in range(rows) for c in range(cols)]
candidates.sort(key=lambda x: x[0], reverse=True)

MAX_CAP = 8
matches_count = {c: 0 for c in range(cols)}

# Pass 1: greedy assignment in score order. Each image is assigned at most
# once and each reference takes at most MAX_CAP images, so a single
# reference cannot absorb most of the images.
for score, img_idx, ref_idx in candidates:
    if img_idx in assigned_img_indices:
        continue

    if matches_count[ref_idx] < MAX_CAP:
        real_img_id = non_ref_ids[img_idx]
        real_ref_id = scored_ref_ids[ref_idx]
        final_assignments[real_ref_id].append(real_img_id)

        assigned_img_indices.add(img_idx)
        matches_count[ref_idx] += 1

# Pass 2: images still unassigned go to their best-scoring reference,
# regardless of the cap.
for i in range(rows):
    if i not in assigned_img_indices:
        best_ref_idx = np.argmax(z_scores[i])
        real_img_id = non_ref_ids[i]
        real_ref_id = scored_ref_ids[best_ref_idx]
        final_assignments[real_ref_id].append(real_img_id)

# One output row per reference; the assigned photo ids are joined with "|".
results = [
    {"ref_img": rid, "photos": "|".join(final_assignments[rid])}
    for rid in ref_ids
]

submission = pd.DataFrame(results)
print(submission)

# Sanity check: total number of assigned images, then the count per reference.
count = sum(map(len, final_assignments.values()))

print("\nCounts per reference: with total matches =", count)
for rid in ref_ids:
    count = len(final_assignments[rid])
    print(f"Ref {rid}: {count} matches")

submission.to_csv("submission_zscore.csv", index=False)

Extracting features with TTA...


  0%|          | 0/109 [00:00<?, ?it/s]

   ref_img                           photos
0      048                  101|102|078|066
1      025  083|086|006|067|040|041|033|042
2      095          021|097|014|047|072|045
3      043          034|027|013|023|081|052
4      105  024|029|093|050|058|100|001|057
5      071  011|070|065|088|017|049|032|094
6      046          060|019|010|069|075|063
7      096              035|015|068|018|098
8      020              031|053|077|091|038
9      085              106|103|036|009|037
10     061      104|107|059|062|002|022|030
11     073          090|003|039|108|044|082
12     084      012|089|055|056|007|016|079
13     026              080|051|099|004|076
14     008  087|000|064|074|054|092|028|005

Counts per reference: with total matches = 94
Ref 048: 4 matches
Ref 025: 8 matches
Ref 095: 6 matches
Ref 043: 6 matches
Ref 105: 8 matches
Ref 071: 8 matches
Ref 046: 6 matches
Ref 096: 5 matches
Ref 020: 5 matches
Ref 085: 5 matches
Ref 061: 7 matches
Ref 073: 6 matches
Ref 084: 7 matches
Re

# Hungarian Algorithm

In [None]:
from scipy.optimize import linear_sum_assignment

# Targets are all image files whose stem (name without ".jpg") is not a reference id.
target_images = [name for name in all_images if name[:-4] not in ref_ids]

print("Total Images:", len(all_images))
print("References:", len(ref_ids))
print("Targets to assign:", len(target_images))

# Filled by the embedding loops below: target_feats[k] belongs to
# target_ids[k]; ref_feats_map maps reference id -> embedding.
target_feats, target_ids = [], []
ref_feats_map = {}

print("Extracting features...")

def get_embedding(path):
    """Return a flip-averaged, unit-length image embedding as a NumPy array.

    The image at `path` is loaded as RGB and embedded by the module-level
    `model` together with its horizontal mirror. Both embeddings are
    L2-normalised, averaged, and the average is L2-normalised again.
    """
    def _unit(t):
        # Divide by the L2 norm taken along the last dimension.
        return t / t.norm(p=2, dim=-1, keepdim=True)

    source = Image.open(path).convert("RGB")
    views = [source, source.transpose(Image.FLIP_LEFT_RIGHT)]
    inputs = processor(images=views, return_tensors="pt", padding=True).to(device)

    with torch.no_grad():
        per_view = _unit(model.get_image_features(**inputs))
        pooled = _unit(per_view.mean(dim=0))

    return pooled.cpu().numpy()

# Embed every reference whose image file exists; a missing file is only reported.
for rid in tqdm(ref_ids, desc="Refs"):
    path = os.path.join(IMG_DIR, f"{rid}.jpg")
    if not os.path.exists(path):
        print(f"WARNING: Ref {rid} not found!")
        continue
    ref_feats_map[rid] = get_embedding(path)

# Embed the targets; an image that fails is reported and left out of both
# target_feats and target_ids, so the two lists stay aligned.
for img_filename in tqdm(target_images, desc="Targets"):
    img_id = img_filename[:-4]
    path = os.path.join(IMG_DIR, img_filename)
    try:
        feat = get_embedding(path)
    except Exception as e:
        print(f"Error {img_id}: {e}")
        continue
    target_feats.append(feat)
    target_ids.append(img_id)

# References without an embedding (e.g. the image file was not found above)
# cannot be scored. They keep an empty cluster instead of raising a KeyError
# while the reference matrix is built.
slot_ref_ids = [rid for rid in ref_ids if rid in ref_feats_map]
missing_refs = [rid for rid in ref_ids if rid not in ref_feats_map]
if missing_refs:
    print(f"WARNING: skipping refs without features: {missing_refs}")
if not slot_ref_ids:
    raise ValueError("No reference embeddings available; nothing to assign targets to")

# Shape: (N_targets, D), where D is the embedding size from get_embedding.
# NOTE(review): the previous comment said D = 512; the ViT-L/14 checkpoint
# is documented with a 768-dim projection -- confirm against model.config.
target_matrix = np.vstack(target_feats)
# Shape: (N_scored_refs, D)
ref_matrix = np.vstack([ref_feats_map[rid] for rid in slot_ref_ids])

# Shape: (N_targets, N_scored_refs)
raw_sims = np.dot(target_matrix, ref_matrix.T)

# Per-reference standardisation of the similarity columns.
means = raw_sims.mean(axis=0)
stds = raw_sims.std(axis=0)
z_scores = (raw_sims - means) / (stds + 1e-6)  # epsilon avoids div by zero

# Hungarian algorithm
# The solver produces a 1-to-1 assignment, but many targets must map to each
# reference. Each reference is therefore duplicated into identical "slots".
# With a rectangular cost matrix that has more rows than columns the solver
# leaves rows unassigned, so the slot count is raised whenever the minimum
# would not give every target a column.
MIN_SLOTS_PER_REF = 10
SLOTS_PER_REF = max(MIN_SLOTS_PER_REF, -(-len(target_ids) // len(slot_ref_ids)))  # ceil division
total_slots = len(slot_ref_ids) * SLOTS_PER_REF

# Cost matrix: the solver minimises cost, so cost = -z_score.
cost_matrix = np.zeros((len(target_ids), total_slots))

for i in range(len(slot_ref_ids)):
    score_col = z_scores[:, i]

    start_col = i * SLOTS_PER_REF
    end_col = start_col + SLOTS_PER_REF

    # Every slot of reference i carries the same (negated) score column.
    cost_matrix[:, start_col:end_col] = -score_col[:, np.newaxis]

print(f"Solving assignment problem on matrix shape {cost_matrix.shape}...")

row_ind, col_ind = linear_sum_assignment(cost_matrix)

clusters = {rid: [] for rid in ref_ids}

for r, c in zip(row_ind, col_ind):
    # Slot column -> index of the reference that owns the slot.
    ref_idx = c // SLOTS_PER_REF

    assigned_ref_id = slot_ref_ids[ref_idx]
    assigned_img_id = target_ids[r]

    clusters[assigned_ref_id].append(assigned_img_id)

# Report how many targets each reference received.
print("\nFinal Counts:")
for rid in ref_ids:
    print(f"Ref {rid}: {len(clusters[rid])}")

# One output row per reference; the assigned photo ids are joined with "|".
results = [
    {"ref_img": rid, "photos": "|".join(clusters[rid])}
    for rid in ref_ids
]

submission = pd.DataFrame(results)
submission.to_csv("submission_hungarian.csv", index=False)
print("Saved to submission_hungarian.csv")

Total Images: 109
References: 15
Targets to assign: 94
Extracting features...


Refs:   0%|          | 0/15 [00:00<?, ?it/s]

Targets:   0%|          | 0/94 [00:00<?, ?it/s]

Solving assignment problem on matrix shape (94, 150)...

Final Counts:
Ref 048: 4
Ref 025: 10
Ref 095: 6
Ref 043: 6
Ref 105: 10
Ref 071: 7
Ref 046: 6
Ref 096: 5
Ref 020: 5
Ref 085: 5
Ref 061: 5
Ref 073: 6
Ref 084: 7
Ref 026: 5
Ref 008: 7
Saved to submission_hungarian.csv
