In [6]:
import os
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [12]:

# Load an ImageNet-pretrained ResNet-50 and strip its classification head so
# the network emits pooled features instead of class logits.
# `weights="IMAGENET1K_V1"` selects the same checkpoint that the deprecated
# `pretrained=True` flag resolved to, without the deprecation warning.
model = resnet50(weights="IMAGENET1K_V1")
model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove last FC layer
model.eval()  # inference mode, so batch-norm layers use their stored statistics

# Preprocessing pipeline: fixed-size resize, tensor conversion, then the
# per-channel normalisation statistics listed below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet mean
        std=[0.229, 0.224, 0.225]    # ImageNet std
    )
])

def extract_features(img_path):
    """Return the backbone's embedding for the image stored at ``img_path``.

    The file is opened with PIL, converted to RGB, passed through the
    module-level ``transform`` pipeline and then through ``model`` with
    gradient tracking disabled.

    Args:
        img_path: Path to an image file that PIL can open.

    Returns:
        numpy.ndarray: The model output with every size-1 dimension squeezed
        out. Callers in this notebook reshape it to ``(1, -1)`` before use.
    """
    rgb_image = Image.open(img_path).convert('RGB')
    # The model is fed a batch, so prepend a batch axis of size 1.
    batch = torch.unsqueeze(transform(rgb_image), 0)
    with torch.no_grad():
        embedding = model(batch)
        # squeeze() removes the batch axis along with any other size-1 axes.
        flat = embedding.squeeze()
    return flat.numpy()

def find_most_similar(query_img_path, dataset_folder, sample_size=5, seed=None):
    """Find the sampled image under ``dataset_folder`` most similar to the query.

    ``dataset_folder`` is walked recursively. From every directory up to
    ``sample_size`` files are drawn at random, each is embedded with
    ``extract_features`` and scored against the query embedding with cosine
    similarity. Only the sampled files are compared, so the result can differ
    between runs unless ``seed`` is given (or NumPy's global RNG was seeded).

    Args:
        query_img_path: Path to the query image.
        dataset_folder: Root folder whose directories are sampled.
        sample_size: Maximum number of files drawn per directory (default 5).
        seed: Optional seed for reproducible sampling. ``None`` keeps using
            NumPy's global random state.

    Returns:
        The path of the best-scoring file, or ``None`` when no sampled file
        could be scored.
    """
    query_features = extract_features(query_img_path).reshape(1, -1)

    # Without a seed, draw from NumPy's global RNG so existing callers that
    # seed it themselves keep their behaviour.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # -inf instead of -1: a cosine similarity of exactly -1 must still be able
    # to win when it is the only candidate.
    max_similarity = float("-inf")
    most_similar_img = None

    for subdir, dirs, files in os.walk(dataset_folder):
        # Fix the traversal and candidate order so a given seed always maps to
        # the same files, independent of the filesystem's listing order.
        dirs.sort()
        if not files:
            continue
        candidates = sorted(files)

        # Select up to `sample_size` random files from each subfolder
        selected_files = rng.choice(
            candidates, size=min(sample_size, len(candidates)), replace=False
        )
        for filename in selected_files:
            file_path = os.path.join(subdir, filename)
            if not os.path.isfile(file_path):
                continue

            try:
                dataset_features = extract_features(file_path).reshape(1, -1)
            except OSError as exc:
                # PIL raises OSError subclasses for files it cannot read as an
                # image (e.g. Thumbs.db); skip them instead of aborting.
                print(f"Skipped {file_path}: {exc}")
                continue

            similarity = cosine_similarity(query_features, dataset_features)[0][0]
            print(f"Similarity with {filename}: {similarity:.4f}")

            if similarity > max_similarity:
                max_similarity = similarity
                most_similar_img = file_path

    if most_similar_img is None:
        print(f"\nNo comparable images found under {dataset_folder}")
        return None

    print(f"\nMost similar image: {most_similar_img} (Similarity: {max_similarity:.4f})")
    return most_similar_img

# Example usage: the query image and the dataset both live one level above
# the current working directory.
dataset_dir = os.path.join(os.getcwd(), '..', 'Datasets')
input_image_name = 'image2.jpg'
input_image_dir = os.path.join(os.getcwd(), '..', 'imput_samp')
input_image_path = os.path.join(input_image_dir, input_image_name)
# Left as a bare expression so the notebook displays the returned path.
find_most_similar(query_img_path=input_image_path, dataset_folder=dataset_dir)




Similarity with anna_image_291.jpeg: 0.5622
Similarity with anna_image_118.jpeg: 0.5850
Similarity with anna_image_225.jpeg: 0.5700
Similarity with anna_image_88.jpeg: 0.6628
Similarity with anna_image_202.jpeg: 0.5336
Similarity with ariel_image_151.jpeg: 0.6127
Similarity with ariel_image_226.jpeg: 0.5602
Similarity with ariel_image_185.jpeg: 0.5233
Similarity with ariel_image_116.jpeg: 0.5735
Similarity with ariel_image_238.jpeg: 0.6159
Similarity with arura_image_374.jpeg: 0.5717
Similarity with arura_image_91.jpeg: 0.5717
Similarity with arura_image_214.jpeg: 0.6044
Similarity with arura_image_224.jpeg: 0.5858
Similarity with arura_image_153.jpeg: 0.5375
Similarity with belle_image_194.jpeg: 0.5221
Similarity with belle_image_249.jpeg: 0.5482
Similarity with belle_image_168.jpeg: 0.6819
Similarity with belle_image_177.jpeg: 0.6290
Similarity with belle_image_270.jpeg: 0.7000
Similarity with Cinderella_image_614.jpeg: 0.5829
Similarity with Cinderella_image_425.jpeg: 0.5566
Similar

'c:\\Users\\quydz\\Desktop\\data_hacks_2025\\Code_space\\..\\Datasets\\belle\\belle_image_270.jpeg'

In [13]:
import os
import random
from deepface import DeepFace

def get_random_images_from_subdirs(dataset_folder, sample_size=10, valid_exts=(".jpg", ".jpeg", ".png")):
    """Return a random sample of image paths found anywhere under ``dataset_folder``.

    The folder is walked recursively and every file whose name ends with one
    of ``valid_exts`` (compared case-insensitively) becomes a candidate. The
    sample is drawn from the pooled candidates, not per sub-folder, using the
    ``random`` module's global state.

    Args:
        dataset_folder: Root folder to search.
        sample_size: Maximum number of paths to return.
        valid_exts: A single extension string or any iterable of extension
            strings (e.g. ``".jpg"`` or ``[".jpg", ".PNG"]``).

    Returns:
        list[str]: ``min(sample_size, number of images found)`` distinct paths.

    Raises:
        ValueError: If no file under ``dataset_folder`` has a valid extension.
    """
    # str.endswith only accepts a str or a tuple of str, and the filename is
    # lower-cased before matching, so normalise the extensions the same way.
    if isinstance(valid_exts, str):
        valid_exts = (valid_exts,)
    suffixes = tuple(ext.lower() for ext in valid_exts)

    all_image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(dataset_folder)
        for file in files
        if file.lower().endswith(suffixes)
    ]

    if not all_image_paths:
        raise ValueError("No images found in dataset folder.")

    # os.walk yields entries in filesystem order; sorting makes the sample
    # reproducible once the caller seeds `random`.
    all_image_paths.sort()

    return random.sample(all_image_paths, min(sample_size, len(all_image_paths)))

def find_most_similar_face(query_img_path, dataset_folder, model_name="Facenet", sample_size=10):
    """Find the sampled dataset image whose face is closest to the query face.

    A random sample of up to ``sample_size`` images is drawn from
    ``dataset_folder`` via ``get_random_images_from_subdirs``. Each one is
    compared with the query using ``DeepFace.verify`` and the image with the
    smallest reported ``distance`` wins. Comparisons that raise are reported
    and skipped so that one bad image does not abort the search.

    Args:
        query_img_path: Path to the query image.
        dataset_folder: Root folder the candidate images are sampled from.
        model_name: Model name forwarded to ``DeepFace.verify``.
        sample_size: Maximum number of dataset images to compare.

    Returns:
        The path of the closest match, or ``None`` if every comparison was
        skipped.

    Raises:
        ValueError: Propagated from ``get_random_images_from_subdirs`` when the
            dataset folder contains no images.
    """
    sample_images = get_random_images_from_subdirs(dataset_folder, sample_size=sample_size)

    best_match = None
    best_score = float("inf")  # Lower distance is better

    for img_path in sample_images:
        try:
            # NOTE(review): enforce_detection=False presumably lets images with
            # no detectable face be compared anyway -- confirm in DeepFace docs.
            result = DeepFace.verify(img1_path=query_img_path,
                                     img2_path=img_path,
                                     model_name=model_name,
                                     enforce_detection=False)
            distance = result['distance']
            print(f"Similarity with {os.path.basename(img_path)}: Distance = {distance:.4f}")
            if distance < best_score:
                best_score = distance
                best_match = img_path
        except Exception as e:
            # Deliberately best-effort: report the failure and keep going.
            print(f"Skipped {img_path}: {e}")

    if best_match is None:
        # Every comparison failed; do not report this as a successful match.
        print("\nNo face match found: all sampled images were skipped.")
        return None

    print(f"\n✅ Most similar face: {best_match} (Distance: {best_score:.4f})")
    return best_match


ImportError: Traceback (most recent call last):
  File "c:\Users\quydz\AppData\Local\Programs\Python\Python312\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.