In [None]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Step 1: Load pre-trained CNN model (ResNet50)
# ImageNet-pretrained ResNet50 used purely as a feature extractor:
# include_top=False omits the classification head, and pooling='avg'
# requests global average pooling on the final convolutional output.
base_model = ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)

# `model` is the object the rest of this file calls `.predict` on; it maps the
# backbone's input straight to the backbone's (pooled) output.
model = Model(outputs=base_model.output, inputs=base_model.input)

# Step 2: Image preprocessing
def preprocess_image(image_path, target_size=(224, 224)):
    """
    Load an image from disk and prepare it as a single-image ResNet50 batch.

    Args:
        image_path: Path to an image file that PIL can open.
        target_size: Size tuple handed to ``Image.resize``.

    Returns:
        The output of ``preprocess_input`` for an array with a leading batch
        axis of length 1 (the resized RGB pixels).
    """
    # Image.open keeps the file handle open until the image is closed; when
    # looping over a large folder that leaks one descriptor per image. The
    # context manager closes it as soon as the pixel data has been copied
    # into the converted image.
    with Image.open(image_path) as img:
        rgb_img = img.convert('RGB').resize(target_size)

    img_array = np.array(rgb_img)
    # Add the batch dimension the model expects.
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

# Step 3: Feature extraction
def extract_features(image_folder, batch_size=32):
    """
    Extract features for all images in a folder using ResNet50.

    Args:
        image_folder: Directory that is scanned (non-recursively) for files
            ending in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).
        batch_size: Number of images sent to the model per ``predict`` call.

    Returns:
        Dict mapping each image file name to its flattened feature vector.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Include the dot so names that merely end in the letters "jpg"/"png"
    # are not mistaken for images.
    image_extensions = ('.jpg', '.jpeg', '.png')

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting dict (and any tie-breaking downstream) reproducible.
    image_files = [
        name
        for name in sorted(os.listdir(image_folder))
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(image_folder, name))
    ]

    embeddings = {}
    # Predict in batches: one predict call per image is slow and prints a
    # progress bar for every single file.
    for start in range(0, len(image_files), batch_size):
        batch_files = image_files[start:start + batch_size]
        batch_array = np.concatenate(
            [preprocess_image(os.path.join(image_folder, name)) for name in batch_files],
            axis=0,
        )
        batch_features = model.predict(batch_array, verbose=0)
        for name, features in zip(batch_files, batch_features):
            embeddings[name] = features.flatten()

    print("Feature extraction completed for all images.")
    return embeddings

# Step 4: Compute similarity
def find_similar_images(query_image_path, embeddings, image_folder, top_n=5):
    """
    Find the top-N most similar images to a given query image.

    Args:
        query_image_path: Path of the image to compare against the collection.
        embeddings: Dict mapping image file name to feature vector, as
            returned by ``extract_features``.
        image_folder: Not used by this function; kept so existing callers
            that pass it keep working.
        top_n: Maximum number of results to return.

    Returns:
        List of ``(file_name, cosine_similarity)`` tuples sorted from most to
        least similar, truncated to ``top_n`` entries. Empty when
        ``embeddings`` is empty.
    """
    query_img_array = preprocess_image(query_image_path)
    query_features = model.predict(query_img_array, verbose=0).flatten()

    # Nothing to compare against; also avoids stacking an empty list below.
    if not embeddings:
        return []

    img_files = list(embeddings)
    feature_matrix = np.stack([embeddings[name] for name in img_files])

    # One vectorised call instead of one cosine_similarity call per image,
    # which would re-validate its inputs on every iteration.
    scores = cosine_similarity([query_features], feature_matrix)[0]

    # sorted() is stable, so equal scores keep the order of `embeddings`.
    ranked = sorted(zip(img_files, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

# Step 5: Visualization of Similar Images
def visualize_similar_images(query_image_path, similar_images, image_folder):
    """
    Show the query image next to its most similar images in a single row.

    Args:
        query_image_path: Path of the query image, drawn in the first panel.
        similar_images: Sequence of ``(file_name, score)`` pairs; each gets
            its own panel titled with the score to two decimals.
        image_folder: Directory the file names in ``similar_images`` are
            joined onto.
    """
    # One panel for the query plus one per similar image.
    panel_count = len(similar_images) + 1

    def draw_panel(position, path, caption):
        # Select the panel first, then load and render the image into it.
        plt.subplot(1, panel_count, position)
        plt.imshow(mpimg.imread(path))
        plt.title(caption)
        plt.axis('off')

    plt.figure(figsize=(15, 5))

    draw_panel(1, query_image_path, "Query Image")

    # Similar images occupy panels 2..panel_count, in the order given.
    for position, (img_file, score) in enumerate(similar_images, start=2):
        draw_panel(position, os.path.join(image_folder, img_file), f"Score: {score:.2f}")

    plt.tight_layout()
    plt.show()

# Step 6: Main Execution
if __name__ == "__main__":
    # Path to the images folder. In a raw string backslashes are literal, so
    # they must be written once; doubling them puts "\\" into the path and
    # into every message that prints it.
    image_folder = r"D:\Manipal University\Myntra Dataset\Images\Images"
    # Replace "0.jpg" with an actual image file name in the folder.
    query_image_path = os.path.join(image_folder, "0.jpg")

    # Verify paths. isdir/isfile (rather than exists) also reject a file
    # where a folder is expected and vice versa.
    if not os.path.isdir(image_folder):
        raise FileNotFoundError(f"Image folder not found at: {image_folder}")
    if not os.path.isfile(query_image_path):
        raise FileNotFoundError(f"Query image not found at: {query_image_path}")

    # Extract features for all images
    embeddings = extract_features(image_folder)

    # Find similar images
    similar_images = find_similar_images(query_image_path, embeddings, image_folder, top_n=5)
    print("Top Similar Images:")
    for img_file, score in similar_images:
        print(f"Image: {img_file}, Similarity Score: {score:.4f}")

    # Visualize the query image and similar images
    visualize_similar_images(query_image_path, similar_images, image_folder)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1