In [None]:
%pip install numpy pandas pillow tensorflow opencv-python matplotlib

In [None]:
import os
import numpy as np
import tensorflow as tf
import cv2  
from PIL import Image
import matplotlib.pyplot as plt

# Dataset locations used by the cells below.
# dataset_path is the source tree handed to convert_images_to_jpeg();
# jpeg_dataset_path receives the converted copies and is the directory
# later passed to image_dataset_from_directory().
dataset_path = "/kaggle/input/moddelite/ModdeDataset"  # Original dataset (conversion source)
jpeg_dataset_path = "/kaggle/working/MODDE_JPEG"  # Converted JPEG images (conversion destination)

# Step 1: Convert images to JPEG format
def convert_images_to_jpeg(src_dir, dest_dir):
    """Convert every file under ``src_dir`` to an RGB JPEG under ``dest_dir``.

    The directory layout below ``src_dir`` is mirrored below ``dest_dir``. Each
    output keeps the source file's base name and gets a ``.jpg`` extension.

    Args:
        src_dir: Root directory that is walked recursively for input files.
        dest_dir: Root directory for the converted images; created if missing.

    Returns:
        None. Files that cannot be opened or saved are reported with ``print``
        and skipped, so one bad file does not abort the whole conversion.

    Notes:
        A source whose ``.jpg`` output already exists is skipped, which makes
        re-running the function cheap. As a consequence, two sources in the
        same directory that share a base name (e.g. ``a.png`` and ``a.jpg``)
        map to one output and only the first one processed is converted.
    """
    os.makedirs(dest_dir, exist_ok=True)

    for root, _, files in os.walk(src_dir):
        rel_path = os.path.relpath(root, src_dir)
        dest_dir_path = os.path.join(dest_dir, rel_path)

        for file in files:
            src_file_path = os.path.join(root, file)

            # Build the final ".jpg" name *before* the existence check; testing
            # the path with the source extension would never match the saved
            # output for non-.jpg inputs and would re-convert them on every run.
            stem = os.path.splitext(os.path.join(dest_dir_path, file))[0]
            dest_file_path = stem + '.jpg'

            # Output directories are only created for folders that contain files.
            os.makedirs(dest_dir_path, exist_ok=True)

            # Skip files that were already converted
            if os.path.exists(dest_file_path):
                continue

            try:
                with Image.open(src_file_path) as img:
                    # JPEG output is written from RGB data; convert other modes first.
                    rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
                    rgb_img.save(dest_file_path, 'JPEG')
            except Exception as e:
                print(f"Failed to process {src_file_path}: {e}")

# Run the conversion: mirror dataset_path into jpeg_dataset_path as RGB JPEGs.
# Per-file failures are printed by convert_images_to_jpeg rather than raised.
convert_images_to_jpeg(dataset_path, jpeg_dataset_path)

# Step 2: Load dataset with preprocessing
def preprocess_image(image, label):
    """Resize, normalize and randomly augment an image (or batch of images).

    Intended for use with ``Dataset.map``; the label is passed through untouched.

    Args:
        image: Image tensor with pixel values on a 0-255 scale (the code divides
            by 255.0 to normalize).
        label: Label associated with ``image``; returned unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` has spatial size 256x256 and
        values clipped to the range [0, 1].
    """
    # Resize to (256, 256)
    image = tf.image.resize(image, (256, 256))

    # Normalize pixel values to [0,1]
    image = image / 255.0

    # Data augmentation
    image = tf.image.random_flip_left_right(image)  # Random horizontal flip
    image = tf.image.random_brightness(image, max_delta=0.1)  # Adjust brightness
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)  # Adjust contrast

    # Brightness/contrast jitter can push values below 0 or above 1; clip so the
    # result stays a valid normalized image for the model and for plt.imshow.
    image = tf.clip_by_value(image, 0.0, 1.0)

    return image, label

# Load dataset: one class per sub-directory of jpeg_dataset_path, one-hot labels.
data = tf.keras.utils.image_dataset_from_directory(
    jpeg_dataset_path,
    labels='inferred',
    label_mode='categorical',
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=123
)

# Capture the class names now: they are an attribute of the dataset object
# returned by image_dataset_from_directory, and the dataset returned by
# .map() below does not expose them (data.class_names would raise AttributeError).
class_names = data.class_names

# Apply preprocessing to dataset
data = data.map(preprocess_image)

# Step 3: Display sample images (one example per class found in the first batch)
# Keep the 3x4 layout, adding rows only when there are more than 12 classes so
# plt.subplot never receives an out-of-range position.
n_cols = 4
n_rows = max(3, -(-len(class_names) // n_cols))  # ceiling division
grid_size = (n_rows, n_cols)

plt.figure(figsize=(12, 8 * n_rows / 3))
shown_classes = set()

# Only the first batch is inspected, so classes absent from it are not shown.
for images, labels in data.take(1):
    for i in range(len(images)):
        # Labels are one-hot vectors; argmax recovers the class index.
        label_index = int(np.argmax(labels[i].numpy()))
        if label_index not in shown_classes:
            ax = plt.subplot(grid_size[0], grid_size[1], len(shown_classes) + 1)
            ax.imshow(images[i].numpy())
            ax.set_title(class_names[label_index], fontsize=12, weight="bold", color="#333333")
            ax.axis("off")
            shown_classes.add(label_index)
        # Stop early once every class has an example on the grid.
        if len(shown_classes) == len(class_names):
            break

plt.suptitle("Sample Images after Preprocessing", fontsize=16, weight="bold", color="#222222", y=0.92)
plt.tight_layout(pad=2.0, rect=[0, 0, 1, 0.92])
plt.show()


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Model

# Load MobileNetV2 with ImageNet weights, without the classification head
# (include_top=False) and with average pooling requested for the output (pooling="avg").
base_model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
# Wrap the base model's input/output in a Model; extract_features() calls predict() on it.
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

# Function to extract features
def extract_features(image_path):
    """Return the feature vector of one image as a flat 1-D array.

    The image is loaded, given a leading batch dimension, passed through the
    MobileNetV2 ``preprocess_input`` and then through ``feature_extractor``.

    Args:
        image_path: Path of the image, forwarded to ``load_and_preprocess_image``.

    Returns:
        The ``feature_extractor`` output for the single image, flattened to 1-D.
    """
    # NOTE(review): load_and_preprocess_image is not defined in the visible
    # notebook cells — confirm it is provided elsewhere before running.
    # Presumably it returns pixel values on the 0-255 scale that
    # preprocess_input expects; verify to avoid normalizing twice.
    img = load_and_preprocess_image(image_path)  # Load image
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    img = preprocess_input(img)  # Apply model-specific preprocessing
    # verbose=0: this function runs once per image, so the default per-call
    # progress bar would flood the notebook output.
    features = feature_extractor.predict(img, verbose=0)
    return features.flatten()  # Flatten to 1D vector

# Extract features for all images in our dataset, one extract_features() call per path.
# Row i of image_features corresponds to image_files[i].
# NOTE(review): image_files is not defined in the visible notebook cells —
# confirm it is a list of image paths built elsewhere before this cell runs.
image_features = np.array([extract_features(img) for img in image_files])

print(f"Extracted feature shape: {image_features.shape}")  # Expected (num_images, 1280) per the original note — TODO confirm


In [None]:
import faiss

# Get feature vector dimensions (second axis of the feature matrix)
feature_dim = image_features.shape[1]

# Create FAISS index
# find_similar_images() uses the indices returned by index.search() to look up
# image_files, so vectors must be added in the same order as image_files.
index = faiss.IndexFlatL2(feature_dim)  # L2 (Euclidean) distance
index.add(image_features)  # Add feature vectors to the index

# index.ntotal reports how many vectors the index currently holds.
print(f"Stored {index.ntotal} images in FAISS index")


In [None]:
def find_similar_images(query_image_path, top_k=3):
    """Show a query image next to its nearest neighbours from the FAISS index.

    Args:
        query_image_path: Path of the image to search with.
        top_k: Maximum number of matches to retrieve and display (at least 1).

    Returns:
        None. A matplotlib figure with the query image followed by the matches
        is displayed.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # NOTE(review): load_and_preprocess_image and image_files are not defined in
    # the visible notebook cells — confirm they are provided elsewhere.
    query_features = extract_features(query_image_path)  # Extract features
    # FAISS works on float32 matrices with one row per query vector.
    query_features = np.asarray(
        np.expand_dims(query_features, axis=0), dtype="float32"
    )

    # Search in FAISS index
    _, indices = index.search(query_features, top_k)

    # When the index holds fewer than top_k vectors FAISS pads the result with
    # -1; drop those so image_files[-1] is not shown as a bogus match.
    matches = [int(idx) for idx in indices[0] if idx >= 0]

    # squeeze=False keeps `axes` two-dimensional even when only one panel is drawn.
    fig, axes = plt.subplots(1, len(matches) + 1, figsize=(15, 5), squeeze=False)
    axes = axes[0]

    # Show query image
    axes[0].imshow(load_and_preprocess_image(query_image_path))
    axes[0].set_title("Query Image")
    axes[0].axis("off")

    # Show retrieved images
    for i, idx in enumerate(matches):
        img_path = image_files[idx]
        axes[i + 1].imshow(load_and_preprocess_image(img_path))
        axes[i + 1].set_title(f"Match {i+1}")
        axes[i + 1].axis("off")

    plt.show()

# Test with a new image: runs a search with the default top_k and displays the figure.
test_image = "../input/sample-clothing-images/test.jpg"  # Replace with actual image path
find_similar_images(test_image)
