Since we have a vast dataset and want to find images similar to a given input image, we store each image's directory, filename, and feature vector in a database. This lets us efficiently query the dataset and retrieve the images most similar to the input image.

In [1]:
import os
import sqlite3
import numpy as np
from tensorflow.keras.applications import vgg16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image

2023-08-28 07:32:32.191434: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the pre-trained VGG16 model (ImageNet weights) without its classification layers.
# pooling='avg' applies global average pooling to the last convolutional output,
# so the model returns one flat feature vector per image.
model = vgg16.VGG16(weights='imagenet', include_top=False, pooling='avg')

2023-08-28 07:32:33.952492: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [3]:
# Open the SQLite file backing the feature index (it is created if it doesn't exist yet)
DB_PATH = 'image_features_database.db'
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

# Schema: one row per image holding its directory, file name and raw feature bytes
CREATE_FEATURES_TABLE_SQL = '''
    CREATE TABLE IF NOT EXISTS image_features (
        id INTEGER PRIMARY KEY,
        directory TEXT,
        filename TEXT,
        features BLOB
    )
'''
cursor.execute(CREATE_FEATURES_TABLE_SQL)
conn.commit()

In [None]:
# Example single-image directory. NOTE(review): not used by this cell; kept in
# case other cells read it.
image_dir = 'img/img/MEN/Denim/id_00000089/'

# Root directory that is walked recursively for images
img_root_dir = 'img'

# Commit in batches: one transaction per row is needlessly slow on a large
# dataset, while a single commit at the very end would lose all progress on a crash.
COMMIT_EVERY = 100

# Width of the feature vector the model emits, read from the model itself so the
# sanity check below stays valid if the backbone is swapped.
feature_dim = model.output_shape[-1]

# (directory, filename) pairs already stored, so re-running this cell resumes
# where it left off instead of inserting duplicate rows.
already_indexed = set(cursor.execute('SELECT directory, filename FROM image_features'))

pending = 0
try:
    # Process each image under the root directory
    for subdir, _dirs, filenames in os.walk(img_root_dir):
        for image_name in filenames:
            if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            if (subdir, image_name) in already_indexed:
                continue

            img_path = os.path.join(subdir, image_name)

            # Load and preprocess the image into a batch of one
            img = image.load_img(img_path, target_size=(224, 224))
            img_array = image.img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            img_array = preprocess_input(img_array)

            # Extract features; verbose=0 avoids printing a progress bar per image
            img_features = model.predict(img_array, verbose=0)
            assert img_features.shape == (1, feature_dim), (
                f"unexpected feature shape {img_features.shape} for {img_path}; "
                f"expected {(1, feature_dim)}"
            )

            # Store the vector as flat float32 bytes so it can be read back with
            # np.frombuffer(blob, dtype=np.float32)
            feature_blob = np.asarray(img_features, dtype=np.float32).ravel().tobytes()
            cursor.execute(
                'INSERT INTO image_features (directory, filename, features) VALUES (?,?, ?)',
                (subdir, image_name, feature_blob),
            )

            pending += 1
            if pending >= COMMIT_EVERY:
                conn.commit()
                pending = 0
finally:
    # Persist whatever was inserted and always release the connection, even if
    # an image failed to load part-way through the walk.
    try:
        conn.commit()
    finally:
        conn.close()

