In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import numpy as np
from numpy.linalg import norm
import cv2
import os
import pickle

# Load the ResNet50 model with pre-trained ImageNet weights, excluding the top fully connected layer
print("Loading ResNet50 model...")
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
model.trainable = False

# Add a GlobalMaxPooling2D layer to the model
model = tf.keras.Sequential([
    model,
    GlobalMaxPooling2D()
])
print("ResNet50 model loaded and configured.")

# Function to extract ResNet50 features
def extract_resnet50_features(img_path, model):
    img = image.load_img(img_path, target_size=(224,224))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    result = model.predict(preprocessed_img).flatten()
    normalized_result = result / norm(result)
    return normalized_result

# Function to extract color histograms
def extract_color_histogram(image_path, bins=(8, 8, 8)):
    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, [0, 180, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

# Directory of images
image_directory = 'preprocessed_images'
print(f"Collecting image filenames from directory: {image_directory}")
filenames = [os.path.join(image_directory, file) for file in os.listdir(image_directory)]
print(f"Found {len(filenames)} images.")

# Batch processing parameters
batch_size = 1000
num_batches = len(filenames) // batch_size + (1 if len(filenames) % batch_size != 0 else 0)
print(f"Processing images in {num_batches} batches with batch size {batch_size}.")

# Initialize lists for features
resnet50_features = []
color_features = []

# Process images in batches
for batch_idx in range(num_batches):
    start_idx = batch_idx * batch_size
    end_idx = min((batch_idx + 1) * batch_size, len(filenames))
    batch_filenames = filenames[start_idx:end_idx]
    
    for file in batch_filenames:
        resnet50_features.append(extract_resnet50_features(file, model))
        color_features.append(extract_color_histogram(file))

    print(f"Processed batch {batch_idx + 1}/{num_batches}")

# Create directory for pickle files if it doesn't exist
output_directory = 'pickle files'
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print(f"Created directory: {output_directory}")

# Save the features and filenames
print("Saving extracted features and filenames...")
pickle.dump(resnet50_features, open(os.path.join(output_directory, 'resnet50_features.pkl'), 'wb'))
pickle.dump(color_features, open(os.path.join(output_directory, 'color_features.pkl'), 'wb'))
pickle.dump(filenames, open(os.path.join(output_directory, 'filenames.pkl'), 'wb'))
print("Features and filenames saved successfully.")


Loading ResNet50 model...
ResNet50 model loaded and configured.
Collecting image filenames from directory: preprocessed_images
Found 44441 images.
Processing images in 45 batches with batch size 1000.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/st

In [2]:
import pickle 
# Load the saved pickle files
resnet50_features = pickle.load(open('pickle files/resnet50_features.pkl', 'rb'))
color_features = pickle.load(open('pickle files/color_features.pkl', 'rb'))
filenames = pickle.load(open('pickle files/filenames.pkl', 'rb'))

# Print the shapes and example content
print("Number of images:", len(filenames))
print("Shape of ResNet50 features:", resnet50_features[0].shape if resnet50_features else "Not loaded")
print("Shape of color features:", color_features[0].shape if color_features else "Not loaded")

# Print example filename and features
if filenames and resnet50_features and color_features:
    print("\nExample filename:", filenames[0])
    print("Example ResNet50 features:", resnet50_features[0])
    print("Example color features:", color_features[0])

Number of images: 44441
Shape of ResNet50 features: (2048,)
Shape of color features: (512,)

Example filename: preprocessed_images\10000.jpg
Example ResNet50 features: [0.         0.02213264 0.00990909 ... 0.01323134 0.01578172 0.05099545]
Example color features: [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 9.6596569e-01 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+