Load the CIFAR-10 dataset

In [2]:
# Import necessary libraries
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
import numpy as np

# Fetch the CIFAR-10 train/test splits; each split is an (images, labels) pair
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Rescale pixel intensities by 1/255 so both image arrays hold floats in [0, 1]
train_images = train_images / 255.0
test_images = test_images / 255.0


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


Use a pre-trained model to generate embeddings. The ResNet50 model used here is pre-trained on the ImageNet dataset. Each embedding is a high-dimensional vector (2048 values per image), and you can associate each one with the ID of the corresponding image.

In [4]:

# Load the pre-trained ResNet50 model (ImageNet weights, classifier head removed).
# With pooling='avg' the model emits one flat feature vector per input image.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Define batch size and number of batches. Ceiling division counts a trailing
# partial batch, so one loop handles every image (no separate remainder branch).
batch_size = 100
num_images = len(train_images)
num_batches = (num_images + batch_size - 1) // batch_size

# Spatial size fed to ResNet50. The images are upsampled by an integer factor
# per axis, so the target size must be a multiple of the source height/width.
target_size = 224
src_height, src_width = train_images.shape[1:3]
if target_size % src_height != 0 or target_size % src_width != 0:
    raise ValueError(
        f'Cannot upsample {src_height}x{src_width} images to '
        f'{target_size}x{target_size} by an integer factor'
    )
scale_h = target_size // src_height
scale_w = target_size // src_width

# preprocess_input expects pixel values in [0, 255], but train_images was
# divided by 255 when it was loaded. Fail loudly if that assumption is broken
# rather than silently producing meaningless embeddings.
if train_images.max() > 1.0:
    raise ValueError('train_images is expected to be scaled to the [0, 1] range')

# Initialize an empty array to hold the embeddings; the width is taken from the
# model itself instead of hard-coding the feature dimension.
embedding_dim = model.output_shape[-1]
train_embeddings = np.empty((num_images, embedding_dim))

# Generate embeddings for the training images in batches
for i in range(num_batches):
    start = i * batch_size
    end = min(start + batch_size, num_images)

    # Restore the 0-255 pixel scale on the small images (cheap), in float32 to
    # halve the memory of the upsampled batch.
    batch_images = (train_images[start:end] * 255.0).astype(np.float32)

    # Nearest-neighbour upsampling: repeat every pixel along height and width.
    # (np.resize must NOT be used here: it tiles the flattened pixel data to
    # fill the new shape instead of spatially resizing the image.)
    batch_images = batch_images.repeat(scale_h, axis=1).repeat(scale_w, axis=2)

    # Preprocess images for ResNet50 (channel reorder + ImageNet mean centering)
    batch_images = preprocess_input(batch_images)

    # Generate embeddings for the batch; verbose=0 because progress is already
    # reported by the print below.
    batch_embeddings = model.predict(batch_images, verbose=0)

    # Store the embeddings in the array
    train_embeddings[start:end] = batch_embeddings

    print(f'Batch {i+1}/{num_batches} processed')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Batch 1/500 processed
Batch 2/500 processed
Batch 3/500 processed
Batch 4/500 processed
Batch 5/500 processed
Batch 6/500 processed
Batch 7/500 processed
Batch 8/500 processed
Batch 9/500 processed
Batch 10/500 processed
Batch 11/500 processed
Batch 12/500 processed
Batch 13/500 processed
Batch 14/500 processed
Batch 15/500 processed
Batch 16/500 processed
Batch 17/500 processed
Batch 18/500 processed
Batch 19/500 processed
Batch 20/500 processed
Batch 21/500 processed
Batch 22/500 processed
Batch 23/500 processed
Batch 24/500 processed
Batch 25/500 processed
Batch 26/500 processed
Batch 27/500 processed
Batch 28/500 processed
Batch 29/500 processed
Batch 30/500 processed
Batch 31/500 processed
Batch 32/500 processed
Batch 33/500 processed
Batch 34/500 processed
Batch 35/500 processed
Batch 36/500 processed
Batch 37/500 processed
Batch 38/500 pr