In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import os
import pickle
from google.colab import files
import gzip
from time import time

## Upload the image data file
uploaded = files.upload()

Saving image_data.pkl.gz to image_data.pkl (1).gz


In [10]:
image_data_path = "/content/image_data.pkl (1).gz"

In [11]:
# Load the compressed image data from the uploaded gzip file
with gzip.open(image_data_path, 'rb') as f:
    image_data = pickle.load(f)
print("Loaded compressed image data from file.")

Loaded compressed image data from file.


In [12]:
# Load pre-trained VGG16 model + higher level layers
base_model = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = tf.keras.models.Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [13]:
def preprocess_image(image):
    image = cv2.resize(image, (224, 224))
    image = np.stack((image,)*3, axis=-1)  # Convert grayscale to RGB
    image = tf.keras.preprocessing.image.img_to_array(image)
    image = np.expand_dims(image, axis=0)
    image = tf.keras.applications.vgg16.preprocess_input(image)
    return image

def extract_features(image_data):
    features = {}
    for filename, image in image_data.items():
        image = preprocess_image(image)
        feature = model.predict(image).flatten()
        features[filename] = feature
    return features

In [16]:
# Measure execution time for feature extraction
start_time = time()
features_extracted = extract_features(image_data)
end_time = time()
extraction_time_taken = end_time - start_time
print(f"Time taken for feature extraction: {extraction_time_taken:.4f} seconds")

Time taken for feature extraction: 234.4444 seconds


In [15]:
# Save the features to a compressed file
features_path = '/content/features_extracted.pkl.gz'
with gzip.open(features_path, 'wb') as f:
    pickle.dump(features_extracted, f, protocol=pickle.HIGHEST_PROTOCOL)
print(f"Saved compressed features to {features_path}.")

# Save the execution time to a file
time_path = '/content/extraction_time_taken.txt'
with open(time_path, 'w') as f:
    f.write(f"Time taken to extract features: {extraction_time_taken:.4f} seconds")
print(f"Saved execution time to {time_path}.")

Saved compressed features to /content/features_extracted.pkl.gz.
Saved execution time to /content/extraction_time_taken.txt.
