In [None]:
import json

def get_image_captions(caption_path, id_width=12):
    """Group the captions of a COCO-style annotation file by image key.

    Args:
        caption_path: Path to a JSON file whose top-level ``annotations``
            list holds objects with ``image_id`` and ``caption`` entries.
        id_width: Number of digits each key is zero-padded to. The default
            of 12 matches keys such as ``000000144300``, which the notebook
            later joins with ``.jpg`` to locate the image file.

    Returns:
        dict mapping the zero-padded image id (str) to the list of its
        captions, in the order they appear in the file.
    """
    # Load the annotations file
    with open(caption_path, 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    captions_dict = {}
    for annotation in annotations['annotations']:
        # Pad to a fixed width. Prefixing a constant "000000" only gives a
        # 12-character key for six-digit ids; any other id produced a key
        # that matched no file name and was silently dropped downstream.
        key = str(annotation['image_id']).zfill(id_width)
        captions_dict.setdefault(key, []).append(annotation['caption'])
    return captions_dict


In [None]:
import string

def clean_captions(captions_dict):
    """Normalise every caption and wrap it in sequence markers.

    Each caption is split on whitespace, lower-cased and stripped of
    ``string.punctuation`` characters; tokens left with one character or
    fewer are dropped. The surviving tokens are re-joined with single
    spaces between ``startseq`` and ``endseq``.

    Args:
        captions_dict: dict mapping an id to a list of caption strings.

    Returns:
        A new dict with the same keys, each mapping to the list of cleaned
        captions in their original order.
    """
    # translation table that deletes every punctuation character
    strip_punct = str.maketrans('', '', string.punctuation)

    def _normalise(text):
        # lower-case and de-punctuate each whitespace-separated word
        words = (word.lower().translate(strip_punct) for word in text.split())
        # drop single-character leftovers (like 'a', 's') and empty strings
        kept = [word for word in words if len(word) > 1]
        return 'startseq ' + ' '.join(kept) + ' endseq'

    return {
        caption_id: [_normalise(text) for text in captions]
        for caption_id, captions in captions_dict.items()
    }


In [None]:
# Location of the validation caption annotations (hardcoded absolute path)
captions_val_path = "/content/annotations/captions_val2017.json"
# Raw captions grouped per image key
captions_val_dict = get_image_captions(captions_val_path)

In [None]:
# Cleaned captions, keyed the same way as captions_val_dict
new_captions_val_dict = clean_captions(captions_val_dict)

In [None]:
# Directory that is listed for validation image files
images_val_path = "/content/val2017"

In [None]:
# Keys of the cleaned caption dict, in insertion order
val_image_id = list(new_captions_val_dict)


In [None]:
import re

def clean_filename(filename):
    """Return the first run of digits found in ``filename``, or None if it has none."""
    digits = re.search(r'\d+', filename)
    return digits.group(0) if digits else None

import os  # this cell calls os.listdir, but the notebook only imports os in later cells

# Build the lookup once: testing membership against the val_image_id list
# rescanned the whole list for every file in the directory.
_known_ids = set(val_image_id)

# Keep the digit part of each file name that also has captions.
val_validate_images = []
for image in os.listdir(images_val_path):
    cleaned_name = clean_filename(image)
    if cleaned_name and cleaned_name in _known_ids:
        val_validate_images.append(cleaned_name)



4147

In [None]:
import os
# Rebuild val_validate_images from the part of each file name before its
# first '.', keeping only names that have captions.
# A set makes each membership test O(1) instead of scanning the whole list,
# and each file name is now split once rather than twice.
_known_ids = set(val_image_id)
_stems = (image.split('.')[0] for image in os.listdir(images_val_path))
val_validate_images = [stem for stem in _stems if stem in _known_ids]


In [None]:
# Number of image keys kept in val_validate_images (shown as the cell output)
len(val_validate_images)

4147

In [None]:
import pickle  # needed here: the notebook's only other `import pickle` is in a later cell

# Save val_validate_images to a pickle file
with open('val_validate_images.pkl', 'wb') as f:
    pickle.dump(val_validate_images, f)


In [None]:
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import os

def extract_features_efficientnet(directory, image_keys, target_size=(300, 300)):
    """Extract one pooled EfficientNetB3 feature vector per image.

    Args:
        directory: Folder containing the images as ``<key>.jpg`` files.
        image_keys: Iterable of file-name stems (without the ``.jpg`` suffix).
        target_size: Size every image is resized to on load; it also fixes
            the first two dimensions of the network's input shape.

    Returns:
        dict mapping each image key to the feature vector obtained by
        global-average-pooling the headless network's output (1536 values
        with the default settings, per the recorded notebook output).
        Images that fail to load or process are reported on stdout and left
        out of the result.
    """
    # Load EfficientNetB3 without top, add GlobalAveragePooling2D
    base_model = EfficientNetB3(include_top=False, weights='imagenet', input_shape=(target_size[0], target_size[1], 3))
    x = GlobalAveragePooling2D()(base_model.output)
    model = Model(inputs=base_model.input, outputs=x)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" to the cell output.
    model.summary()

    features = dict()

    for name in image_keys:
        filename = os.path.join(directory, name + '.jpg')

        try:
            # Load and preprocess the image
            image = load_img(filename, target_size=target_size)
            image = img_to_array(image)
            image = image.reshape((1,) + image.shape)  # add a leading batch axis
            image = preprocess_input(image)

            # Extract feature
            feature = model.predict(image, verbose=0)

            # Store feature
            image_id = name.split('.')[0]
            features[image_id] = feature[0]  # Shape: (1536,)

        except Exception as e:
            # Deliberate best-effort: one unreadable image must not abort the whole run.
            print(f"Failed to process {filename}: {e}")

    return features


In [None]:
# Run the extractor over every kept validation image key (default target size)
val_validate_images_features = extract_features_efficientnet(
    images_val_path,
    val_validate_images,
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


None


In [None]:
import pickle

# Persist the extracted feature dict to disk as a pickle
with open('val_validate_images_features.pkl', 'wb') as features_file:
    pickle.dump(val_validate_images_features, features_file)


In [None]:
# Sanity check: show the first (image key, feature vector) pair
print("{} : {}".format(*list(val_validate_images_features.items())[0]))

000000144300 : [ 0.15875524  0.14131832  0.0859644  ... -0.06277041 -0.13962574
  0.20399617]


In [None]:
import numpy as np

# Reload the feature dict this notebook wrote earlier. pickle.load can run
# arbitrary code, so only use it on files you created yourself.
with open('val_validate_images_features.pkl', 'rb') as features_file:
    val_validate_images_features = pickle.load(features_file)

# How many images have a feature vector
nb_val_images = len(val_validate_images_features)
print(f"Number of images: {nb_val_images}")

# Shape of one feature vector, taken from the first entry
first_key = list(val_validate_images_features)[0]
first_feature_vector = val_validate_images_features[first_key]
shape_val_images = np.asarray(first_feature_vector).shape
print(f"Shape of a single image feature: {shape_val_images}")


Number of images: 4147
Shape of a single image feature: (1536,)
