In [1]:
import os
import json

# All dataset paths are resolved relative to the directory that contains the
# current working directory.
cwd = os.getcwd()
parent_dir = os.path.abspath(os.path.join(cwd, os.pardir))

# Both dataset locations live under the same COCO_dataset folder of the project.
coco_root = os.path.join(parent_dir, "NLP641FinalProject", "COCO_dataset")

# Directory holding the train2014 images
image_dir = os.path.join(coco_root, "train2014")

# Captions mapping JSON file
json_file = os.path.join(coco_root, "captions_mapping_train.json")


In [2]:
# Load the captions mapping. The encoding is given explicitly so the file is
# decoded as UTF-8 on every platform instead of with the locale default.
with open(json_file, "r", encoding="utf-8") as f:
    captions_mapping = json.load(f)

# Snapshot of the entry names currently present in the image directory.
image_files = os.listdir(image_dir)

In [3]:
# Sanity check: number of caption entries vs. number of listed image files.
(len(captions_mapping), len(image_files))

(68363, 68363)

In [4]:
# Disabled cleanup: removes from disk every listed image whose path is not a
# key in captions_mapping.
# NOTE(review): the prefix below is 'COCO_dataset/val2014/', while image_files
# is listed from the train2014 directory (image_dir). Confirm the prefix matches
# both the files on disk and the key format of captions_mapping before
# re-enabling; os.remove cannot be undone.
# for image_file in image_files:
#     image_path = 'COCO_dataset/val2014/' + image_file
#     if image_path not in captions_mapping:
#         os.remove(image_path)
#         print(f"Image {image_path} deleted.")

In [5]:
# Re-list the image directory instead of reusing `image_files`: that list is a
# snapshot taken when the captions were loaded, so it cannot reflect files
# removed from disk afterwards.
len(captions_mapping), len(os.listdir(image_dir))

(68363, 68363)

In [28]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0

# Frozen EfficientNetB0 backbone: ImageNet weights, classification head removed.
base_model = EfficientNetB0(include_top=False, weights='imagenet')
base_model.trainable = False

# Read the sample image at 224x224 and run it through the EfficientNet
# input preprocessing.
image_path = 'dog.jpeg'
keras_image = tf.keras.preprocessing.image
img = keras_image.load_img(image_path, target_size=(224, 224))
img_array = tf.keras.applications.efficientnet.preprocess_input(
    keras_image.img_to_array(img)
)
img_tensor = tf.expand_dims(img_array, axis=0)  # leading batch axis of size 1

# Wrap the backbone's input/output in a Model and call it on the one-image batch.
cnn_model = tf.keras.Model(outputs=base_model.output, inputs=base_model.input)
output = cnn_model(img_tensor)

print(output.shape)


(1, 7, 7, 1280)


In [41]:
# Disabled: uncomment to print the layer summary of whichever model
# `cnn_model` currently refers to.
# cnn_model.summary()

In [36]:
import tensorflow as tf

# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Disabled: would turn on memory growth for the first GPU.
# tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)

# Frozen ResNet50 backbone: ImageNet weights, classification head removed,
# fixed 224x224 RGB input.
resnet50 = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
resnet50.trainable = False

# Head on top of the backbone output: reshape to (-1, 2048), then project the
# last axis to 1280 units. The Dense layer is newly created here, so its
# weights are not part of the ImageNet weights loaded above.
to_sequence = tf.keras.layers.Reshape((-1, 2048))
project = tf.keras.layers.Dense(1280)
output = project(to_sequence(resnet50.output))


Num GPUs Available:  1


In [37]:
# Create the model
cnn_model = tf.keras.Model(inputs=resnet50.input, outputs=output)

# Load and preprocess the image
image_path = 'dog.jpeg'
img = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
img_array = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.keras.applications.resnet50.preprocess_input(img_array)  # Preprocess input for ResNet50

# Process the image through the model
features = cnn_model.predict(tf.expand_dims(img_array, axis=0))

print(features.shape)  # Check the shape of the extracted features


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
(1, 49, 1280)


In [42]:
# Disabled: uncomment to print the layer summary of whichever model
# `cnn_model` currently refers to.
# cnn_model.summary()

In [39]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16

# Frozen VGG16 backbone: ImageNet weights, classification head removed.
base_model = VGG16(include_top=False, weights='imagenet')
base_model.trainable = False

# Sample image -> 224x224 array -> VGG16 input preprocessing -> batch of one.
image_path = 'dog.jpeg'
img = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
raw_pixels = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.keras.applications.vgg16.preprocess_input(raw_pixels)
img_tensor = tf.expand_dims(img_array, axis=0)

# Wrap the backbone's input/output in a Model and call it on the batch.
cnn_model = tf.keras.Model(outputs=base_model.output, inputs=base_model.input)
output = cnn_model(img_tensor)

print(output.shape)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 0us/step
(1, 7, 7, 512)


In [40]:
# Print the layer summary of the current `cnn_model`. That name is reassigned
# by each backbone cell, so the model summarized is whichever was built last.
cnn_model.summary()