<h1 align = "Center">Image Captioning Using VGG16</h1>
<h5><b>Authors:</b></h5>
<ul><li>Suryakiran R</li><li>Durvank Gade</li><li>Nikhil Bansal</li><li>Rishvan Rajavel</li></ul>

In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import os
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications.vgg16 import preprocess_input

In [2]:
# Directory (not a single file, despite the name) holding the images;
# image names from the captions table are appended to it with '/'.
image_file = "Data/Images/flickr30k_images"
# Path of the captions file that is read with pandas.read_csv.
captions_file = "Data/captions.txt"

In [3]:
# Read the captions table and label its two columns as (image, caption).
# A column-count mismatch raises ValueError, exactly as assigning .columns does.
cap_df = pd.read_csv(captions_file).set_axis(['image', 'caption'], axis=1)

In [4]:
def preprocess_image(image_file, target_size=(224, 224)):
    """Load one image from disk and prepare it as VGG16 input.

    Parameters
    ----------
    image_file : str or path-like
        Path of the image to open with PIL.
    target_size : tuple of int, default (224, 224)
        Size handed to ``PIL.Image.resize``.

    Returns
    -------
    numpy.ndarray
        Three-channel pixel array after ``preprocess_input``.
    """
    # The context manager releases the file handle as soon as the pixels are
    # decoded, which matters when tens of thousands of files are read in a loop.
    with Image.open(image_file) as img:
        # convert('RGB') guarantees exactly three channels for every source
        # mode (grayscale, palette, CMYK, RGBA, ...), not only for RGBA.
        rgb = img.convert('RGB').resize(target_size)
    pixels = np.array(rgb)
    return preprocess_input(pixels)  # VGG16 preprocessing

In [5]:
def preprocess_captions(captions, max_length=20):
    """Tokenise caption strings and pad them to a fixed length.

    A fresh ``Tokenizer`` (``num_words=5000``, ``'<OOV>'`` as the
    out-of-vocabulary token) is fitted on ``captions``. The same captions are
    then encoded as integer sequences and padded at the end up to
    ``max_length``.

    Returns
    -------
    tuple
        ``(tokenizer, padded_sequences)``.
    """
    caption_tokenizer = Tokenizer(oov_token='<OOV>', num_words=5000)
    caption_tokenizer.fit_on_texts(captions)
    # NOTE(review): only padding='post' is specified; captions longer than
    # max_length are cut on pad_sequences' default truncation side — confirm
    # that is the intended side for this model.
    encoded = pad_sequences(
        caption_tokenizer.texts_to_sequences(captions),
        maxlen=max_length,
        padding='post',
    )
    return caption_tokenizer, encoded



In [6]:
# Number of rows, counted from the top of cap_df, whose images are loaded.
NUM_IMAGE_ROWS = 30000

captions_list = cap_df['caption'].astype(str).tolist()
tokenizer, padded_captions = preprocess_captions(captions_list)
# NOTE(review): padded_captions covers every row of cap_df, whereas images are
# loaded only for the first NUM_IMAGE_ROWS rows — pair them accordingly.

# Build one preprocessed array per row, in row order. This single loop replaces
# six copy-pasted 5000-row loops, so re-running the cell always rebuilds the
# list from scratch instead of appending duplicates.
# An image name that occurs in several rows is read and preprocessed once and
# the resulting array is reused for the later rows.
image_cache = {}
preprocessed_images = []
for img_name in cap_df['image'][:NUM_IMAGE_ROWS]:
    if img_name not in image_cache:
        img_path = image_file + '/' + img_name
        image_cache[img_name] = preprocess_image(img_path)
    preprocessed_images.append(image_cache[img_name])

In [12]:
# Stack the per-row image arrays into a single ndarray for model.predict.
preprocessed_images = np.asarray(preprocessed_images)

In [16]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
# Load VGG16 with ImageNet weights, then expose the activations of its 'fc2'
# layer as the output of a feature-extraction model.
base_model = VGG16(weights='imagenet')
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name='fc2').output,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 0us/step


In [20]:
# def extract_features(preprocessed_images):
#     features = model.predict(preprocessed_images, batch_size=32, verbose=1)
#     return features

# features = extract_features(preprocessed_images)
def extract_features_in_chunks(preprocessed_images, chunk_size=5000,
                               batch_size=32, feature_model=None):
    """Run the feature extractor over the images one chunk at a time.

    Parameters
    ----------
    preprocessed_images : sequence or numpy.ndarray
        Images indexed along the first axis; must support ``len`` and slicing.
    chunk_size : int, default 5000
        Number of images handed to each ``predict`` call.
    batch_size : int, default 32
        Forwarded to ``predict``.
    feature_model : object with a ``predict`` method, optional
        Model used for extraction. When omitted, the notebook-level ``model``
        is used.

    Returns
    -------
    numpy.ndarray
        Per-chunk predictions stacked vertically, in input order.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive or there are no images.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    num_images = len(preprocessed_images)
    if num_images == 0:
        # Fail with a clear message instead of an opaque np.vstack error.
        raise ValueError("preprocessed_images is empty; there is nothing to extract")

    if feature_model is None:
        # Resolved at call time so the notebook's current `model` is used.
        feature_model = model

    # Slicing past the end is safe, so the last chunk may simply be shorter.
    features = [
        feature_model.predict(
            preprocessed_images[start_idx:start_idx + chunk_size],
            batch_size=batch_size,
            verbose=1,
        )
        for start_idx in range(0, num_images, chunk_size)
    ]
    return np.vstack(features)

# Extract features for every preprocessed image, 5000 images per predict() call.
features = extract_features_in_chunks(
    preprocessed_images,
    chunk_size=5000,
)


[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1674s[0m 11s/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m542s[0m 3s/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m503s[0m 3s/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1009s[0m 6s/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1046s[0m 7s/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m513s[0m 3s/step


NameError: name 'pickle' is not defined

In [21]:
import pickle
import os

In [22]:
# Persist the extracted features and the padded caption sequences as .npy files.
np.save(file='features.npy', arr=features)
np.save(file='padded_captions.npy', arr=padded_captions)

# Pickle the fitted tokenizer so it can be reloaded alongside the arrays.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)

In [23]:
# Reload the saved features from disk as a round-trip check.
features = np.load('features.npy')

# Report the array's shape and Python type.
print(
    f"Features shape: {features.shape}",
    f"Features type: {type(features)}",
    sep="\n",
)

# Show the leading rows as a quick content check.
# NOTE(review): in the recorded output these five rows are identical —
# presumably consecutive rows of the captions table reference the same image;
# verify before training on them.
print(features[:5])

Features shape: (30000, 4096)
Features type: <class 'numpy.ndarray'>
[[2.7699418  2.890111   0.         ... 0.         0.01797792 1.4492029 ]
 [2.7699418  2.890111   0.         ... 0.         0.01797792 1.4492029 ]
 [2.7699418  2.890111   0.         ... 0.         0.01797792 1.4492029 ]
 [2.7699418  2.890111   0.         ... 0.         0.01797792 1.4492029 ]
 [2.7699418  2.890111   0.         ... 0.         0.01797792 1.4492029 ]]
