In [54]:
# Cell 12: Load the Trained Model
from tensorflow.keras.models import load_model
import pickle

# Load the trained model
model = load_model('model/image_captioning_model1.h5')

# Load the tokenizer
with open('model/tokenizer1.pkl', 'rb') as f:
    tokenizer = pickle.load(f)




In [55]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout
import numpy as np
import os
from PIL import Image

In [56]:

# Load InceptionV3 for feature extraction
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

def load_feature_extractor():
    base_model = InceptionV3(weights='imagenet', include_top=False)
    feature_extractor = Model(inputs=base_model.input, outputs=GlobalAveragePooling2D()(base_model.output))
    return feature_extractor

feature_extractor = load_feature_extractor()


In [57]:
# Cell 13: Preprocess New Image
import numpy as np
from PIL import Image
def preprocess_image(image_path):
    img = Image.open(image_path)
    img = img.resize((224, 224))  # Resize image to a fixed size
    img = np.array(img) / 255.0   # Normalize the image
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img

# Load and preprocess a new image
image_path = 'sample_data/36979.jpg'  # Replace with the path to the new image
new_image = preprocess_image(image_path)
print("Image Shape :",new_image.shape) 
#if len(new_image.shape) == 4 and new_image.shape[0] == 1:
#                    new_image = np.squeeze(new_image, axis=0) 
print("Image Shape :",new_image.shape)

Image Shape : (1, 224, 224, 3)
Image Shape : (1, 224, 224, 3)


In [58]:
def generate_caption(model, image, tokenizer, max_caption_length):
    # Preprocess the image so it has the shape (1, 224, 224, 3)
    #image = preprocess_image(image)  # Ensure the function resizes the image to (224, 224, 3)
    #image = np.expand_dims(image, axis=0)  # Add batch dimension

    # Start caption generation with the <start> token
    caption = ['<start>']
    
    for _ in range(max_caption_length):
        # Convert current caption to sequence
        sequence = tokenizer.texts_to_sequences([caption])[0]
        sequence = pad_sequences([sequence], maxlen=max_caption_length, padding='post')
        
        # Predict next word
        y_pred = model.predict([image, sequence], verbose=0)  # Pass the image directly
        predicted_word_idx = np.argmax(y_pred[0])
        predicted_word = tokenizer.index_word[predicted_word_idx]
        
        # Append the predicted word to the caption
        caption.append(predicted_word)
        
        # Stop if <end> token is predicted
        if predicted_word == '<end>':
            break
    
    # Remove <start> and <end> tokens from the generated caption
    caption = ' '.join(caption[1:-1])
    return caption


max_caption_length=9
# Generate a caption for the new image
generated_caption = generate_caption(model, new_image, tokenizer, max_caption_length)
print(f"Generated Caption: {generated_caption}")


Generated Caption: around around around around several several several several
