In [3]:
import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import load_model, Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.image import load_img, img_to_array


In [2]:
# Captioning network trained beforehand, restored from its saved .h5 file.
caption_model_path = '/content/caption_model.h5'
caption_model = load_model(caption_model_path)

# Image encoder: ImageNet-weighted VGG16 re-wired so that its output is the
# 'fc2' layer's activations instead of the final classification layer.
base_model = VGG16(weights='imagenet')
model_vgg16 = Model(base_model.input, base_model.get_layer('fc2').output)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [6]:
from keras.preprocessing.sequence import pad_sequences

def generate_caption(model, tokenizer, image, max_length,
                     start_token='<start>', end_token='<end>'):
    """Greedily decode a caption for one image.

    Starting from ``start_token``, the text generated so far is tokenized,
    left-padded to ``max_length`` and passed to ``model.predict`` together
    with the image features; the arg-max index of the prediction is mapped
    back to a word through ``tokenizer.index_word``.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            scores over the vocabulary.
        tokenizer: Object providing ``texts_to_sequences`` and ``index_word``
            (e.g. a fitted Keras ``Tokenizer``).
        image: Array of image features; it is reshaped to ``(1, -1)``.
        max_length: Padded sequence length and maximum number of decoding steps.
        start_token: Text used to seed the decoder.
        end_token: Text marking the end of a caption.

    Returns:
        The generated words joined by single spaces, without the start/end
        markers. Empty string if the model stops immediately.
    """
    image = image.reshape((1, -1))  # one row of features, as passed to predict()

    # Resolve the end marker the way the tokenizer itself sees it. A Keras
    # Tokenizer with default filters strips '<' and '>', so it stores 'end'
    # rather than '<end>' and a plain string comparison would never match.
    end_ids = tokenizer.texts_to_sequences([end_token])[0]
    end_id = end_ids[0] if len(end_ids) == 1 else None
    oov_token = getattr(tokenizer, 'oov_token', None)
    if (oov_token is not None and end_id is not None
            and tokenizer.index_word.get(end_id) == oov_token):
        # The end marker is unknown to this tokenizer; do not treat every
        # out-of-vocabulary prediction as "end of caption".
        end_id = None

    words = []
    in_text = start_token
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length, padding='pre')
        yhat = model.predict([image, sequence], verbose=0)
        word_id = int(np.argmax(yhat))
        word = tokenizer.index_word.get(word_id)
        # Stop on the end marker, or on an index with no word (e.g. the
        # padding index 0): feeding a placeholder back in would not change
        # the next input, so the same prediction would repeat to max_length.
        if word is None or word == end_token or word_id == end_id:
            break
        words.append(word)
        in_text += ' ' + word

    # Build the result from the generated words only, so the start marker can
    # never leak into the caption (it did when no word was generated).
    return ' '.join(words)


In [7]:
def preprocess_image(image_path, target_size=(224, 224)):
    """Read an image file and turn it into a preprocessed single-image batch.

    The image is loaded at ``target_size``, converted to an array, given a
    leading batch axis, and passed through ``preprocess_input``; the result
    of that call is returned.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    batch = np.expand_dims(pixels, axis=0)  # add the leading batch axis
    return preprocess_input(batch)

def extract_image_features(model, image_array):
    """Run ``model.predict`` on ``image_array`` and return the first row of the result."""
    return model.predict(image_array)[0]

def caption_raw_image(image_path, tokenizer, max_length):
    """Produce a caption for the image file at ``image_path``.

    Pipeline: preprocess the file, extract features with the module-level
    ``model_vgg16``, then decode with the module-level ``caption_model``.
    Both models must already be defined when this function is called.
    """
    features = extract_image_features(model_vgg16, preprocess_image(image_path))
    return generate_caption(caption_model, tokenizer, features, max_length)


In [8]:
import pickle

# Inference settings. max_length is the value that was used during training.
max_length = 38
tokenizer_path = '/content/tokenizer.pkl'
image_path = '/content/dog_standing.jpg'

# Restore the tokenizer (assumed to have been pickled by the original notebook).
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open(tokenizer_path, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

print(caption_raw_image(image_path, tokenizer, max_length))


approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches approaches


In [9]:
# Caption a second image, reusing the tokenizer and max_length from the earlier cell.
image_path = '/content/football.jpg'
print(caption_raw_image(image_path, tokenizer, max_length))


derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed derssed
