In [11]:
!pip install tensorflow numpy matplotlib

[0m

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# `tf` is only a local alias; `from ... import` needs the real package name,
# so `from tf.keras...` fails with "No module named 'tf'".
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
from tensorflow.keras.models import Model

# Lazily-built VGG16 feature extractor, shared by every preprocess_image call.
_feature_extractor = None


def _get_feature_extractor():
    """Return the truncated VGG16 model, building it on first use only."""
    global _feature_extractor
    if _feature_extractor is None:
        # Local imports keep this helper independent of the module-level
        # import list.
        from tensorflow.keras.applications.vgg16 import VGG16
        from tensorflow.keras.models import Model

        base = VGG16(weights='imagenet')
        # Exclude the final classification layer: expose the output of the
        # layer just before it instead.
        _feature_extractor = Model(
            inputs=base.inputs, outputs=base.layers[-2].output
        )
    return _feature_extractor


def preprocess_image(img_path):
    """Extract a VGG16 feature vector for the image stored at *img_path*.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, run through VGG16's ``preprocess_input`` and then through
    VGG16 truncated at its second-to-last layer.

    The truncated network is built once and reused; the previous version
    re-created VGG16 (and reloaded its ImageNet weights) on every call.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The array returned by ``predict`` for the single-image batch.
        NOTE(review): presumably shape (1, 4096), matching the captioning
        model's image input -- confirm.
    """
    from tensorflow.keras.applications.vgg16 import preprocess_input
    from tensorflow.keras.preprocessing import image

    extractor = _get_feature_extractor()
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch axis
    img_array = preprocess_input(img_array)
    return extractor.predict(img_array)


# Hyperparameters of the captioning model.
max_length = 34      # number of token positions fed to the text branch
vocab_size = 5000    # embedding input size and width of the softmax output
embedding_dim = 256  # width of each token embedding
units = 512          # width shared by both branches and the decoder layer

# Image branch: a 4096-wide feature vector, regularised with dropout and then
# projected to `units`.
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.5)(inputs1)
# Was Dense(256): add() below requires both operands to have the same shape,
# and the LSTM emits `units` (512) features, so a 256-wide projection made
# graph construction fail.
fe2 = Dense(units, activation='relu')(fe1)

# Text branch: token ids -> embeddings (id 0 is masked) -> dropout -> LSTM,
# which returns a single `units`-wide vector.
inputs2 = Input(shape=(max_length,))
se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(units)(se2)

# Decoder: element-wise sum of the two branches, one hidden layer, then a
# softmax over the vocabulary.
decoder1 = add([fe2, se3])
decoder2 = Dense(units, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')

def train_model(model, image_features, captions, epochs=20, batch_size=64):
    """Print one progress line per epoch.

    NOTE(review): this is a placeholder. ``model``, ``image_features``,
    ``captions`` and ``batch_size`` are accepted but never used, and no
    fitting takes place -- only the progress messages are emitted.

    Args:
        model: Unused.
        image_features: Unused.
        captions: Unused.
        epochs: Number of progress lines to print.
        batch_size: Unused.
    """
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}/{epochs} - Training in progress...")

def generate_caption(model, image_feature, tokenizer, max_length):
    """Greedily decode a caption for one image, one word per step.

    Starting from ``'startseq'``, the text so far is tokenised, padded to
    *max_length*, and fed to *model* together with *image_feature*; the
    highest-scoring id is mapped back to a word and appended. Decoding stops
    after *max_length* steps, when ``'endseq'`` is produced, or when the
    predicted id has no word in ``tokenizer.index_word``.

    Args:
        model: Object whose ``predict([image_feature, sequence], verbose=0)``
            returns scores over word ids.
        image_feature: Image input passed to ``model.predict`` unchanged.
        tokenizer: Object providing ``texts_to_sequences`` and an
            ``index_word`` mapping from id to word.
        max_length: Padded sequence length and maximum number of steps.

    Returns:
        The generated text, including the leading ``'startseq'`` and, when
        it was produced, the trailing ``'endseq'``.
    """
    in_text = 'startseq'
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = tf.keras.preprocessing.sequence.pad_sequences([sequence], maxlen=max_length)
        yhat = model.predict([image_feature, sequence], verbose=0)
        word_id = int(np.argmax(yhat))
        # .get() rather than [...]: an id with no entry used to raise
        # KeyError, which made the `word is None` stop condition unreachable.
        word = tokenizer.index_word.get(word_id)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    return in_text
