In [4]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.


In [8]:
import keras
import tensorflow
# Report the installed Keras and TensorFlow versions, one per line.
print(keras.__version__, tensorflow.__version__, sep="\n")

2.12.0
2.12.0


In [6]:
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr

# Build the image feature extractor: instantiate MobileNetV2 with ImageNet
# weights, then re-wrap it in a Model whose output is taken from the
# second-to-last layer (layers[-2]) rather than the final layer.
mobilenet_model = MobileNetV2(weights="imagenet")
mobilenet_model = Model(inputs=mobilenet_model.inputs, outputs=mobilenet_model.layers[-2].output)

# Load the trained captioning model from 'mymodel.h5' in the working directory.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: bad marshal data (unknown type code)". That presumably comes
# from this call and may mean the file embeds Python bytecode written by a
# different Python version - TODO confirm.
model = tf.keras.models.load_model('mymodel.h5')

# Load the pickled tokenizer from 'tokenizer.pkl'.
# SECURITY: pickle.load can execute arbitrary code contained in the file; only
# load a tokenizer.pkl that you created yourself or otherwise trust.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Used as `maxlen` when padding token sequences and as the upper bound on the
# number of words generated per caption.
# presumably must equal the value used when the model was trained - TODO confirm
max_caption_length = 34

# Function to get word from index
def get_word_from_index(index, tokenizer):
    """Return the word that ``tokenizer.word_index`` maps to ``index``.

    The mapping is scanned in iteration order and the first word whose value
    equals ``index`` is returned; ``None`` is returned when nothing matches.
    """
    for candidate, position in tokenizer.word_index.items():
        if position == index:
            return candidate
    return None

# Generate caption using the model
def predict_caption(image, model, tokenizer, max_caption_length):
    """Generate a caption for a single image by greedy word-by-word decoding.

    Args:
        image: NumPy array holding one image, either unbatched
            ``(height, width, channels)`` or already batched
            ``(1, height, width, channels)``. It is passed through MobileNetV2's
            ``preprocess_input``, which expects raw 0-255 pixel values.
        model: Captioning model; ``model.predict`` is called with
            ``[image_features, padded_sequence]``.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
        max_caption_length: Padding length for the token sequence and the
            maximum number of words that will be generated.

    Returns:
        The generated caption with the "startseq"/"endseq" markers and any
        surrounding whitespace removed.

    Raises:
        ValueError: If ``image`` is neither 3-D nor 4-D.
    """
    # Accept both an unbatched and an already batched image. Reshaping a 4-D
    # array as if it were 3-D would request the wrong number of elements.
    if image.ndim == 3:
        image = np.expand_dims(image, axis=0)
    elif image.ndim != 4:
        raise ValueError(
            f"Expected a 3-D or 4-D image array, got shape {image.shape}"
        )
    image = preprocess_input(image)

    # Extract features with the module-level MobileNetV2 extractor.
    image_features = mobilenet_model.predict(image, verbose=0)

    # Greedy decoding: feed the caption so far, append the highest-scoring word.
    caption = "startseq"
    for _ in range(max_caption_length):
        sequence = tokenizer.texts_to_sequences([caption])[0]
        sequence = pad_sequences([sequence], maxlen=max_caption_length)
        yhat = model.predict([image_features, sequence], verbose=0)
        predicted_index = np.argmax(yhat)
        predicted_word = get_word_from_index(predicted_index, tokenizer)
        # Stop on the end marker or on an index the tokenizer does not know.
        if predicted_word is None or predicted_word == "endseq":
            break
        caption += " " + predicted_word

    # Drop the sequence markers; strip() removes the space left behind by
    # the removed "startseq" prefix.
    return caption.replace("startseq", "").replace("endseq", "").strip()

# Gradio function to handle image input and generate caption
def generate_caption(uploaded_image):
    """Gradio callback: turn an uploaded image into a caption string.

    Args:
        uploaded_image: The image delivered by the Gradio ``Image`` input, or
            ``None`` when the user submits without uploading anything.

    Returns:
        The caption produced by ``predict_caption``, or a short prompt when no
        image was supplied.
    """
    if uploaded_image is None:
        return "Please upload an image."

    # Convert to a float array and resize to 224x224 here, so the callback
    # does not depend on the UI component delivering a fixed size (reshape
    # cannot change the number of pixels).
    image = img_to_array(uploaded_image)
    image = tf.image.resize(image, (224, 224)).numpy()

    # Hand over the unbatched (224, 224, 3) array; predict_caption adds the
    # batch axis itself.
    return predict_caption(image, model, tokenizer, max_caption_length)

# Gradio interface. The components are created from the top-level gr.Image /
# gr.Textbox classes; the legacy gr.inputs / gr.outputs namespaces were removed
# in Gradio 4. No `shape` argument is needed because generate_caption resizes.
inputs = gr.Image(type="numpy", label="Upload Image")
outputs = gr.Textbox(label="Generated Caption")

# Create Gradio interface
gr.Interface(
    fn=generate_caption,
    inputs=inputs,
    outputs=outputs,
    title="Image Caption Generator",
    description="Upload an image and generate a caption using a trained model.",
).launch()


ValueError: bad marshal data (unknown type code)

In [7]:
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr

# Function to load the tokenizer
def load_tokenizer(tokenizer_path='tokenizer.pkl'):
    """Unpickle and return the tokenizer stored at ``tokenizer_path``.

    Any exception raised while opening or unpickling the file is printed and
    ``None`` is returned instead.

    SECURITY: unpickling can execute arbitrary code from the file, so only
    point this at a file you trust.
    """
    try:
        with open(tokenizer_path, 'rb') as pickled:
            return pickle.load(pickled)
    except Exception as e:
        print(f"Error loading tokenizer: {e}")
    return None

# Function to load MobileNetV2 model
def load_mobilenet():
    """Build the MobileNetV2-based feature extractor.

    MobileNetV2 is instantiated with ImageNet weights and wrapped in a Model
    whose output is that of the second-to-last layer. Any exception is printed
    and ``None`` is returned instead.
    """
    try:
        backbone = MobileNetV2(weights="imagenet")
        return Model(inputs=backbone.inputs, outputs=backbone.layers[-2].output)
    except Exception as e:
        print(f"Error loading MobileNetV2: {e}")
    return None

# Function to load your trained model
def load_trained_model(model_path='mymodel.h5'):
    """Load the trained captioning model stored at ``model_path``.

    Any exception raised by ``tf.keras.models.load_model`` is printed and
    ``None`` is returned instead.

    NOTE(review): the recorded run printed "bad marshal data (unknown type
    code)" from here; that may mean the file embeds Python bytecode written by
    a different Python version - TODO confirm.
    """
    try:
        return tf.keras.models.load_model(model_path)
    except Exception as e:
        print(f"Error loading trained model: {e}")
    return None

# Load the feature extractor, the captioning model and the tokenizer (in that
# order); each loader prints its own error and yields None on failure.
mobilenet_model = load_mobilenet()
model = load_trained_model()
tokenizer = load_tokenizer()

# Abort the cell if any of the three components is missing.
if any(component is None for component in (mobilenet_model, model, tokenizer)):
    raise SystemExit("Failed to load necessary models or tokenizer. Please check the error messages above.")

# Padding length for token sequences and cap on generated words per caption.
max_caption_length = 34

# Function to get word from index
def get_word_from_index(index, tokenizer):
    """Look up the word whose entry in ``tokenizer.word_index`` equals ``index``.

    Entries are checked in the mapping's iteration order; the first match is
    returned, and ``None`` is returned if no entry matches.
    """
    for word, word_id in tokenizer.word_index.items():
        if word_id == index:
            return word
    return None

# Generate caption using the model
def predict_caption(image, model, tokenizer, max_caption_length):
    """Generate a caption for a single image by greedy word-by-word decoding.

    Args:
        image: NumPy array holding one image, either unbatched
            ``(height, width, channels)`` or already batched
            ``(1, height, width, channels)``. It is passed through MobileNetV2's
            ``preprocess_input``, which expects raw 0-255 pixel values.
        model: Captioning model; ``model.predict`` is called with
            ``[image_features, padded_sequence]``.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
        max_caption_length: Padding length for the token sequence and the
            maximum number of words that will be generated.

    Returns:
        The generated caption with the "startseq"/"endseq" markers and any
        surrounding whitespace removed. If anything fails, a string of the
        form "Error generating caption: <message>" is returned instead of
        raising, so the UI can display it.
    """
    try:
        # Accept both an unbatched and an already batched image instead of
        # assuming the input is always 3-D.
        if image.ndim == 3:
            image = np.expand_dims(image, axis=0)
        elif image.ndim != 4:
            raise ValueError(
                f"Expected a 3-D or 4-D image array, got shape {image.shape}"
            )
        image = preprocess_input(image)

        # Extract features with the module-level MobileNetV2 extractor.
        image_features = mobilenet_model.predict(image, verbose=0)

        # Greedy decoding: feed the caption so far, append the best word.
        caption = "startseq"
        for _ in range(max_caption_length):
            sequence = tokenizer.texts_to_sequences([caption])[0]
            sequence = pad_sequences([sequence], maxlen=max_caption_length)
            yhat = model.predict([image_features, sequence], verbose=0)
            predicted_index = np.argmax(yhat)
            predicted_word = get_word_from_index(predicted_index, tokenizer)
            # Stop on the end marker or on an index the tokenizer lacks.
            if predicted_word is None or predicted_word == "endseq":
                break
            caption += " " + predicted_word

        # Drop the sequence markers; strip() removes the space left behind by
        # the removed "startseq" prefix.
        return caption.replace("startseq", "").replace("endseq", "").strip()
    except Exception as e:
        return f"Error generating caption: {e}"

# Gradio function to handle image input and generate caption
def generate_caption(uploaded_image):
    """Gradio callback: turn an uploaded image into a caption string.

    Args:
        uploaded_image: The image delivered by the Gradio ``Image`` input.

    Returns:
        The string returned by ``predict_caption``. If conversion or resizing
        fails, a string of the form "Error processing image: <message>" is
        returned instead of raising, so the UI can display it.
    """
    try:
        # Convert the upload to a float array and resize it to 224x224.
        image = img_to_array(uploaded_image)
        image = tf.image.resize(image, (224, 224)).numpy()
        # Keep the raw 0-255 pixel range: predict_caption applies MobileNetV2's
        # preprocess_input, which expects 0-255 values and rescales them to
        # [-1, 1]. Dividing by 255 here as well would scale the image twice.
        image = image.astype(np.float32)

        # Generate the caption
        return predict_caption(image, model, tokenizer, max_caption_length)
    except Exception as e:
        return f"Error processing image: {e}"

# Gradio interface. No `shape` argument is passed to gr.Image: that parameter
# was removed in Gradio 4, and generate_caption resizes the upload to 224x224
# itself, so the component can deliver the image at its original size.
inputs = gr.Image(type="numpy", label="Upload Image")
outputs = gr.Textbox(label="Generated Caption")

# Create Gradio interface
gr.Interface(
    fn=generate_caption,
    inputs=inputs,
    outputs=outputs,
    title="Image Caption Generator",
    description="Upload an image and generate a caption using a trained model."
).launch()


Error loading trained model: bad marshal data (unknown type code)


SystemExit: Failed to load necessary models or tokenizer. Please check the error messages above.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
