In [90]:
import os
import tkinter as tk
from pickle import load
from tkinter import filedialog

import matplotlib.pyplot as plt
import numpy as np
from keras.applications.xception import Xception
from PIL import Image, ImageTk
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [91]:
# Load the tokenizer and derive the sizes the caption model is built with
# NOTE(security): unpickling executes arbitrary code embedded in the file;
# only load a tokenizer.p that you created yourself or otherwise trust.
with open('tokenizer.p', 'rb') as tokenizer_file:  # closes the handle after loading
    tokenizer = load(tokenizer_file)

# One more than the number of known words, so the largest word index is a
# valid position in the embedding and softmax layers.
vocab_size = len(tokenizer.word_index) + 1
# Length every input word sequence is padded to.
# NOTE(review): presumably has to match the value used at training time - confirm.
max_length = 32

# Keras layers and Model class needed to rebuild the trained network
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
from tensorflow.keras.models import Model

In [115]:
def define_model(vocab_size, max_length):
    """Build and compile the two-input caption network.

    The first input is a vector of 2048 image-feature values, the second a
    sequence of ``max_length`` word indices. Each branch is reduced to 256
    units, the two are summed, and the sum is decoded into a softmax over
    ``vocab_size`` words.

    Args:
        vocab_size: Size of the embedding vocabulary and of the output layer.
        max_length: Length of the word-index input sequence.

    Returns:
        The compiled model (categorical cross-entropy loss, Adam optimizer).
    """
    # NOTE(review): layers are created in the same order as before on purpose;
    # weights are restored into this model afterwards, so keep the order stable.

    # Image-feature branch
    image_input = Input(shape=(2048,))
    image_dropped = Dropout(0.5)(image_input)
    image_encoded = Dense(256, activation='relu')(image_dropped)

    # Word-sequence branch; index 0 is masked by the embedding (mask_zero=True)
    words_input = Input(shape=(max_length,))
    words_embedded = Embedding(vocab_size, 256, mask_zero=True)(words_input)
    words_dropped = Dropout(0.5)(words_embedded)
    words_encoded = LSTM(256)(words_dropped)

    # Decoder: element-wise sum of both branches, then predict the next word
    merged = add([image_encoded, words_encoded])
    hidden = Dense(256, activation='relu')(merged)
    next_word_probs = Dense(vocab_size, activation='softmax')(hidden)

    captioner = Model(inputs=[image_input, words_input], outputs=next_word_probs)
    captioner.compile(loss='categorical_crossentropy', optimizer='adam')
    return captioner


In [139]:
# Location of the trained caption-model weights. Set the CAPTION_MODEL_WEIGHTS
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded location is used unchanged.
MODEL_WEIGHTS_PATH = os.environ.get(
    "CAPTION_MODEL_WEIGHTS",
    r"C:\Users\ASUS\Desktop\Jupyter Notebook\ML_Project\models2\model_image_caption4.h5",
)

# Rebuild the architecture first, then restore the trained weights into it
model = define_model(vocab_size, max_length)
model.load_weights(MODEL_WEIGHTS_PATH)

# Load Xception model for feature extraction
# (no classification head; average pooling is applied to its output)
xception_model = Xception(include_top=False, pooling="avg")

In [140]:
# Define Helper Functions 
def extract_features(filename, model):
    """Load an image file and return the features ``model`` predicts for it.

    The image is resized to 299x299, forced to three RGB channels, scaled
    from 0..255 to the -1..1 range and passed to ``model.predict`` as a batch
    containing that single image.

    Args:
        filename: Path (or file object) that ``PIL.Image.open`` accepts.
        model: Object exposing a Keras-style ``predict(batch, verbose=0)``.

    Returns:
        Whatever ``model.predict`` returns for the one-image batch.

    Raises:
        OSError: Propagated from Pillow when the file is missing or cannot be
            decoded as an image.
    """
    # The context manager releases the file handle; resize() returns an
    # independent copy, so the pixels stay usable after the file is closed.
    with Image.open(filename) as source:
        # Resize first, then convert: for RGB and RGBA inputs this yields the
        # same pixels as the previous "drop the 4th channel" logic, while
        # grayscale, palette, LA and CMYK images now become proper 3-channel
        # RGB instead of crashing or being mis-sliced.
        rgb_image = source.resize((299, 299)).convert("RGB")

    batch = np.expand_dims(np.array(rgb_image), axis=0)
    batch = batch / 127.5 - 1.0  # map 0..255 to -1..1
    return model.predict(batch, verbose=0)

def word_for_id(integer, tokenizer):
    """Return the word that ``tokenizer.word_index`` maps to ``integer``.

    Scans the word -> index mapping and returns the first word whose index
    equals ``integer``; returns None when no word has that index.
    """
    matching_words = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(matching_words, None)

In [141]:
def generate_desc(model, tokenizer, photo, max_length):
    """Generate a description for ``photo`` one word at a time.

    Starting from the word 'start', the text so far is encoded, padded to
    ``max_length`` and fed to ``model`` together with ``photo``; the
    highest-scoring index is turned back into a word and appended. Generation
    stops after ``max_length`` words, when the predicted index has no word,
    or once the word 'end' has been appended.

    Returns:
        The generated text, including the leading 'start' and, when it was
        produced, the trailing 'end'.
    """
    words = ['start']
    for _step in range(max_length):
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        scores = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(np.argmax(scores), tokenizer)
        if next_word is None:
            # Predicted index is not in the vocabulary; stop here
            break
        words.append(next_word)
        if next_word == 'end':
            break
    return ' '.join(words)

In [142]:
# Tkinter App
def _strip_sequence_markers(description):
    """Remove the leading 'start' and trailing 'end' marker words, if present."""
    words = description.split()
    if words and words[0] == 'start':
        words = words[1:]
    if words and words[-1] == 'end':
        words = words[:-1]
    return ' '.join(words)


def select_image():
    """Let the user pick an image, show a preview and display its caption.

    Button callback: opens a file dialog, does nothing if it is cancelled,
    otherwise shows a 250x250 preview in ``image_panel``, runs feature
    extraction and caption generation, and writes the caption into
    ``result_label``. A file that cannot be read as an image is reported in
    ``result_label`` instead of being captioned.
    """
    file_path = filedialog.askopenfilename()
    if not file_path:
        return  # dialog was cancelled

    # Display image
    try:
        # Close the file handle as soon as the resized copy exists
        with Image.open(file_path) as picked:
            preview = picked.resize((250, 250))
    except OSError as err:
        result_label.config(text="Could not open image:\n" + str(err))
        return
    preview_photo = ImageTk.PhotoImage(preview)
    image_panel.config(image=preview_photo)
    # Keep a reference on the widget so the image object is not garbage
    # collected while it is being displayed.
    image_panel.image = preview_photo

    # Predict caption
    photo = extract_features(file_path, xception_model)
    description = generate_desc(model, tokenizer, photo, max_length)

    # Only whole marker words at the ends are removed; the previous
    # str.replace calls also cut 'start'/'end' out of real words
    # (e.g. "friend" -> "fri", "weekend" -> "week").
    caption = _strip_sequence_markers(description)
    result_label.config(text="Caption:\n" + caption)


In [None]:

# Main application window
root = tk.Tk()
root.title("Image Caption Generator")
root.geometry("800x800")

# Widgets, created top to bottom: picker button, image preview, caption text.
# image_panel and result_label are the globals that select_image() updates.
btn = tk.Button(
    root,
    text="Select Image",
    font=('Arial', 14),
    command=select_image,
)
image_panel = tk.Label(root)
result_label = tk.Label(
    root,
    text="Caption for the image...",
    font=('Arial', 12),
    wraplength=350,
)

# Stack the widgets vertically in creation order, each with the same padding
btn.pack(pady=20)
image_panel.pack(pady=20)
result_label.pack(pady=20)

# Start the Tk event loop
root.mainloop()
