# Building an AI-Powered Image Captioning GUI with Tkinter and Hugging Face Transformers
***

This cell imports the modules needed to build a graphical user interface (GUI) that lets users select images and generate captions for them using a pre-trained model from Hugging Face’s **Transformers** library.

In [1]:
# Import Tkinter for creating the graphical user interface (GUI)
import tkinter as tk

# Import specific Tkinter components for file selection, labels, buttons, frames, canvas, and scrollbars
from tkinter import filedialog, Label, Button, Frame, Canvas, Scrollbar

# Import Image and ImageTk from Pillow for image processing and compatibility with Tkinter
from PIL import Image, ImageTk

# Import BLIP processor and model from Hugging Face Transformers for image captioning
from transformers import BlipProcessor, BlipForConditionalGeneration

  torch.utils._pytree._register_pytree_node(


### Initialize the processor and model from Hugging Face

In [2]:
# Both the processor and the model are loaded from the same Hugging Face checkpoint.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# The BLIP processor prepares image data for the model.
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
# The pre-trained BLIP model generates the image captions.
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

  return torch.load(checkpoint_file, map_location=map_location)


### Function to generate caption for a single image

In [3]:
def generate_caption(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    Args:
        image_path: Path to an image file that Pillow can open.

    Returns:
        The caption decoded from the model output, with special tokens
        stripped.

    Errors raised by Pillow while opening the file are not handled here and
    propagate to the caller.
    """
    # Open the file in a context manager so its handle is released as soon as
    # the pixel data has been turned into model inputs.
    with Image.open(image_path) as image:
        # Normalize to RGB so palette, grayscale and alpha images reach the
        # processor in the same three-channel form, then convert the image
        # into model-compatible inputs.
        inputs = processor(image.convert("RGB"), return_tensors="pt")

    # Generate a caption for the image with at most 50 new tokens
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode the model output into a readable caption text
    caption = processor.decode(outputs[0], skip_special_tokens=True)

    return caption

### Function to select images and display captions

In [4]:
def select_images():
    """Let the user pick image files and show each one with its caption.

    Opens a multi-file selection dialog, then fills ``image_frame`` with a
    grid of thumbnails (three per row), each with its generated caption in
    the grid row directly below it. Whatever ``image_frame`` showed before
    is replaced, but only once the user has actually chosen files, so
    cancelling the dialog leaves the current results on screen.

    A file that cannot be opened as an image gets an error message in its
    grid cell instead of aborting the rest of the batch.
    """
    # Ask for the files first; clearing before this point would wipe the
    # previous results even when the user cancels the dialog.
    file_paths = filedialog.askopenfilenames(filetypes=[("Image files", "*.jpg *.jpeg *.png *.bmp")])
    if not file_paths:
        return

    # Clear previous images and captions
    for widget in image_frame.winfo_children():
        widget.destroy()

    max_columns = 3  # Number of images per row

    # Process each selected image
    for index, file_path in enumerate(file_paths):
        # Every image takes two grid rows: the thumbnail and its caption.
        image_row, col = divmod(index, max_columns)
        row = image_row * 2

        try:
            # Generate caption for the image
            caption = generate_caption(file_path)

            # Open the image and shrink it for display; the file handle is
            # released once the pixels have been copied into the PhotoImage.
            with Image.open(file_path) as img:
                img.thumbnail((200, 200))  # Resize for display in the GUI
                img_tk = ImageTk.PhotoImage(img)
        except OSError as exc:
            # Unreadable or corrupt file: report it in place and carry on
            # with the remaining selections.
            error_label = Label(image_frame, text=f"Could not load image:\n{file_path}\n{exc}",
                                wraplength=200, justify="center",
                                bg="white", fg="red", font=("Helvetica", 10, "italic"))
            error_label.grid(row=row, column=col, padx=10, pady=10)
            continue

        # Display image and caption in the GUI
        img_label = Label(image_frame, image=img_tk, bg="white")
        img_label.image = img_tk  # Keep a reference to avoid garbage collection
        img_label.grid(row=row, column=col, padx=10, pady=10)

        caption_label = Label(image_frame, text=caption, wraplength=200, justify="center",
                              bg="white", fg="black", font=("Helvetica", 10, "italic"))
        caption_label.grid(row=row + 1, column=col, padx=10, pady=5)

        # Captioning blocks the Tk event loop, so flush pending redraws to
        # show each result as soon as it is ready.
        image_frame.update_idletasks()

### Create the main GUI window

In [5]:
# ---- Top-level window ----
root = tk.Tk()
root.title("Image Caption Generator")
root.geometry("600x600")
root["bg"] = "white"  # White background for the main window

# ---- Header: instructions and the file-selection button ----
instruction_label = tk.Label(
    root,
    text="Select multiple images to generate captions",
    font=("Arial", 14),
    fg="black",
    bg="white",
)
instruction_label.pack(pady=10)

select_button = tk.Button(
    root,
    text="Select Images",
    command=select_images,
    font=("Arial", 12),
    bg="lightgray",
    fg="black",
    activebackground="darkgray",
)
select_button.pack(pady=20)

# ---- Scrollable results area ----
# A Frame cannot scroll on its own, so the results frame is embedded in a
# Canvas that is driven by a vertical scrollbar.
canvas = tk.Canvas(root, bg="white", width=580, height=400)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

scrollbar = tk.Scrollbar(root, orient=tk.VERTICAL, command=canvas.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

# Let the canvas report its view position back to the scrollbar.
canvas["yscrollcommand"] = scrollbar.set

# Frame that holds the thumbnails and captions, anchored at the canvas origin.
image_frame = tk.Frame(canvas, bg="white")
canvas.create_window(0, 0, window=image_frame, anchor=tk.NW)


def update_scroll_region(event):
    """Resize the canvas scroll region to the bounding box of all its items."""
    bounds = canvas.bbox("all")
    canvas.configure(scrollregion=bounds)


# Recompute the scroll region whenever the frame's size changes.
image_frame.bind("<Configure>", update_scroll_region)

# Hand control to the Tk event loop.
root.mainloop()

***