# # AI tool that generates captions for an image provided by the user. It uses MobileNetV2 to predict the scene shown in the image and an OpenAI GPT-3 language model (text-davinci-003) to produce creative, catchy captions based on the predicted scene.

In [5]:
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk, Image
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input, decode_predictions
import cv2
import numpy as np
import openai

# Set up OpenAI API credentials.
# Read the key from the OPENAI_API_KEY environment variable when it is set, so
# that a real key never has to be written into this file; otherwise fall back
# to the original placeholder string.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR  OPENAI API KEY ")

# Load the MobileNetV2 model with ImageNet weights; it is used by
# recognize_objects_and_scenes() to classify the uploaded image.
model = MobileNetV2(weights='imagenet')

# Create the Tkinter window (title, 900x700 size, background colour)
window = tk.Tk()
window.title("Simra's Image Caption Generator")
window.geometry("900x700")
window.configure(bg="#FFFACD")

# Declare global variables
# Path of the selected image: written by upload_image() and read by
# generate_caption(). Starts out empty.
file_path = ""

def upload_image():
    """Ask the user for an image file and show a preview of it.

    Opens a file-selection dialog, loads the chosen file with Pillow, resizes
    it to 400x300 and displays it in ``image_label``. Once the image has been
    loaded, the chosen path is stored in the module-level ``file_path`` and
    the "Generate Caption" button is enabled.

    If the dialog is dismissed without a selection nothing changes, so a
    previously selected image (if any) stays active.
    """
    global file_path
    selected_path = filedialog.askopenfilename()
    # The dialog yields an empty value when it is cancelled. Passing that to
    # Image.open() would raise, and storing it in file_path would discard a
    # previously chosen image.
    if not selected_path:
        return

    preview = Image.open(selected_path).resize((400, 300))
    photo = ImageTk.PhotoImage(preview)

    # Only remember the path once the file is known to load as an image.
    file_path = selected_path
    image_label.configure(image=photo)
    # Keep a reference on the widget so the PhotoImage is not garbage
    # collected while it is being displayed.
    image_label.image = photo
    generate_button.configure(state=tk.NORMAL)

def recognize_objects_and_scenes(image):
    """Classify an image with the module-level MobileNetV2 ``model``.

    Args:
        image: Image array in BGR channel order; it is converted to RGB,
            resized to 224x224 and passed through ``preprocess_input``
            before prediction.

    Returns:
        A ``(decoded_predictions, scene_class_name)`` tuple.
        ``decoded_predictions`` holds the 10 highest-scoring decoded
        predictions and ``scene_class_name`` is the name field (index 1) of
        the highest-scoring one.
    """
    # Preprocess the input image
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))
    batch = np.expand_dims(preprocess_input(resized), axis=0)

    # Perform object recognition
    predictions = model.predict(batch)
    decoded_predictions = decode_predictions(predictions, top=10)[0]

    # Perform scene identification. The decoded list is ordered by
    # descending score, so its first entry is the top-1 prediction and a
    # second decode pass over the same tensor is unnecessary.
    scene_class_name = decoded_predictions[0][1]

    return decoded_predictions, scene_class_name

def generate_caption(tone):
    """Generate captions for the currently selected image and display them.

    Reads the image at the module-level ``file_path``, classifies it with
    ``recognize_objects_and_scenes`` and requests three completions from the
    OpenAI completion API (engine ``text-davinci-003``). The stripped
    completion texts are shown through ``display_captions``.

    NOTE: this runs synchronously inside the button callback, so the window
    does not process events until the API call returns.

    Args:
        tone: Caption tone chosen by the user (e.g. "Happy"); it is
            lower-cased before being inserted into the prompt.
    """
    # cv2.imread() yields pixels in BGR order and returns None when the file
    # cannot be decoded; without this check the failure would surface as an
    # obscure OpenCV error further down.
    bgr_image = cv2.imread(file_path)
    if bgr_image is None:
        display_captions(["Could not read the selected image. Please upload a different file."])
        return

    # Hand over the BGR array unchanged: recognize_objects_and_scenes() does
    # the BGR -> RGB conversion itself. Converting here as well swapped the
    # channels twice, so the classifier was fed BGR data.
    _, scene_class_name = recognize_objects_and_scenes(bgr_image)

    # Generate captions using ChatGPT-3
    prompt = f"The scene in the image is {scene_class_name}. Generate a {tone.lower()} catchy and creative caption ."
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=50,
        n=3,  # Generate 3 different captions
        temperature=0.7,
        stop=None,
    )
    captions = [choice["text"].strip() for choice in response.choices]

    display_captions(captions)

def display_captions(captions):
    """Show the given captions in ``caption_label``, one caption per line."""
    caption_text = "\n".join(captions)
    caption_label.configure(text=caption_text)

# Configure button styles: keyword options shared by the "Upload Image" and
# "Generate Caption" buttons.
button_style = {"font": ("Arial", 14), "bg": "#FFC300", "fg": "#000000", "relief": tk.RAISED, "width": 15, "height": 1}

# Button that opens the file dialog (see upload_image).
upload_button = tk.Button(window, text="Upload Image", command=upload_image, **button_style)
upload_button.place(relx=0.2, rely=0.1, anchor=tk.CENTER)

# Label that shows the preview of the uploaded image; empty until
# upload_image() assigns an image to it.
image_label = tk.Label(window, bg="#FFFACD")
image_label.place(relx=0.5, rely=0.3, anchor=tk.CENTER)

# Caption for the tone drop-down below.
tone_label = tk.Label(window, text="Select Caption Tone:", font=("Arial", 14), bg="#FFFACD")
tone_label.place(relx=0.5, rely=0.5, anchor=tk.CENTER)

# Drop-down holding the selectable caption tones; the current choice lives in
# tone_var and is read when "Generate Caption" is clicked.
tone_var = tk.StringVar(window)
tone_var.set("Adventurous")  # Default tone
tone_dropdown = tk.OptionMenu(window, tone_var, "Adventurous", "Professional", "Happy", "Sad", "Humorous", "Motivating")
tone_dropdown.config(font=("Arial", 14), bg="#FFC300", fg="#000000", relief=tk.RAISED, width=15, height=2)
tone_dropdown.place(relx=0.5, rely=0.6, anchor=tk.CENTER)

# The lambda defers tone_var.get() until the click, so the tone selected at
# that moment is used. The button starts disabled; upload_image() enables it.
generate_button = tk.Button(window, text="Generate Caption", command=lambda: generate_caption(tone_var.get()), **button_style)
generate_button.place(relx=0.5, rely=0.7, anchor=tk.CENTER)
generate_button.configure(state=tk.DISABLED)

# Label that display_captions() fills with the generated captions.
caption_label = tk.Label(window, wraplength=800, font=("Arial", 16), justify=tk.CENTER, bg="#FFFACD")
caption_label.place(relx=0.5, rely=0.9, anchor=tk.CENTER)

# Start the Tk event loop; this call blocks until the window is closed.
window.mainloop()




In [6]:
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk, Image
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input, decode_predictions
import cv2
import numpy as np
import easyocr
import openai

# Set up OpenAI API credentials.
# Read the key from the OPENAI_API_KEY environment variable when it is set, so
# that a real key never has to be written into this file; otherwise fall back
# to the original placeholder string.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR  OPENAI API KEY")

# Load the MobileNetV2 model with ImageNet weights; it is used by
# recognize_objects_and_scenes() to classify the uploaded image.
model = MobileNetV2(weights='imagenet')

# Create the Tkinter window (title, 900x700 size, background colour)
window = tk.Tk()
window.title("Image Caption Generator")
window.geometry("900x700")
window.configure(bg="#FFFACD")

# Declare global variables
# Path of the selected image: written by upload_image() and read by
# generate_caption(). Starts out empty.
file_path = ""

def upload_image():
    """Ask the user for an image file and show a preview of it.

    Opens a file-selection dialog, loads the chosen file with Pillow, resizes
    it to 400x300 and displays it in ``image_label``. Once the image has been
    loaded, the chosen path is stored in the module-level ``file_path`` and
    the "Generate Caption" button is enabled.

    If the dialog is dismissed without a selection nothing changes, so a
    previously selected image (if any) stays active.
    """
    global file_path
    selected_path = filedialog.askopenfilename()
    # The dialog yields an empty value when it is cancelled. Passing that to
    # Image.open() would raise, and storing it in file_path would discard a
    # previously chosen image.
    if not selected_path:
        return

    preview = Image.open(selected_path).resize((400, 300))
    photo = ImageTk.PhotoImage(preview)

    # Only remember the path once the file is known to load as an image.
    file_path = selected_path
    image_label.configure(image=photo)
    # Keep a reference on the widget so the PhotoImage is not garbage
    # collected while it is being displayed.
    image_label.image = photo
    generate_button.configure(state=tk.NORMAL)

def recognize_objects_and_scenes(image):
    """Classify an image with the module-level MobileNetV2 ``model``.

    Args:
        image: Image array in BGR channel order; it is converted to RGB,
            resized to 224x224 and passed through ``preprocess_input``
            before prediction.

    Returns:
        A ``(decoded_predictions, scene_class_name)`` tuple.
        ``decoded_predictions`` holds up to 1000 decoded predictions and
        ``scene_class_name`` is the name field (index 1) of the
        highest-scoring one.
    """
    # Preprocess the input image
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))
    batch = np.expand_dims(preprocess_input(resized), axis=0)

    # Perform object recognition
    predictions = model.predict(batch)
    decoded_predictions = decode_predictions(predictions, top=1000)[0]

    # Perform scene identification. The decoded list is ordered by
    # descending score, so its first entry is the top-1 prediction and a
    # second decode pass over the same tensor is unnecessary.
    scene_class_name = decoded_predictions[0][1]

    return decoded_predictions, scene_class_name

# Shared EasyOCR reader, created lazily by _get_ocr_reader().
_ocr_reader = None

def _get_ocr_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _ocr_reader
    if _ocr_reader is None:
        _ocr_reader = easyocr.Reader(['en'])
    return _ocr_reader

def recognize_text(image):
    """Run OCR on an image and return the recognised text as one string.

    The EasyOCR reader is built once and reused; constructing a new reader
    on every call repeats its initialisation for each caption request.

    Args:
        image: Image to scan; it is passed unchanged to the reader's
            ``readtext`` method.

    Returns:
        The text field (index 1) of every detection, joined with single
        spaces. Empty string when nothing is detected.
    """
    detections = _get_ocr_reader().readtext(image)
    return ' '.join(detection[1] for detection in detections)

def generate_caption(tone):
    """Generate captions for the currently selected image and display them.

    Reads the image at the module-level ``file_path``, classifies it with
    ``recognize_objects_and_scenes``, extracts any text with
    ``recognize_text`` and requests three completions from the OpenAI
    completion API (engine ``text-davinci-003``). The stripped completion
    texts are shown through ``display_captions``.

    NOTE: this runs synchronously inside the button callback, so the window
    does not process events until OCR and the API call have finished.

    Args:
        tone: Caption tone chosen by the user (e.g. "Happy"); it is
            lower-cased before being inserted into the prompt.
    """
    # cv2.imread() yields pixels in BGR order and returns None when the file
    # cannot be decoded; without this check the failure would surface as an
    # obscure OpenCV error further down.
    bgr_image = cv2.imread(file_path)
    if bgr_image is None:
        display_captions(["Could not read the selected image. Please upload a different file."])
        return

    # Hand over the BGR array unchanged: recognize_objects_and_scenes() does
    # the BGR -> RGB conversion itself. Converting before the call as well
    # swapped the channels twice, so the classifier was fed BGR data.
    _, scene_class_name = recognize_objects_and_scenes(bgr_image)

    # OCR keeps receiving the RGB-converted array, exactly as before.
    recognized_text = recognize_text(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

    # Generate captions using ChatGPT-3
    prompt = f"The scene in the image is {scene_class_name}. The recognized text is '{recognized_text}'. Generate a {tone.lower()} long catchy and creative caption."
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=50,
        n=3,  # Generate 3 different captions
        temperature=0.7,
        stop=None,
    )
    captions = [choice["text"].strip() for choice in response.choices]

    display_captions(captions)

def display_captions(captions):
    """Show the given captions in ``caption_label``, one caption per line."""
    caption_text = "\n".join(captions)
    caption_label.configure(text=caption_text)

# Configure button styles: keyword options shared by the "Upload Image" and
# "Generate Caption" buttons.
button_style = {"font": ("Arial", 14), "bg": "#FFC300", "fg": "#000000", "relief": tk.RAISED, "width": 15, "height": 1}

# Button that opens the file dialog (see upload_image).
upload_button = tk.Button(window, text="Upload Image", command=upload_image, **button_style)
upload_button.place(relx=0.2, rely=0.1, anchor=tk.CENTER)

# Label that shows the preview of the uploaded image; empty until
# upload_image() assigns an image to it.
image_label = tk.Label(window, bg="#FFFACD")
image_label.place(relx=0.5, rely=0.3, anchor=tk.CENTER)

# Caption for the tone drop-down below.
tone_label = tk.Label(window, text="Select Caption Tone:", font=("Arial", 14), bg="#FFFACD")
tone_label.place(relx=0.5, rely=0.5, anchor=tk.CENTER)

# Drop-down holding the selectable caption tones; the current choice lives in
# tone_var and is read when "Generate Caption" is clicked.
tone_var = tk.StringVar(window)
tone_var.set("Adventurous")  # Default tone
tone_dropdown = tk.OptionMenu(window, tone_var, "Adventurous", "Professional", "Happy", "Sad", "Humorous", "Motivating")
tone_dropdown.config(font=("Arial", 14), bg="#FFC300", fg="#000000", relief=tk.RAISED, width=15, height=2)
tone_dropdown.place(relx=0.5, rely=0.6, anchor=tk.CENTER)

# The lambda defers tone_var.get() until the click, so the tone selected at
# that moment is used. The button starts disabled; upload_image() enables it.
generate_button = tk.Button(window, text="Generate Caption", command=lambda: generate_caption(tone_var.get()), **button_style)
generate_button.place(relx=0.5, rely=0.7, anchor=tk.CENTER)
generate_button.configure(state=tk.DISABLED)

# Label that display_captions() fills with the generated captions.
caption_label = tk.Label(window, wraplength=800, font=("Arial", 16), justify=tk.CENTER, bg="#FFFACD")
caption_label.place(relx=0.5, rely=0.9, anchor=tk.CENTER)

# Start the Tk event loop; this call blocks until the window is closed.
window.mainloop()




CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.




CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
