In [1]:
# Use the %pip magic so the install targets this kernel's environment, and pin
# the version this notebook was run against (2.18.0) for reproducibility.
%pip install tf-keras==2.18.0

Defaulting to user installation because normal site-packages is not writeable
Collecting tf-keras
  Downloading tf_keras-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Collecting tensorflow<2.19,>=2.18 (from tf-keras)
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow<2.19,>=2.18->tf-keras)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting tensorboard<2.19,>=2.18 (from tensorflow-intel==2.18.0->tensorflow<2.19,>=2.18->tf-keras)
  Downloading tensorboard-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Collecting keras>=3.5.0 (from tensorflow-intel==2.18.0->tensorflow<2.19,>=2.18->tf-keras)
  Downloading keras-3.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading tf_keras-2.18.0-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   -------- -----------------------------



In [2]:
# Confirm which tf-keras version is installed (explicit magic; does not rely on automagic).
%pip show tf-keras

Name: tf_keras
Version: 2.18.0
Summary: Deep learning for humans.
Home-page: https://keras.io/
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache 2.0
Location: C:\Users\SHASHANK\AppData\Roaming\Python\Python312\site-packages
Requires: tensorflow
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [31]:
import tkinter as tk
from tkinter import filedialog,messagebox
import cv2
from tkinter import PhotoImage
import pyttsx3
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input, decode_predictions
import numpy as np
from PIL import Image, ImageTk

In [32]:
# Create the text-to-speech (TTS) engine once; speak() reuses this instance.
engine = pyttsx3.init()

In [33]:
# Build ResNet50 with its pretrained ImageNet weights for image recognition.
model = ResNet50(weights="imagenet")

Image Analysis Module

In [35]:
def preprocess_image(image_path):
    """Read an image from disk and turn it into a ResNet50 input batch.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A batch of one 224x224 image that has been passed through
        ``preprocess_input``.

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an obscure error inside cv2.resize.
        raise ValueError(f"Could not read image file: {image_path}")
    # OpenCV loads pixels in BGR order, while ResNet50's preprocess_input
    # expects RGB (it performs the RGB -> BGR conversion itself).
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    # Add the leading batch axis the model expects.
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)
    return image

In [38]:
def analyze_image(image_path):
    """Predict the classes of the object in the image.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A list with the labels of the top three predictions, in the order
        produced by ``decode_predictions``.
    """
    image = preprocess_image(image_path)
    predictions = model.predict(image)
    # decode_predictions returns one list per batch item; the batch holds a
    # single image, so take the first entry.
    decoded_preds = decode_predictions(predictions, top=3)[0]
    # Each decoded entry is a 3-tuple whose middle element is the label.
    return [label for _, label, _ in decoded_preds]

Generate Response Module

In [47]:
def generate_response(image_tags):
    """Build the sentence announcing the objects found in the image.

    Args:
        image_tags: Iterable of label strings to list in the sentence.

    Returns:
        The response sentence with the labels joined by commas.
    """
    listed_objects = ",".join(image_tags)
    return (
        " The given image contains following objects: "
        + listed_objects
        + ". Thank you,It's Vyoma for you."
    )

Text-To-Speech Module

In [41]:
def speak(text):
    """Convert ``text`` into speech using the module-level TTS engine."""
    tts = engine
    # Queue the utterance, then let the engine process its queue.
    tts.say(text)
    tts.runAndWait()

GUI Functions

In [42]:
def browse_image():
    """
    Opens file dialog to choose an image.

    When a file is chosen, its path is stored in ``image_path``, the preview
    is loaded and the status label is updated. Nothing happens if the dialog
    is cancelled (an empty path is returned).
    """
    # Tk documents the patterns of one file type as a space-separated list;
    # a ';'-joined string is not portable across platforms.
    file_path = filedialog.askopenfilename(
        filetypes=[("Image files", "*.jpg *.jpeg *.png *.bmp")]
    )
    if file_path:
        image_path.set(file_path)
        load_image(file_path)
        result_label.config(text="Image selected. Ready to analyze!")

In [43]:
def load_image(image_path):
    """Loads the selected image into the Tkinter UI.

    Args:
        image_path: Path of the image file to preview. Inside this function
            the parameter shadows the module-level ``image_path`` variable.

    Any error while opening or displaying the image is printed rather than
    raised.
    """
    try:
        # The context manager closes the file handle once the resized copy
        # has been produced.
        with Image.open(image_path) as img:
            preview = img.resize((250, 250))
        img_tk = ImageTk.PhotoImage(preview)
        image_label.config(image=img_tk)
        # Keep a reference on the widget so the PhotoImage is not
        # garbage-collected while it is being displayed.
        image_label.image = img_tk
    except Exception as e:
        print(f"Error loading image: {e}")

In [44]:
def analyze_and_speak():
    """Analyze the image, generate a response, and speak it.

    Reads the selected path from ``image_path``. If none is set, an error
    dialog is shown and nothing else happens. Any exception raised during
    analysis or speech is reported in the status label and in an error dialog.
    """
    image_path_value = image_path.get()
    if not image_path_value:
        messagebox.showerror("Error", "Please select an image first.")
        return

    try:
        # Show loading message. This callback blocks the Tk event loop, so
        # force a redraw or the text would never become visible.
        result_label.config(text="Analyzing image... Please wait.")
        result_label.update_idletasks()

        # Step 1: Analyze the image
        tags = analyze_image(image_path_value)
        result_label.config(text=f"Identified objects: {', '.join(tags)}")
        # Redraw before speaking, since speak() blocks until it finishes.
        result_label.update_idletasks()

        # Step 2: Generate a response
        response_text = generate_response(tags)

        # Step 3: Convert response to speech
        speak(response_text)
        result_label.config(text=f"Response: {response_text}")

    except Exception as e:
        result_label.config(text=f"An error occurred: {e}")
        messagebox.showerror("Error", str(e))

Create GUI

In [48]:
root = tk.Tk()
root.title("Vyoma AI: Image-Assessed Voice Assistant")
root.geometry("600x650")  # Set window size

# Path of the currently selected image. browse_image() writes it and
# analyze_and_speak() reads it, so it must exist before the buttons are used.
# It is created after the root window because a Tk variable needs one.
image_path = tk.StringVar(master=root)

# UI Styling - Classic Color Scheme
root.config(bg="#2C3E50")  # Dark blue background
font_style = ("Arial", 12)

# Labels
title_label = tk.Label(root, text="Vyoma AI: Generative Image-Assessed Voice Assistant", font=("Arial", 14, "bold"), bg="#2C3E50", fg="white", anchor="center")
title_label.pack(pady=15)

# Placeholder that load_image() later fills with the preview picture.
image_label = tk.Label(root, text="No image selected", font=font_style, bg="#2C3E50", fg="white")
image_label.pack(pady=10)

# Status line; wraplength keeps long responses inside the window.
result_label = tk.Label(root, text="Please select an image to analyze.", font=font_style, bg="#2C3E50", fg="white", wraplength=500)
result_label.pack(pady=20)

# Buttons
browse_button = tk.Button(root, text="Browse Image", font=font_style, bg="#27AE60", fg="white", width=20, height=2, command=browse_image)
browse_button.pack(pady=10)

analyze_button = tk.Button(root, text="Analyze & Speak", font=font_style, bg="#2980B9", fg="white", width=20, height=2, command=analyze_and_speak)
analyze_button.pack(pady=10)

# Run the Tkinter event loop (blocks until the window is closed)
root.mainloop()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
