In [1]:
# import all necessary libraries
import os
import streamlit as st
import openai
import speech_recognition as sr
import pyaudio
import wave
import requests
from io import BytesIO
from PIL import Image
from dotenv import load_dotenv

In [2]:
# Load the OpenAI API key from the environment.
# load_dotenv() runs first so that values from a local .env file are visible
# to the lookup below; keep the key itself out of the source.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Streamlit page header and image upload widget.
st.title("Real-Time AI Image Modifier with Speech")

# Holds the uploaded file object, or a falsy value until the user picks one.
uploaded_image = st.file_uploader(
    label=" Upload an Image",
    type=["jpg", "jpeg", "png"],
)

2025-02-04 01:01:24.551 
  Warning: to view this Streamlit app on a browser, run it with the following
  command:

    streamlit run c:\Users\bharw\Documents\GitHub\vettura-genai\venv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [4]:
# Function to Record Real-Time Audio
def record_audio(filename="recorded_audio.wav", duration=5, sample_rate=44100):
    """Record one spoken phrase from the default microphone and save it as WAV.

    Args:
        filename: Path of the WAV file to write (overwritten if it exists).
        duration: Seconds to wait for speech to begin, and also the maximum
            length in seconds of the recorded phrase.
        sample_rate: Sample rate in Hz of the written WAV data. Pass ``None``
            to keep the rate the audio was captured at.

    Returns:
        The ``filename`` that was written.

    Raises:
        sr.WaitTimeoutError: If no speech starts within ``duration`` seconds.
    """
    recognizer = sr.Recognizer()

    with sr.Microphone() as source:
        st.write("🎙 Speak now...")
        recognizer.adjust_for_ambient_noise(source)
        # `timeout` only bounds the wait for speech to START; without
        # `phrase_time_limit` the recording itself could run indefinitely.
        audio = recognizer.listen(
            source,
            timeout=duration,
            phrase_time_limit=duration,
        )

    with open(filename, "wb") as wav_file:
        # Resample on export so the `sample_rate` argument is actually honoured.
        wav_file.write(audio.get_wav_data(convert_rate=sample_rate))

    return filename

In [5]:
#  Function to Transcribe Audio Using OpenAI Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the OpenAI ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file; it is opened in binary mode.

    Returns:
        The value stored under the ``"text"`` key of the API response.
    """
    # NOTE(review): `openai.Audio` looks like the pre-1.0 SDK interface —
    # confirm the pinned openai version still provides it.
    with open(audio_file, "rb") as audio_stream:
        result = openai.Audio.transcribe(file=audio_stream, model="whisper-1")
    transcript = result["text"]
    return transcript

In [6]:
#  Function to Generate AI Image Using DALL-E 3
def generate_image(prompt, download_timeout=60):
    """Generate one 1024x1024 image with DALL-E 3 and return it as a PIL image.

    Args:
        prompt: Text prompt sent to the image generation endpoint.
        download_timeout: Seconds to wait for the generated image download
            before giving up.

    Returns:
        A ``PIL.Image.Image`` decoded from the downloaded image bytes.

    Raises:
        requests.HTTPError: If the image URL responds with an error status.
        requests.Timeout: If the download exceeds ``download_timeout``.
    """
    response = openai.Image.create(
        model="dall-e-3",
        prompt=prompt,
        n=1,
        size="1024x1024",
    )
    image_url = response["data"][0]["url"]

    # Bound the download so a stalled connection cannot hang the app forever.
    image_response = requests.get(image_url, timeout=download_timeout)
    # Fail here on an HTTP error rather than handing an error page to
    # Image.open, which would surface as a misleading decode failure.
    image_response.raise_for_status()

    return Image.open(BytesIO(image_response.content))

In [7]:
# Real-time speech recording button.
# On success the path of the recording is kept in session state so it
# survives the rerun triggered by the next widget interaction.
if st.button("🎙 Start Real-Time Recording"):
    try:
        audio_path = record_audio(duration=15)
    except sr.WaitTimeoutError:
        # record_audio listens with a timeout; if nobody starts speaking in
        # time the listener raises instead of returning audio. Show a message
        # rather than a raw traceback.
        st.warning("No speech was detected. Please try recording again.")
    else:
        st.session_state["audio_path"] = audio_path
        st.success("Recording Completed")



In [8]:
# Process the image request once an image is uploaded and a recording exists.
# Note: the uploaded image is only displayed for comparison; the new image is
# generated from the transcribed speech alone.
if uploaded_image and "audio_path" in st.session_state and st.button("Generate Modified Image"):
    transcription = transcribe_audio(st.session_state["audio_path"])
    st.write(f" **Transcription:** {transcription}")

    if not transcription or not transcription.strip():
        # An empty prompt cannot produce a meaningful image; skip the call.
        st.warning("The recording could not be transcribed. Please record again.")
    else:
        # Generate the image from the spoken prompt.
        modified_image = generate_image(transcription)

        st.image(uploaded_image, caption="📸 Original Image", use_container_width=True)
        st.image(modified_image, caption="🎨 AI-Generated Image", use_container_width=True)

        # Provide the download from an in-memory PNG instead of a fixed file on
        # disk, which concurrent sessions would overwrite for each other.
        png_buffer = BytesIO()
        modified_image.save(png_buffer, format="PNG")
        st.download_button(
            label=" Download Modified Image",
            data=png_buffer.getvalue(),
            file_name="modified_image.png",
            mime="image/png",
        )