In [1]:
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import os
from io import BytesIO


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Keep whatever value is already set; otherwise fall back to a placeholder string
# so both variables are always present in os.environ.
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ.setdefault('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')

# Shared client used by the cells below. It is built with no explicit arguments;
# the OpenAI SDK is documented to read OPENAI_API_KEY from the environment.
openai = OpenAI()

In [3]:
# System prompt placed at the start of every chat request.
system_message = (
    "You are a conversation assistant for elderly. "
    "Don't answer questions you don't have any data about"
)

In [4]:
from pydub import AudioSegment
from pydub.playback import play

def talker(message):
    """Speak `message` aloud using OpenAI text-to-speech.

    The text is sent to the ``tts-1`` model with the ``onyx`` voice; the
    returned MP3 bytes are decoded with pydub and handed to ``play``.

    Args:
        message: Text to speak. ``None``, empty, or whitespace-only input is
            skipped, because there is nothing to synthesize and the request
            could only fail.

    Returns:
        None.
    """
    # Guard first so no API call is made for empty input.
    if not str(message or "").strip():
        return

    response = openai.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=message
    )

    # Wrap the in-memory MP3 payload so pydub can read it like a file.
    audio_stream = BytesIO(response.content)
    audio = AudioSegment.from_file(audio_stream, format="mp3")
    play(audio)

In [5]:
def stream_gpt4o(prompt):
    """Stream a GPT-4o reply to `prompt`, then read the finished reply aloud.

    This is a generator. After each streamed chunk that carries a choice it
    yields the reply text accumulated so far, so a caller can redraw the
    response progressively. Once the stream is exhausted, the complete text is
    passed to `talker` (only reached if the generator is consumed to the end).

    Args:
        prompt: The user's message; sent after the module-level
            `system_message`.

    Yields:
        str: The reply accumulated up to and including the current chunk.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]
    stream = openai.chat.completions.create(
        model='gpt-4o',
        messages=messages,
        stream=True
    )

    result = ""
    for chunk in stream:
        # A chunk may arrive with an empty `choices` list (e.g. usage-only or
        # provider preamble chunks); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        # delta.content is None on chunks that carry no text.
        result += chunk.choices[0].delta.content or ""
        yield result

    # Only speak when the model actually produced some text.
    if result.strip():
        talker(result)

In [6]:
# Built-in "Soft" theme for the UI.
theme = gr.themes.Soft()

# One text box in, one Markdown panel out. `stream_gpt4o` is a generator, so
# the interface receives a sequence of progressively longer replies.
view = gr.Interface(
    fn=stream_gpt4o,
    inputs=[gr.Textbox(label="Your message:")],
    outputs=[gr.Markdown(label="Response:")],
    theme=theme,
    allow_flagging="never",
)

# Start the local web server for the interface.
view.launch()



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [7]:
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wavfile
import whisper
import time

# Whisper model; you can use "tiny", "base", "small", "medium", or "large"
model = whisper.load_model("base")

samplerate = 16000  # Whisper prefers 16kHz audio
duration = 5        # seconds to record

# Pause *before* prompting, so that capture begins as soon as the user is told
# to speak. Prompting first and then sleeping would drop the first second of speech.
time.sleep(1)
# Build the prompt from `duration` so the message stays correct if it is changed.
print(f"Recording for {duration} seconds... Please speak now!")

# Record mono 16-bit audio from the default input device
recording = sd.rec(int(duration * samplerate),
                   samplerate=samplerate,
                   channels=1,
                   dtype='int16')
sd.wait()  # Block until recording is finished
print("Recording complete. Transcribing...")

# Save to a WAV file (written to the current working directory and left in place)
wavfile.write("temp.wav", samplerate, recording)

# Transcribe with Whisper
# You could also do model.transcribe(np array) in memory,
# but saving to a file first is simpler for demonstration.
# fp16=False requests full-precision decoding.
result = model.transcribe("temp.wav", fp16=False)

print("Transcription:", result["text"])


100%|███████████████████████████████████████| 139M/139M [00:07<00:00, 19.3MiB/s]
  checkpoint = torch.load(fp, map_location=device)


Recording for 5 seconds... Please speak now!
Recording complete. Transcribing...
Transcription:  Hello there. How are you?


Input #0, wav, from '/var/folders/w2/nyq_87_d7l5dzyd9hf6z84lw0000gn/T/tmpzf74wt9r.wav':
  Duration: 00:00:43.87, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
  43.81 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


