In [None]:
!pip install -r requirements.txt

In [None]:
from speech_recognition import AudioFile, Recognizer

def stt(audio: object, language: str) -> str:
    """Transcribe a recorded utterance into text.

    Args:
        audio: recording of the user's speech, in whatever form
            ``AudioFile`` accepts
        language (str): language code forwarded to the recognizer

    Returns:
        str: the text recognized in the recording
    """
    recognizer = Recognizer()
    with AudioFile(audio) as audio_source:
        # Load the complete recording into memory first, then send it to
        # Google's speech-to-text API while the source is still open.
        recording = recognizer.record(audio_source)
        return recognizer.recognize_google(recording, language=language)

In [None]:
from gtts import gTTS

def tts(text: str, language: str) -> object:
    """Build a gTTS speech object for the given text.

    Args:
        text (str): generated answer of the bot
        language (str): language code passed to gTTS as ``lang``

    Returns:
        object: gTTS object created with ``slow=False``
    """
    speech = gTTS(text=text, lang=language, slow=False)
    return speech

In [None]:
from io import BytesIO

def tts_to_bytesio(tts_object: object) -> bytes:
    """Render a tts object into raw audio bytes.

    Despite the name, the result is a plain ``bytes`` value, not a
    ``BytesIO`` instance.

    Args:
        tts_object (object): audio object obtained from gtts; it must
            provide ``write_to_fp(file_like)``

    Returns:
        bytes: everything ``write_to_fp`` wrote into the buffer
    """
    # The context manager releases the buffer as soon as its contents have
    # been copied out. No seek(0) is needed: getvalue() returns the whole
    # buffer regardless of the current stream position.
    with BytesIO() as buffer:
        tts_object.write_to_fp(buffer)
        return buffer.getvalue()

In [None]:
from base64 import b64encode

def html_audio_autoplay(bytes: bytes, mime_type: str = "audio/mpeg") -> str:
    """Creates html markup for autoplaying audio at gradio app.

    The audio is embedded directly in the markup as a base64 data URI, so
    no file has to be served separately.

    Args:
        bytes (bytes): audio bytes (the parameter name shadows the builtin
            inside this function; it is kept for caller compatibility)
        mime_type (str): MIME type declared for the audio data. Defaults to
            "audio/mpeg" because the app feeds this function gTTS output,
            which is MP3 data rather than WAV.

    Returns:
        str: html snippet with an ``<audio controls autoplay>`` element
    """
    encoded = b64encode(bytes).decode()
    # The MIME type has to appear both in the data URI and in the `type`
    # attribute; keeping them in sync avoids mislabelling the payload.
    return f"""
    <audio controls autoplay>
    <source src="data:{mime_type};base64,{encoded}" type="{mime_type}">
    </audio>
    """

In [None]:
from gradio import Audio, Interface, Textbox
import langchain
import openai
import os

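# The OpenAI client used below expects the API key in the OPENAI_API_KEY
# environment variable. Uncomment and fill this in locally if the key is
# not already set, but never commit a real key to the notebook.
#os.environ["OPENAI_API_KEY"] = ""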

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain

# Chat model behind the bot; temperature=0 asks for the least random replies.
chat_model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
)
# A single module-level chain: every call to main() goes through this same
# ConversationChain object.
conversation = ConversationChain(llm=chat_model)

In [None]:
def main(audio: object) -> tuple:
    """Runs one voice round trip for the gradio app.

    Takes voice input from the user, transcribes it, asks the conversation
    chain for an answer and returns that answer both as text and as
    autoplaying speech.

    Args:
        audio (object): recorded speech of user, or None when the app
            invokes the callback without a recording

    Returns:
        tuple containing

        - user_speech_text (str) : recognized speech
        - bot_response_de (str) : answer of bot
        - html (str) : autoplayer markup for bot's speech

        All three are empty strings when ``audio`` is None.
    """
    # A live interface can invoke the callback with no recording (e.g. after
    # the input has been cleared). There is nothing to transcribe then, so
    # return empty outputs instead of failing while opening the audio.
    if audio is None:
        return "", "", ""

    desired_language = "de"
    user_speech_text = stt(audio, desired_language)
    bot_response_de = conversation.run(user_speech_text)
    # Speak the answer in the same language that was used for recognition.
    bot_voice = tts(bot_response_de, desired_language)
    bot_voice_bytes = tts_to_bytesio(bot_voice)
    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_de, html

In [None]:
# Voice chat UI: a microphone recording goes in; the transcript, the bot's
# reply text and an autoplaying audio element come out.
Interface(
    fn=main,
    # type="filepath" hands main() a path to the recording rather than raw
    # samples.
    inputs=[Audio(source="microphone", type="filepath")],
    outputs=[
        Textbox(label="Sie sagen: "),
        Textbox(label="KI antwort: "),
        "html",
    ],
    allow_flagging="never",
    # live=True reruns main() when the input changes, without a submit click.
    live=True,
).launch(share=True)  # share=True also requests a public gradio.live URL

Hint: Set streaming=True for Audio component to use live streaming.
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()

Setting up a public link... we have recently upgraded the way public links are generated. If you encounter any problems, please report the issue and downgrade to gradio version 3.13.0
.
Running on public URL: https://6ca84447-3d3a-47ab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




In [None]:
# Inspect the memory attached to the shared ConversationChain. This needs
# the cell that defines `conversation` to have run in the current kernel;
# the NameError recorded below suggests it had not (TODO confirm).
conversation.memory

NameError: ignored