# In [None]:
import base64
import random
import tempfile

import numpy as np
import requests
import streamlit as st
import whisper
from audiorecorder import audiorecorder
from elevenlabs import generate, set_api_key
from langchain import LLMChain, OpenAI, PromptTemplate
from loguru import logger


def fetch_gif(url: str, timeout: float = 10.0) -> "str | None":
    """Fetch a GIF from a URL and return it as a base64-encoded string.

    Args:
        url: Address of the GIF to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The GIF bytes encoded as base64 and decoded to a UTF-8 ``str``, or
        ``None`` when the request raised or the server answered with a
        status other than 200. Failures are reported through ``st.write``
        and the logger rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        st.write("An error occurred while fetching the GIF:", e)
        logger.error(f"An error occurred while fetching the GIF: {e}")
        return None

    if response.status_code != 200:
        st.write("Failed to fetch the GIF.")
        # Error bodies are not guaranteed to be JSON, so log the status and a
        # slice of the raw text instead of calling response.json(), which
        # would raise an exception this function does not catch.
        logger.error(
            "Failed to fetch the GIF. "
            f"status={response.status_code} body={response.text[:200]!r}"
        )
        return None

    return base64.b64encode(response.content).decode("utf-8")


def talking_buddy() -> object:
    """Build the HTML ``<img>`` snippet for a randomly picked buddy GIF.

    The GIF is downloaded via ``fetch_gif`` and embedded as a base64 data
    URI. An empty string is returned when no GIF data could be obtained.
    """
    candidate_urls = [
        "https://media.tenor.com/ZZlzCymWkRoAAAAi/akirambow-smile-person.gif"
    ]
    encoded_gif = fetch_gif(random.choice(candidate_urls))
    # Guard clause: nothing to embed if the download failed.
    if not encoded_gif:
        return ""
    return f'<img src="data:image/gif;base64,{encoded_gif}" alt="buddy gif" width="400">'


def transcribe(audio: np.ndarray) -> str:
    """Transcribe recorded audio with the whisper "base" model.

    The raw bytes of ``audio`` (``audio.tobytes()``) are written to a
    temporary ``.mp3`` file whose path is handed to whisper. The temporary
    file is removed afterwards, whether or not transcription succeeds.

    Args:
        audio: Recorded audio buffer.
            NOTE(review): presumably the bytes are already encoded audio
            that whisper can decode; confirm against the recorder output.

    Returns:
        The transcribed text (``result["text"]``).
    """
    import os  # local import: the file's import block does not provide os

    # delete=False so the file survives the ``with`` block and can be
    # reopened by name; it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as input_file:
        input_file.write(audio.tobytes())

    try:
        st.write("Sure...just a moment")
        model = whisper.load_model("base")
        result = model.transcribe(input_file.name)
    finally:
        # Without this, every request leaves one file behind in the temp dir.
        try:
            os.remove(input_file.name)
        except OSError as e:
            logger.warning(f"Could not remove temporary audio file: {e}")

    text = result["text"]
    st.write(f"Transcribed Audio: {text}")
    logger.info(f"Transcribed Audio: {text}")
    return text


def ask_openai(transcribed_text: str, openai_key: str) -> str:
    """Send the transcribed text to OpenAI through a langchain ``LLMChain``.

    A buddy GIF is rendered while the request is in flight and is cleared
    again afterwards, including when the request raises.

    Args:
        transcribed_text: The user's question, substituted into the prompt
            as ``human_input``.
        openai_key: API key passed to the ``OpenAI`` LLM wrapper.

    Returns:
        The model's completion text.
    """
    image_placeholder = st.markdown(talking_buddy(), unsafe_allow_html=True)
    # Ask Open AI using langchain

    template = """Assistant is a large language model trained by OpenAI.

    Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

    Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

    Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.

    Human: {human_input}
    Assistant:"""

    try:
        prompt = PromptTemplate(input_variables=["human_input"], template=template)

        chatgpt_chain = LLMChain(
            llm=OpenAI(temperature=0, openai_api_key=openai_key), prompt=prompt
        )

        output = chatgpt_chain.predict(human_input=transcribed_text)
        st.write(f"Open AI Chatbot: {output}")
        logger.info(f"Open AI Chatbot: {output}")
    finally:
        # Clear the "talking" GIF even when the request fails, so a stale
        # animation is not left on the page next to the error message.
        image_placeholder.empty()
    return output


def tts_with_elevenlabs(answer: str, eleven_labs_key: str) -> None:
    """Convert ``answer`` to speech with ElevenLabs and autoplay it in the page.

    The generated audio is embedded as a base64 data URI inside an HTML
    ``<audio autoplay>`` element, rendered together with the buddy GIF.
    Any exception raised while generating or rendering is reported through
    ``st.write`` and the logger instead of propagating.

    Args:
        answer: Text to synthesize.
        eleven_labs_key: API key registered through ``set_api_key``.
    """
    st.write("Generating audio...")
    set_api_key(eleven_labs_key)
    try:
        logger.info(f"Generating audio...{answer}")
        eleven_audio = generate(
            text=answer, voice="Bella", model="eleven_monolingual_v1"
        )

        output_audio_placeholder = st.empty()
        # ElevenLabs returns MP3 by default, so label the payload audio/mpeg;
        # browsers without Ogg support skip a source declared as audio/ogg.
        output_audio_str = (
            f"data:audio/mpeg;base64,{base64.b64encode(eleven_audio).decode()}"
        )
        # Drop the raw bytes once encoded; the base64 string is all we need.
        eleven_audio = None
        output_audio_html = f"""
                {talking_buddy()}
                <audio autoplay class="stAudio">
                <source src="{output_audio_str}" type="audio/mpeg">
                Your browser does not support the audio element.
                </audio>
            """
        output_audio_placeholder.markdown(output_audio_html, unsafe_allow_html=True)
    except Exception as e:
        st.write("An error occurred while generating the audio:", e)
        # Log as well as display, matching the other error handlers in this file.
        logger.error(f"An error occurred while generating the audio: {e}")


def main():
    """Render the BuddyGPT page and run the record → transcribe → answer → speak flow.

    The sidebar collects the OpenAI and ElevenLabs API keys. The recorder is
    shown only once both keys are present, and the pipeline runs only when a
    non-empty recording is available. Any exception raised by the pipeline is
    shown in the page and logged.
    """
    st.title("Chat over Voice Demo - BuddyGPT")
    st.write("Hello there. I am BuddyGPT. I am here to help you with your questions.")
    with st.sidebar:
        st.write("Before we start, could you add in the following details?")
        # type="password" masks the secrets instead of echoing them on screen.
        openai_key = st.text_input(
            "Enter your Open AI Key 👇",
            placeholder="This is a placeholder",
            type="password",
        )
        eleven_labs_key = st.text_input(
            "Enter your Eleven Labs Key 👇",
            placeholder="This is a placeholder",
            type="password",
        )

    # Both keys are required before the recorder is offered.
    if not (openai_key and eleven_labs_key):
        return

    audio = audiorecorder("Click to record", "Recording...")

    # Explicit length check: ``audio`` may be an array, for which plain
    # truthiness is ambiguous.
    if len(audio) == 0:
        return

    try:
        transcribed_text = transcribe(audio)
        answer = ask_openai(transcribed_text, openai_key)
        tts_with_elevenlabs(answer, eleven_labs_key)
    except Exception as e:
        st.write("An error occurred while processing:", e)
        logger.error(f"An error occurred while processing: {e}")


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()