# Multi Modal CodeBuddy: Audio Input support
**Support**
- LLM model:
    - OpenAI: gpt-4o-mini
    - ollama: llama3.2
- Transcribing model:
    - OpenAI: whisper-1
    - whisper: base, small, medium, large

![MM_Gradio Based Chatbot](../images/MM_chatbot_code_buddy.png)

In [None]:
#imports
import os
import sys
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

In [None]:
# Importing the CodeBuddy class
# The module is not on the default import path, so its directory (resolved
# to an absolute path from the current working directory) is added to
# sys.path before the import below.
localPath = "../Code_Buddy/"
absolutePath = os.path.abspath(localPath)

# Append only when missing so re-running this cell does not add duplicates.
if absolutePath not in sys.path:
    sys.path.append(absolutePath)

from CodeBuddy import CodeBuddy       

In [None]:
# Importing the Transcriber class
# The module is not on the default import path, so its directory (resolved
# to an absolute path from the current working directory) is added to
# sys.path before the import below.
localPath = "../Transcriber/"
absolutePath = os.path.abspath(localPath)

# Append only when missing so re-running this cell does not add duplicates.
if absolutePath not in sys.path:
    sys.path.append(absolutePath)

from Transcriber import Transcriber     

In [None]:
# System prompt handed to the CodeBuddy model together with the chat history.
# It is assembled from adjacent string literals: a backslash continuation
# *inside* a literal keeps the leading indentation of every continued line,
# which would embed long runs of spaces in the prompt text.
systemPrompt = (
    "You are a helpful and informed coding agent. "
    "You are given a piece of code. "
    "You have to check if the code is correct or is incorrect. "
    "You need to explain the code in beginner friendly way. "
    "You are also allowed to give suggestions on improvement of code "
    "for runtime optimization. "
    "Give your answer in Markdown."
)

In [None]:
# Helper functions
def initialize_buddy(model):
    """Create the CodeBuddy instance for the selected model name.

    "llama3.2" is constructed with the "ollama" backend and "gpt-4o-mini"
    with the "openai" backend; for the latter a Gradio info notice is shown
    first.

    Raises:
        ValueError: if ``model`` is neither of the two supported names.
    """
    if model == "llama3.2":
        return CodeBuddy("ollama", model)

    if model != "gpt-4o-mini":
        raise ValueError(f"{model} model not supported")

    # Only the OpenAI model is left: notify the user before creating it.
    gr.Info("Paid services being used")
    return CodeBuddy("openai", model)

def initialize_sst(model):  # stt: speech to text
    """Create the Transcriber instance for the selected model name.

    "base", "small", "medium" and "large" are constructed with the "whisper"
    backend and "whisper-1" with the "openai" backend; for the latter a Gradio
    info notice is shown first.

    Raises:
        ValueError: if ``model`` is none of the supported names.
    """
    whisper_sizes = ("base", "small", "medium", "large")  # Supported models for whisper
    if model in whisper_sizes:
        return Transcriber("whisper", model)

    if model != "whisper-1":
        raise ValueError(f"{model} model not supported")

    # Only the OpenAI model is left: notify the user before creating it.
    gr.Info("Paid services being used")
    return Transcriber("openai", model)

def chat_stream(message, history, cbModel, buddy):
    """Stream the model's reply to ``message`` as a Gradio event handler.

    This is a generator. Every yielded value is a 3-tuple
    ``(entry_text, chat_messages, buddy)``: the entry text is always ``""``
    (clearing the textbox), ``chat_messages`` is the conversation including
    the reply accumulated so far, and ``buddy`` is the instance to keep for
    the next call.

    Args:
        message: Text submitted by the user.
        history: Existing chat messages as a list of
            ``{"role": ..., "content": ...}`` dicts, or None for a new chat.
        cbModel: Name of the model currently selected in the UI.
        buddy: Instance kept from a previous call, or None.

    Raises:
        ValueError: propagated from ``initialize_buddy`` when ``cbModel`` is
            not supported.
    """
    # Work on a copy so the list owned by the caller is never mutated.
    history = list(history or [])

    # Blank submission: keep the conversation as is and skip model
    # initialisation and the (possibly paid) model call.
    if not message or not message.strip():
        yield "", history, buddy
        return

    # If buddy is None or model changed, reinitialize
    if buddy is None or buddy.modelName != cbModel:
        buddy = initialize_buddy(cbModel)

    history.append({"role": "user", "content": message})

    # NOTE(review): the whole history is passed as userPrompt and the system
    # prompt separately; presumably runChatbot combines them - confirm
    # against the CodeBuddy class.
    stream = buddy.runChatbot(userPrompt=history, systemPrompt=systemPrompt)

    reply = ""
    for chunk in stream:
        # A chunk whose delta content is None contributes nothing.
        reply += chunk.choices[0].delta.content or ""
        yield "", history + [{"role": "assistant", "content": reply}], buddy


In [None]:
# Build the Gradio interface: model selectors, microphone and text entry in
# the left column, the chat window in the right column, and a Clear button
# in a row underneath. Components are created in the order they appear here.
with gr.Blocks() as ui:
    with gr.Row(scale=4):
        with gr.Column(scale=1):
            # Dropdown for the CodeBuddy model
            modelName = gr.Dropdown(
                choices=["llama3.2", "gpt-4o-mini"],
                label="Select Model",
                value="gpt-4o-mini"
            )
            # Dropdown for the transcribing model
            trancriberModelName = gr.Dropdown(
                choices=["whisper-1", "base", "small", "medium", "large"],
                label="Select Transcribing Model:",
                value="medium"
            )
            # Input audio for the transcribing model; the handler receives a file path
            mic_input = gr.Audio(sources="microphone", type="filepath", label="Speak now") 
            # Entry (Text) to the CodeBuddy
            entry = gr.Textbox(label="Chat with our AI Assistant:")
        with gr.Column(scale=2):
            # Chatbot window
            chatbot = gr.Chatbot(height=800, type="messages")

    with gr.Row(scale=1):
        # Button to clear the entry, the recording and the chat window
        clear = gr.Button("Clear")

    # Per-session holders for the model instances, so they are only rebuilt
    # when missing or when the selected model name changes
    buddyState = gr.State(value=None)
    transcriberState = gr.State(value=None)  # stores the transcriber instance

    # Transcribe audio to entry textbox
    def handle_audio(audio_path, model, transcriber):
        """Transcribe the recorded audio and return it for the entry textbox.

        Returns a ``(text, transcriber)`` pair. When ``audio_path`` is None
        the text is ``""`` and the transcriber is returned unchanged.
        """
        if audio_path is None:
            return "", transcriber
        
        # Init transcriber if needed (none yet, or a different model was selected)
        if transcriber is None or transcriber.modelName != model:
            transcriber = initialize_sst(model)

        transcript = transcriber.run(audio_path)
        return transcript, transcriber

    # Mic input feed to the entry; the transcriber instance is written back to its state
    mic_input.change(
        fn=handle_audio,
        inputs=[mic_input, trancriberModelName, transcriberState],
        outputs=[entry, transcriberState]
    )

    # Main chatbot function: submitting the textbox runs chat_stream
    entry.submit(
        fn=chat_stream,
        inputs=[entry, chatbot, modelName, buddyState],
        outputs=[entry, chatbot, buddyState]
    )

    # Clear button resets the entry, the mic input and the chat window
    # (buddyState and transcriberState are not among the outputs)
    clear.click(
        fn=lambda: ("", None, []), # Sets entry to "", mic_input to None and chatbot to []
        inputs=[],
        outputs=[entry, mic_input, chatbot],
        queue=False
    )

# Start the app; inbrowser=True asks Gradio to open it in a browser tab
ui.launch(inbrowser=True)
