## MediPal -- Frontend -- API and Chatbox

### In this section
* I built an API endpoint with FastAPI so that we only need to launch the heavy process once. Other frontend apps just need to interact with the API.
* I also built a chatbox with Gradio that can interact with the user by voice and by text message.

![](../assets/screenshots/front-end.PNG "")

Key techniques: whisper, gtts, fastapi, gradio, uvicorn

##### The hardest part of this section is deploying the whole thing to an inference platform or a cloud platform. That is a different domain; I will dive into it in a later notebook.

#### The API endpoint is built with FastAPI

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from src.medipal import ask

# Request body schema for POST /ask.
class Query(BaseModel):
    # The user's question; it is handed unchanged to ask().
    query: str

# Application object on which the /ask routes are registered.
app = FastAPI()

@app.post("/ask")
def medipal_post_api(query: Query):
    # POST variant: the question arrives in the JSON body ({"query": "..."}).
    # Whatever ask() returns is wrapped under the "message" key.
    return {"message": ask(query.query)}

@app.get("/ask")
def medipal_get_api(query: str):
    # GET variant: the question arrives as the `query` parameter.
    # Whatever ask() returns is wrapped under the "message" key.
    return {"message": ask(query)}

In [None]:
import threading, time
import uvicorn

HOST = "127.0.0.1"
PORT = 30000

_config = uvicorn.Config(app, host=HOST, port=PORT, log_level="info")
_server = uvicorn.Server(_config)

def _run():
    # Runs its own asyncio loop in THIS thread, so it won't conflict with Jupyter's loop
    _server.run()

# Daemon thread: it does not keep the process alive once the kernel exits.
_server_thread = threading.Thread(target=_run, daemon=True)
_server_thread.start()

# Wait (bounded) until uvicorn reports that startup finished instead of
# assuming a fixed one-second sleep is enough. Stop waiting early if the
# server thread has already ended, e.g. because the port is taken.
_STARTUP_TIMEOUT_S = 10
_deadline = time.monotonic() + _STARTUP_TIMEOUT_S
while (
    not _server.started
    and _server_thread.is_alive()
    and time.monotonic() < _deadline
):
    time.sleep(0.1)

if _server.started:
    print(f"✅ FastAPI running at http://{HOST}:{PORT}")
    print("To stop the server, run the 'STOP' cell below.")
else:
    # Only claim success when the server is really up.
    print(f"❌ FastAPI did not start at http://{HOST}:{PORT} - check the log output above.")

#### The chatbox is built with Gradio, Whisper and gTTS

In [None]:
import os
import gradio as gr
import datetime
from gtts import gTTS
from faster_whisper import WhisperModel
import requests

# Endpoint of the MediPal API; generate_response() posts the questions here.
url = "http://127.0.0.1:30000/ask" # medipal is running under the api

# Audio files are stored in an "audio" folder under the current working directory.
workspace_base_path = os.getcwd()
audio_path = os.path.join(workspace_base_path, "audio") 
# NOTE: evaluated once when this module is loaded, so the value does not change afterwards.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Speech-to-text model, created once here and reused by transcribe_audio().
asr_model = WhisperModel("turbo")

def transcribe_audio(audio_file):
    """
    Run speech recognition (ASR) on an audio file and return the transcript.

    Returns None when no audio file is given; otherwise the texts of all
    recognized segments concatenated in order.
    """
    if audio_file:
        recognized, _ = asr_model.transcribe(audio_file, beam_size=5)
        pieces = [segment.text for segment in recognized]
        return "".join(pieces)
    return None

def generate_response(query: str, timeout: float = 120.0):
    """
    Call the MediPal API to generate the answer.

    Args:
        query: The user's question. An empty (or None) query is answered
            locally with a hint and no request is sent.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The "message" field of the JSON reply when the API answers with
        HTTP 200, otherwise None (status code and body are printed).

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached
            or does not answer within `timeout` seconds.
    """
    if not query:
        return "I didn’t catch anything. Please try speaking or typing."

    # Send POST request. Without a timeout requests would wait forever if
    # the API hangs, which would freeze the chat UI.
    response = requests.post(url, json={"query": query}, timeout=timeout)
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None
    return response.json()["message"]

def synthesize_audio(text):
    """
    Convert text to audio (TTS) and return the path of the saved file.

    Args:
        text: The text to speak (English).

    Returns:
        Path of the newly written audio file inside `audio_path`.

    Raises:
        ValueError: if `text` is empty or only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("No text to synthesize.")

    # The output folder may not exist yet on a fresh checkout.
    os.makedirs(audio_path, exist_ok=True)

    # Build the timestamp per call (with microseconds) so every reply gets
    # its own file; a timestamp computed once at import would make each
    # reply overwrite the previous one.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    # gTTS produces MP3 data, so the file is named accordingly.
    file_path = os.path.join(audio_path, f"Response-{stamp}.mp3")

    # Text to Audio
    tts = gTTS(text, lang='en')
    tts.save(file_path)
    return file_path

# ----------------------------
# Core interaction function
# ----------------------------
def voice_assistant(audio, text, history_state, use_audio_first):
    """
    Handle one chat turn: pick the input, ask MediPal, voice the answer.

    audio: path of the recorded audio file (the mic component is declared
        with type="filepath") or None
    text: str or None
    history_state: list[tuple[str, str]] of (query, reply) pairs
    use_audio_first: bool – if True, prefer audio when both provided

    Returns a 5-tuple in the order of the wired outputs: chatbot history,
    reply text, reply audio path (None if speech synthesis failed), update
    for the input text box, stored history.

    Exceptions are not propagated: on failure the message is placed in the
    reply-text slot and the history is returned unchanged.
    """
    try:
        # Decide which input to use
        typed = (text or "").strip()
        query = ""
        if audio is not None and (use_audio_first or not typed):
            query = (transcribe_audio(audio) or "").strip()
        if not query:
            # No usable speech (or typed text preferred): use the typed text.
            query = typed

        if not query:
            raise gr.Error("Please provide either a voice recording or a text prompt.")

        # LLM response; generate_response returns None when the API call
        # did not succeed, which must not be passed on to TTS.
        reply = generate_response(query)
        if not reply:
            raise RuntimeError("MediPal did not return an answer. Is the API running?")

        # Optional TTS: a speech failure must not discard the text answer.
        try:
            audio_out = synthesize_audio(reply)
        except Exception as tts_error:
            print("TTS failed:", tts_error)
            audio_out = None

        # Update history for Chatbot (on a copy, the incoming list stays untouched)
        history = list(history_state or [])
        history.append((query, reply))

        # Return: chatbot, text output, audio output, cleared text box, preserved history
        return history, reply, audio_out, gr.update(value=""), history

    except Exception as e:
        # Show the error gracefully in the text output; leave history unchanged
        return history_state, f"Error: {e}", None, gr.update(), history_state

def clear_history():
    """Return an empty conversation and an update that blanks the text box."""
    empty_conversation = []
    blank_textbox = gr.update(value="")
    return empty_conversation, blank_textbox

# ----------------------------
# Custom CSS (sleek look)
# ----------------------------
# Stylesheet handed to gr.Blocks(css=...): limits the page width and styles
# the #title / #subtitle elements and the .card / .footer classes.
CSS = """
    .gradio-container {max-width: 1024px !important;}
    #title {
    text-align: center;
    font-size: 1.75rem;
    font-weight: 800;
    letter-spacing: 0.3px;
    }
    #subtitle {
    text-align: center;
    color: #6b7280;
    margin-top: -10px;
    margin-bottom: 12px;
    }
    .card {
    background: linear-gradient(180deg, rgba(255,255,255,0.75) 0%, rgba(250,250,250,0.75) 100%);
    border: 1px solid rgba(0,0,0,0.06);
    border-radius: 16px;
    padding: 16px;
    box-shadow: 0 8px 22px rgba(0,0,0,0.06);
    }
    .footer {
    text-align: center;
    color: #9CA3AF;
    font-size: 0.875rem;
    margin-top: 8px;
    }
    """

# ----------------------------
# UI
# ----------------------------
# Layout: inputs (mic, text box, buttons) in the left column; conversation
# and assistant outputs in the right column.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS, fill_height=True) as demo:
    gr.HTML('<div id="title">MediPal</div>')
    gr.HTML('<div id="subtitle">Your AI friend for medical and clinical Q&A</div>')

    with gr.Row():
        with gr.Column(scale=2):
            with gr.Group(elem_classes="card"):
                gr.Markdown("### Input")
                use_audio_first = gr.Checkbox(
                    value=True, label="Prefer voice if both provided"
                )
                mic = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="🎤 Record your prompt",
                    waveform_options={"show_controls": True},
                )
                text_in = gr.Textbox(
                    label="⌨️ Or type here",
                    placeholder="Ask a question or say something...",
                    lines=2
                )
                with gr.Row():
                    send = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear History")

        with gr.Column(scale=3):
            with gr.Group(elem_classes="card"):
                gr.Markdown("### Conversation")
                chat = gr.Chatbot(
                    label="History",
                    height=420,
                    show_copy_button=True,
                    avatar_images=(None, None),  # plug paths if you want custom avatars
                    bubble_full_width=False,
                )

            with gr.Group(elem_classes="card"):
                gr.Markdown("### Assistant Outputs")
                out_text = gr.Textbox(
                    label="Assistant (text output)", lines=4, interactive=False
                )
                out_audio = gr.Audio(
                    label="Assistant (voice output)", autoplay=True, interactive=False
                )

    history_state = gr.State([])

    # Both triggers (button click and Enter in the text box) share the same wiring.
    turn_inputs = [mic, text_in, history_state, use_audio_first]
    turn_outputs = [chat, out_text, out_audio, text_in, history_state]

    # Click -> process, then drop the recording. Otherwise the old recording
    # stays in the mic component and, with "prefer voice" enabled, would be
    # re-sent instead of the next typed message.
    send.click(
        voice_assistant,
        inputs=turn_inputs,
        outputs=turn_outputs,
        queue=True,
        show_progress=True
    ).then(lambda: None, inputs=None, outputs=mic)

    # Enter/submit on the textbox also triggers send
    text_in.submit(
        voice_assistant,
        inputs=turn_inputs,
        outputs=turn_outputs,
        queue=True,
        show_progress=True
    ).then(lambda: None, inputs=None, outputs=mic)

    # Clear the visible chat and the text box, then also reset the stored
    # history; without that the old turns would reappear with the next message.
    clear_btn.click(
        clear_history,
        inputs=None,
        outputs=[chat, text_in],
    ).then(lambda: [], inputs=None, outputs=history_state)
        
def launch_chatbox(server_port: int = 30001):
    """
    Launch the Gradio chat UI.

    Args:
        server_port: Port the Gradio server listens on. Defaults to 30001,
            the value that was previously hard-coded.
    """
    demo.launch(server_port=server_port)

# Only the launcher is exported by `from <module> import *`.
__all__ = ["launch_chatbox"]

# Start the chat UI when this file is executed as a script.
if __name__ == "__main__":    
    launch_chatbox()