In [None]:
import requests
from IPython.display import Markdown
import re

 # Endpoint URL and API key for the chat service (left blank here; fill in before running).
endpoint = ''
key = ''

# Shared conversation log: chatbot_basic appends to it and sends it as the request "messages".
messages = []

# system: no system-role message is added to `messages` in this cell.

def chatbot_basic(history):
    """Send the newest chat turn to the chat endpoint and return the reply text.

    The last entry of ``history`` is appended to the module-level ``messages``
    list as a user turn, and the whole list is posted to ``endpoint``. When the
    service answers with HTTP 200 the assistant reply is recorded in
    ``messages`` as well, so follow-up requests carry both sides of the
    conversation instead of user turns only.

    Args:
        history: Chat history as a list of dicts that have a "content" key.

    Returns:
        The reply text from the first choice, or "" when ``history`` is empty
        or the service does not answer with HTTP 200.
    """
    # Nothing to send: avoid indexing an empty (or missing) history.
    if not history:
        return ""

    # Request headers
    headers = {
        "Content-Type": "application/json",
        "api-key": key,
    }

    # Record the user turn in the shared conversation log.
    messages.append({"role": "user", "content": history[-1]["content"]})

    # Request body
    body = {
        "messages": messages,
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": 1800,
        "stop": None,
    }

    # POST
    response = requests.post(endpoint, headers=headers, json=body)
    if response.status_code != 200:
        return ""

    content = response.json()['choices'][0]['message']['content']
    # Keep the assistant side of the dialogue; without this the model only
    # ever sees the user's messages on later requests.
    if content:
        messages.append({"role": "assistant", "content": content})
    return content

In [None]:
import requests 

## api 불러오기
def request_tts(text, voice="ko-KR-SeoHyeonNeural", file_name="response_audio.mp3"):
    """Synthesize ``text`` to speech through the TTS endpoint and save it to a file.

    The text is wrapped in an SSML document for the given voice and posted to
    ``end_point``. Both the text and the voice name are XML-escaped so that
    characters such as ``&`` or ``<`` cannot produce malformed SSML.

    Args:
        text: Text to speak.
        voice: Voice name placed in the SSML ``<voice name=...>`` attribute.
        file_name: Path the returned audio bytes are written to.

    Returns:
        ``file_name`` when the service answers with HTTP 200, otherwise None.
    """
    from xml.sax.saxutils import escape

    end_point = ""
    api_key = ""
    content_type = "application/ssml+xml"
    output_format = "audio-16khz-64kbitrate-mono-mp3"

    # The attribute is single-quoted below, so apostrophes must be escaped too.
    safe_voice = escape(str(voice), {"'": "&apos;"})
    safe_text = escape(str(text))
    body_raw = f"""<speak version='1.0' xml:lang='ko-KR'><voice name='{safe_voice}'>{safe_text}</voice></speak>"""

    headers = {
        "Ocp-Apim-Subscription-Key": api_key,
        "Content-Type": content_type,
        "X-Microsoft-OutputFormat": output_format,
    }
    # Encode explicitly so the Korean text does not depend on the HTTP stack's
    # default encoding for str bodies.
    response = requests.post(end_point, headers=headers, data=body_raw.encode("utf-8"))

    if response.status_code != 200:
        return None

    with open(file_name, 'wb') as audio_file:
        audio_file.write(response.content)
    return file_name

In [None]:
import requests

def request_stt(file_path):
    """Send an audio file to the speech-to-text endpoint and return the recognized text.

    Args:
        file_path: Path of the audio file to upload. The bytes are sent with
            Content-Type "audio/wav".

    Returns:
        The "DisplayText" field of the JSON response, or None when no path is
        given, the service does not answer with HTTP 200, or the response has
        no "DisplayText" field.
    """
    # No file to transcribe: skip the request instead of failing in open().
    if not file_path:
        return None

    end_point = ""
    api_key = ""
    content_type = "audio/wav"

    query_params = {
        "language": "ko-KR",
        "format": "detailed",
    }

    headers = {
        "Ocp-Apim-Subscription-Key": api_key,
        "Content-Type": content_type,
    }

    with open(file_path, "rb") as audio:
        audio_data = audio.read()

    response = requests.post(end_point, params=query_params, headers=headers, data=audio_data)

    if response.status_code != 200:
        return None

    # .get() keeps the "no result" contract (None) if the field is absent.
    return response.json().get('DisplayText')
    


In [None]:
def change_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` via ``request_stt``.

    Args:
        audio_path: Path of the audio file, or None/empty when there is no
            audio to transcribe.

    Returns:
        The text returned by ``request_stt``, or None when no path is given.
    """
    # Without a path there is nothing to upload, so do not call the STT service.
    if not audio_path:
        return None

    # request stt
    return request_stt(file_path=audio_path)


In [None]:
import re

def clean_text(text):
    """Return ``text`` with every character removed that is not in the allowed set.

    Allowed characters are the range 가-힣, ASCII letters and digits,
    whitespace, '!' and '?'.
    """
    disallowed = re.compile(r'[^가-힣a-zA-Z0-9\s!?]')
    return disallowed.sub('', text)

In [None]:
# Manual check of clean_text: the sample contains an emoji and '*' characters,
# which are outside the allowed character set and are therefore removed.
a = "안녕하세요ad! 😊\n일 더하기 일은 **이**입니다!"
clean_text(a)

In [None]:
def change_tts(tts_text, voice_name):
    """Clean ``tts_text`` and synthesize it with the given voice.

    Args:
        tts_text: Raw text to speak; it is passed through ``clean_text`` first.
        voice_name: Voice name forwarded to ``request_tts``.

    Returns:
        Whatever ``request_tts`` returns, or None when there is nothing to
        speak (missing text, or text that is blank after cleaning).
    """
    # Missing/empty input: skip cleaning (which requires a string) and the request.
    if not tts_text:
        return None

    tts_re_text = clean_text(tts_text)
    # Cleaning can remove everything (e.g. emoji-only text); do not request
    # speech for blank content.
    if not tts_re_text.strip():
        return None

    # request tts
    return request_tts(tts_re_text, voice=voice_name)

In [None]:
def change_chatbot(history, voice_name):
    """Synthesize speech for the newest chat message when it is not a user turn.

    Args:
        history: Chat history as a list of dicts with "role" and "content" keys.
        voice_name: Voice to use; None falls back to "ko-KR-SeoHyeonNeural".

    Returns:
        The result of ``change_tts`` for the newest message, or None when the
        history is empty or its newest message has role "user".
    """
    # An empty (or missing) history has no last message to read. The previous
    # `history == 0` comparison was never true for a list.
    if not history:
        return None

    latest = history[-1]
    if latest['role'] == "user":
        return None

    if voice_name is None:
        voice_name = "ko-KR-SeoHyeonNeural"  # Default voice
    return change_tts(latest["content"], voice_name)

In [None]:
import gradio as gr 

# Custom theme passed to gr.Blocks below: starts from gr.themes.Origin with
# rose/red/slate hues, explicit text sizes and Google fonts, then overrides
# individual style variables through .set().
# NOTE(review): two font names end with a trailing space ('IBM Plex Sans KR ',
# '42dot Sans ') — confirm this is intended.
theme = gr.themes.Origin(
    primary_hue="rose",
    secondary_hue="red",
    neutral_hue="slate",
    text_size=gr.themes.Size(lg="17px", md="15px", sm="13px", xl="24px", xs="12px", xxl="28px", xxs="10px"),
    radius_size="lg",
    font=[gr.themes.GoogleFont('Gowun Batang'), gr.themes.GoogleFont('IBM Plex Sans KR '), gr.themes.GoogleFont('42dot Sans '), 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Gowun Batang'), gr.themes.GoogleFont('IBM Plex Sans KR '), gr.themes.GoogleFont('42dot Sans '), 'monospace'],
).set(
    # Values starting with '*' refer to other theme variables by name.
    body_background_fill='*background_fill_secondary',
    body_background_fill_dark='*neutral_800',    
    body_text_color='*neutral_700',
    body_text_size='*text_md',
    embed_radius='*radius_md',
    block_radius='*radius_md',
    block_title_radius='*radius_md',
    block_title_text_size='*text_md',
    container_radius='*radius_md',
    input_text_size='*text_sm',
    button_large_text_size='*text_md',
    form_gap_width='0px'     
)


# Voice chatbot UI: a chat window with automatic TTS of assistant answers,
# voice selection, a text input, STT from microphone/file, and a standalone
# text-to-speech panel. Components are created first, then events are wired.
with gr.Blocks(theme=theme) as demo :
    gr.Markdown("<h1 align='center'> ✨Welcome to AI World</h1>", min_height="40px")
    voice_list_female = ["ko-KR-SunHiNeural", "ko-KR-SeoHyeonNeural", "ko-KR-JiMinNeural", "ko-KR-SoonBokNeural", "ko-KR-YuJinNeural"]
    # Each voice is listed once (the list previously repeated "ko-KR-HyunsuNeural").
    voice_list_male = ["ko-KR-InJoonNeural", "ko-KR-HyunsuNeural", "ko-KR-BongJinNeural", "ko-KR-GookMinNeural"]
    voice_list = voice_list_female
    default_voice = "ko-KR-SeoHyeonNeural"

    with gr.Row() :
        with gr.Column(scale=3) :
            gr.Markdown("### Chatbot", min_height="25px")
            # Chatbot area
            chatbot = gr.Chatbot(type="messages", bubble_full_width=False, show_copy_button=True)

        with gr.Column(scale=1) :
            gr.Markdown("### Voice Parameter", min_height="25px")
            # Voice selection controls
            gender_select = gr.Radio(["Female", "Male"], label="Gender Select")
            choice = gr.Dropdown(voice_list, label="Voice Select", value=default_voice)
            test_btn = gr.Button("Test Voice", size="sm")
            # Audio player for the spoken answer
            chatbot_audio = gr.Audio(label='GPT Answer', autoplay=True)

    with gr.Row() :
        # User input area
        user_input = gr.Textbox(show_label=False, placeholder="You can use the 'record' button bottom to speak.", scale=3)
        with gr.Column(scale=1) :
            submit_btn = gr.Button("Submit", scale=1, variant="primary")
            clear_btn = gr.Button("Clear")

    with gr.Row() :
        with gr.Column(scale=2) :
            # STT area
            gr.Markdown("### Speech here to Text", min_height="25px")
            input_mic = gr.Audio(label="Mic", sources="microphone", type="filepath", waveform_options=gr.WaveformOptions(waveform_color="#FFF0BD", waveform_progress_color="#F8F3D9", skip_length=2, show_controls=False))
            input_file = gr.Audio(label="File", sources="upload", type="filepath")
        with gr.Column(scale=1) :
            # TTS area
            gr.Markdown("### Text here to Speech", min_height="25px")
            tts_textbox = gr.Textbox(label="Text", placeholder="text here")
            send_tts_button = gr.Button("Send")
            output_tts_audio = gr.Audio(label="Audio", interactive=False)


    def user(user_message, history: list):
        """Clear the input box and append the message to the history as a user turn."""
        return "", history + [{"role": "user", "content": user_message}]

    def bot(history: list):
        """Ask chatbot_basic for a reply and append it to the history as an assistant turn."""
        answer = chatbot_basic(history)
        history.append({"role": "assistant", "content": answer})
        return history

    def gender_selects(select) :
        """Switch the voice dropdown to the list for the chosen gender and select its first voice."""
        if select == "Female" :
            return gr.update(choices=voice_list_female, value=voice_list_female[0])
        else :
            return gr.update(choices=voice_list_male, value=voice_list_male[0])

    def audio_clean() :
        """Return None so the bound audio component is emptied."""
        return None

    def reset_conversation() :
        """Empty the chat window and the shared `messages` log sent to the chat API."""
        # Without this, "Clear" only wiped the visible chat while the earlier
        # conversation was still included in every later request.
        messages.clear()
        return None

    # Hidden textbox holding the sample sentence used by the "Test Voice" button.
    tts_test = gr.Textbox("안녕하세요. 지금은 테스트 중입니다.", visible=False)

    send_tts_button.click(fn=change_tts, inputs=[tts_textbox, choice], outputs=[output_tts_audio])
    tts_textbox.submit(fn=change_tts, inputs=[tts_textbox, choice], outputs=[output_tts_audio])
    test_btn.click(fn=change_tts, inputs=[tts_test, choice], outputs=[chatbot_audio])

    # Speech input: transcribe, then run the same user -> bot chain as typed input.
    input_mic.change(fn=change_audio, inputs=[input_mic], outputs=[user_input]).then(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(
        bot, chatbot, [chatbot])
    # The uploaded file must be read from input_file (this handler previously
    # read input_mic, so uploads were never transcribed).
    input_file.change(fn=change_audio, inputs=[input_file], outputs=[user_input]).then(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(
        bot, chatbot, [chatbot])

    # Automatically speak the chatbot's answers
    audio_clear_btn = gr.ClearButton([chatbot_audio], visible=False)
    chatbot.change(fn=change_chatbot, inputs=[chatbot, choice], outputs=[chatbot_audio])
    clear_btn.click(reset_conversation, None, chatbot).then(audio_clean, outputs=[chatbot_audio])

    # Button / submit event handlers
    gender_select.change(gender_selects, inputs=[gender_select], outputs=[choice])
    submit_btn.click(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(
        bot, chatbot, [chatbot])
    user_input.submit(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(
        bot, chatbot, [chatbot])

demo.launch()