In [7]:
import os
import requests
from dotenv import load_dotenv

# Load settings from a .env file (python-dotenv) so the os.getenv() lookups
# below can see them.
load_dotenv()

# Text-to-speech service credentials and base URL, read from the environment.
# os.getenv() returns None for a variable that is not set, so either value may
# be None if the configuration is missing.
TTS_API_KEY = os.getenv('TTS_API_KEY')
TTS_URL = os.getenv('TTS_URL')

def synthesize_korean_text(text, filename="output.wav", timeout=60):
    """Synthesize speech for *text* and save the audio to *filename*.

    POSTs ``{"text": text}`` to ``{TTS_URL}/v1/synthesize`` with the
    ``ko-KR_JinV3Voice`` voice, using HTTP basic auth (user ``apikey``,
    password ``TTS_API_KEY``) and asking for ``audio/wav``. On HTTP 200 the
    response body is streamed to *filename* and a confirmation is printed;
    on any other status the status code and response body are printed and
    no file is written.

    Args:
        text: Text to synthesize.
        filename: Path of the audio file to write (opened in binary mode).
        timeout: Timeout in seconds handed to ``requests``; without one a
            stalled server would block this call indefinitely.

    Returns:
        None.

    Raises:
        requests.RequestException: On connection errors or timeouts; these
            are not caught here.
        OSError: If *filename* cannot be opened or written.
    """
    endpoint = f"{TTS_URL}/v1/synthesize"
    headers = {
        "Content-Type": "application/json",
        "Accept": "audio/wav",
    }
    payload = {"text": text}
    auth = ("apikey", TTS_API_KEY)
    params = {"voice": "ko-KR_JinV3Voice"}

    # With stream=True the connection stays checked out until the body is
    # fully read or the response is closed; the context manager guarantees
    # the release on every path, including exceptions while writing.
    with requests.post(
        endpoint,
        headers=headers,
        params=params,
        json=payload,
        auth=auth,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code != 200:
            print(f"에러: {response.status_code}")
            print(response.text)
            return

        with open(filename, "wb") as audio_file:
            for chunk in response.iter_content(chunk_size=1024):
                # Skip empty chunks so nothing zero-length is written.
                if chunk:
                    audio_file.write(chunk)

    print(f"{filename} 파일이 저장되었습니다.")

if __name__ == "__main__":
    # Demo run: synthesize one sample sentence into korean_output.wav.
    korean_text = '안녕하세요, 이것은 한국어 음성 합성 테스트입니다.'
    synthesize_korean_text(text=korean_text, filename="korean_output.wav")

korean_output.wav 파일이 저장되었습니다.


In [9]:
import os
import requests
from dotenv import load_dotenv

# Load settings from a .env file (python-dotenv) so the os.getenv() lookups
# below can see them.
load_dotenv()

# Speech-to-text service credentials and base URL, read from the environment.
# os.getenv() returns None for a variable that is not set, so either value may
# be None if the configuration is missing.
STT_API_KEY = os.getenv('STT_API_KEY')
STT_URL = os.getenv('STT_URL')

def korean_speech_to_text(audio_path, timeout=120):
    """Transcribe the WAV file at *audio_path* and return the recognized text.

    POSTs the raw file bytes to ``{STT_URL}/v1/recognize`` with the
    ``ko-KR_Multimedia`` model, using HTTP basic auth (user ``apikey``,
    password ``STT_API_KEY``). The top alternative of every entry in the
    response's ``results`` list is joined with single spaces, so audio that
    the service splits into several segments is not truncated to the first
    one. The transcript and the mean reported confidence are printed.

    All failures are reported with ``print`` and signalled by returning
    ``None``; this function does not raise for I/O, network or parsing
    errors.

    Args:
        audio_path: Path to the audio file; it is sent as ``audio/wav``.
        timeout: Timeout in seconds handed to ``requests``; without one a
            stalled server would block this call indefinitely.

    Returns:
        The recognized transcript, or ``None`` when the file is missing,
        the request fails, the service answers with a non-200 status, or
        nothing was recognized.
    """
    endpoint = f"{STT_URL}/v1/recognize"
    headers = {"Content-Type": "audio/wav"}
    auth = ("apikey", STT_API_KEY)

    # Booleans are spelled as lowercase strings on purpose: URL-encoding a
    # Python True produces "True", whereas the REST API's documented form
    # is "true". The timestamps / word_confidence data is requested but not
    # consumed by this function.
    params = {
        'model': 'ko-KR_Multimedia',
        'timestamps': 'true',
        'word_confidence': 'true',
        'smart_formatting': 'true',
    }

    try:
        with open(audio_path, 'rb') as audio_file:
            response = requests.post(
                endpoint,
                headers=headers,
                data=audio_file,
                params=params,
                auth=auth,
                timeout=timeout,
            )

        if response.status_code != 200:
            print(f"STT 변환 오류: {response.status_code}")
            print(response.text)
            return None

        # Keep the best (first) alternative of each result segment; entries
        # without alternatives contribute nothing.
        segments = [
            entry['alternatives'][0]
            for entry in response.json().get('results') or []
            if entry.get('alternatives')
        ]
        pieces = [segment['transcript'].strip() for segment in segments]
        transcript = ' '.join(piece for piece in pieces if piece)

        if not segments:
            print("음성이 명확하게 인식되지 않았습니다.")
            return None

        # Mean over segments; equals the single value when there is only one.
        confidence = sum(
            segment.get('confidence', 0) for segment in segments
        ) / len(segments)

        print(f"인식된 텍스트: {transcript}")
        print(f"정확도: {confidence:.2%}")

        return transcript

    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {audio_path}")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: network errors, timeouts and
        # malformed responses are reported rather than propagated.
        print(f"오류 발생: {str(e)}")
        return None

if __name__ == "__main__":
    # Demo run: transcribe the file written by the text-to-speech example.
    audio_file = 'korean_output.wav'
    result = korean_speech_to_text(audio_path=audio_file)

    # A falsy result (None on failure, or an empty transcript) prints nothing.
    if result:
        print(f"최종 결과: '{result}'")

인식된 텍스트: 안녕하세요 이것은 난 구거음성 합성 테스트입니다
정확도: 75.00%
최종 결과: '안녕하세요 이것은 난 구거음성 합성 테스트입니다'
