# In [None]:
import pyaudio
import wave
import requests
import json
import os
import numpy as np
import time
from dotenv import load_dotenv
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams

# Load environment variables from the .env file
load_dotenv()

# Service credentials and endpoints, all supplied through the environment
STT_API_KEY = os.getenv('STT_API_KEY')
STT_URL = os.getenv('STT_URL')
TTS_API_KEY = os.getenv('TTS_API_KEY')
TTS_URL = os.getenv('TTS_URL')
API_KEY = os.getenv('API_KEY')
PROJECT_ID = os.getenv('PROJECT_ID')
IBM_CLOUD_URL = os.getenv('IBM_CLOUD_URL')
MODEL_ID = os.getenv('MODEL_ID')

# Fail fast with the full list of missing settings. An explicit mapping is
# used instead of eval() so no string is ever executed as code; unset and
# empty values are both treated as missing because neither is usable.
_REQUIRED_ENV = {
    'STT_API_KEY': STT_API_KEY,
    'STT_URL': STT_URL,
    'TTS_API_KEY': TTS_API_KEY,
    'TTS_URL': TTS_URL,
    'API_KEY': API_KEY,
    'PROJECT_ID': PROJECT_ID,
    'IBM_CLOUD_URL': IBM_CLOUD_URL,
    'MODEL_ID': MODEL_ID,
}
missing = [name for name, value in _REQUIRED_ENV.items() if not value]
if missing:
    raise ValueError(f"다음 환경 변수가 없습니다: {', '.join(missing)}")

# Audio capture settings
CHUNK = 1024  # frames per buffer / per stream read
FORMAT = pyaudio.paInt16  # 16-bit integer samples
CHANNELS = 1  # channel count used for recording and for the WAV header
RATE = 44100  # sample rate in Hz
SILENCE_THRESHOLD = 500  # a chunk whose mean absolute amplitude is below this counts as silent
SILENCE_DURATION = 2  # seconds of continuous silence that end a recording

# Initialize the LLM client
# Generation parameters: cap each completion at 900 newly generated tokens.
generate_params = {GenParams.MAX_NEW_TOKENS: 900}
# Model ID, API key, service URL and project ID all come from the
# environment variables read earlier in this file.
model = Model(
    model_id=MODEL_ID,
    params=generate_params,
    credentials={"apikey": API_KEY, "url": IBM_CLOUD_URL},
    project_id=PROJECT_ID
)

def record_audio():
    """Record from the default input device until sustained silence is heard.

    Audio is read in CHUNK-frame pieces. A chunk counts as silent when its
    mean absolute amplitude is below SILENCE_THRESHOLD; recording stops once
    more than SILENCE_DURATION seconds' worth of consecutive silent chunks
    have been read. The captured audio is written to "input.wav".

    Returns:
        The path of the written WAV file ("input.wav").
    """
    # Loop-invariant: number of consecutive silent chunks that ends recording.
    max_silent_chunks = SILENCE_DURATION * RATE / CHUNK

    frames = []
    silent_chunks = 0

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        try:
            print("🎙️ 말해주세요... (잠시 멈추면 녹음이 종료됩니다)")

            while True:
                # Do not abort the whole recording on an input overflow;
                # losing a few frames is preferable to crashing.
                data = stream.read(CHUNK, exception_on_overflow=False)
                frames.append(data)

                audio_data = np.frombuffer(data, dtype=np.int16)
                amplitude = np.abs(audio_data).mean()

                if amplitude < SILENCE_THRESHOLD:
                    silent_chunks += 1
                else:
                    silent_chunks = 0

                if silent_chunks > max_silent_chunks:
                    break

            print("🛑 녹음 종료")
        finally:
            # Always release the device, even if reading failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager guarantees the WAV header is finalized and the
    # file handle is closed.
    with wave.open("input.wav", 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    return "input.wav"

def speech_to_text(audio_file):
    """Transcribe a WAV file with the speech-to-text service at STT_URL.

    Args:
        audio_file: Path to the WAV file to upload.

    Returns:
        The recognized text, or the sentinel string "음성 인식 오류" when the
        request fails, the service answers with a non-200 status, the body is
        not valid JSON, or no transcript is returned.
    """
    error_text = "음성 인식 오류"
    headers = {"Content-Type": "audio/wav"}
    try:
        with open(audio_file, 'rb') as f:
            response = requests.post(
                STT_URL,
                headers=headers,
                data=f,
                params={"model": "ko-KR_Multimedia"},
                auth=("apikey", STT_API_KEY),
                # Without a timeout a stalled connection would block forever.
                timeout=60,
            )
    except requests.RequestException as exc:
        print(f"⚠️ STT 요청 실패: {exc}")
        return error_text

    if response.status_code != 200:
        return error_text

    try:
        result = response.json()
    except ValueError:
        return error_text

    # The service may split one recording into several result segments
    # (e.g. around pauses); join them all instead of keeping only the first.
    transcripts = []
    for segment in result.get("results") or []:
        alternatives = segment.get("alternatives") or []
        if alternatives:
            transcripts.append(alternatives[0].get("transcript", ""))

    text = "".join(transcripts)
    return text if text.strip() else error_text

def generate_response(text):
    """Ask the module-level LLM for a reply to the user's text.

    The stripped user text is wrapped in [INST] tags and preceded by a fixed
    system prompt wrapped in <<SYS>> tags.

    Args:
        text: The user's utterance.

    Returns:
        The first generated text from the model, stripped of surrounding
        whitespace.
    """
    system_prompt = "당신은 유용한 한국어 도우미입니다."

    # Assemble the prompt piece by piece: system section, blank line, user turn.
    system_section = "<<SYS>>\n" + system_prompt.strip() + "\n<</SYS>>"
    user_section = "[INST]" + text.strip() + "[/INST]"
    formatted_prompt = system_section + "\n\n" + user_section

    generation = model.generate(prompt=formatted_prompt)
    first_result = generation["results"][0]
    return first_result["generated_text"].strip()

def text_to_speech(text):
    """Synthesize speech for ``text`` and save it to "output.wav".

    Args:
        text: The text to speak.

    Returns:
        "output.wav" on success, or None when the request fails or the
        service answers with a non-200 status.
    """
    headers = {
        "Content-Type": "application/json",
        "Accept": "audio/wav"
    }
    data = json.dumps({"text": text})
    try:
        response = requests.post(
            TTS_URL,
            # Let requests build the query string so the URL stays valid even
            # if TTS_URL already carries query parameters.
            params={"voice": "ko-KR_JinV3Voice"},
            headers=headers,
            data=data,
            auth=("apikey", TTS_API_KEY),
            # Without a timeout a stalled connection would block forever.
            timeout=60,
        )
    except requests.RequestException as exc:
        print(f"⚠️ TTS 요청 실패: {exc}")
        return None

    if response.status_code == 200:
        with open("output.wav", "wb") as f:
            f.write(response.content)
        return "output.wav"
    return None

def play_audio(audio_file):
    """Play a WAV file through the default output device.

    The file is streamed to the device in CHUNK-frame pieces. The WAV reader,
    the output stream and the PyAudio instance are all released even if
    playback fails, so the file can be deleted afterwards.

    Args:
        audio_file: Path to the WAV file to play.
    """
    # The context manager closes the file; an open handle would otherwise
    # block later deletion of the file on some platforms.
    with wave.open(audio_file, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True
            )
            try:
                data = wf.readframes(CHUNK)
                while data:
                    stream.write(data)
                    data = wf.readframes(CHUNK)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

def main():
    """Run the voice-assistant loop until the process is interrupted.

    Each round records an utterance, transcribes it, asks the LLM for a
    reply, speaks the reply, deletes the temporary audio files and then
    waits five seconds. A round whose transcription failed is skipped.
    """
    while True:
        audio_file = record_audio()
        audio_response = None
        try:
            text = speech_to_text(audio_file)
            print(f"🗣️ 사용자: {text}")

            # Compare against the exact failure sentinel: a substring check
            # would reject genuine utterances that merely contain "오류".
            if text == "음성 인식 오류":
                print("⚠️ 음성 인식 실패. 다시 시도하세요.")
                continue

            response = generate_response(text)
            print(f"🤖 AI 응답: {response}")

            audio_response = text_to_speech(response)
            if audio_response:
                play_audio(audio_response)
        finally:
            # Remove temporary files on every path, including failed
            # recognition and unexpected exceptions.
            for path in (audio_file, audio_response):
                if path and os.path.exists(path):
                    os.remove(path)

        print("⏸ 5초간 대기 중...\n")
        time.sleep(5)

if __name__ == "__main__":
    main()


# Output: 🎙️ 말해주세요... (잠시 멈추면 녹음이 종료됩니다)
